Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(semantic-hl): properly highlight longidents with spaces #932

Merged
merged 3 commits into from
Nov 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@

- Support utf-8 position encoding clients (#919)

## Fixes

- Fix semantic highlighting of "long identifiers," e.g., `Foo.Bar.x` (#TODO)

# 1.14.2

## Fixes
Expand Down
86 changes: 59 additions & 27 deletions ocaml-lsp-server/src/semantic_highlighting.ml
Original file line number Diff line number Diff line change
Expand Up @@ -330,42 +330,71 @@ end = struct
end

(** To traverse OCaml parsetree and produce semantic tokens. *)
module Parsetree_fold () : sig
module Parsetree_fold (M : sig
val source : string
end) : sig
val apply : Mreader.parsetree -> Tokens.t
end = struct
(* mutable state *)
let tokens = Tokens.create ()

let source_excerpt ({ loc_start; loc_end; _ } : Loc.t) =
let start_offset = loc_start.pos_cnum in
let end_offset = loc_end.pos_cnum in
String.sub M.source ~pos:start_offset ~len:(end_offset - start_offset)

let add_token loc token_type token_modifiers =
Tokens.append_token tokens loc token_type token_modifiers

let add_token' pos ~length token_type token_modifiers =
Tokens.append_token' tokens pos ~length token_type token_modifiers

let lident ({ txt = lid; loc } : Longident.t Loc.loc) rightmost_name
(* TODO: make sure we follow specs when parsing -
https://v2.ocaml.org/manual/names.html#sss:refer-named *)
let lident ({ loc; _ } : Longident.t Loc.loc) rightmost_name
?(modifiers = Token_modifiers_set.empty) () =
let start : Position.t option =
Position.of_lexical_position loc.loc_start
in
Option.iter start ~f:(fun start ->
let lst = Longident.flatten lid in
let rec aux offset = function
| [] -> ()
| [ constr_name ] ->
add_token'
{ start with character = start.character + offset }
~length:(String.length constr_name)
rightmost_name
modifiers
| mod_name :: rest ->
add_token'
{ start with character = start.character + offset }
~length:(String.length mod_name)
Token_type.module_
Token_modifiers_set.empty;
aux (offset + String.length mod_name + 1) rest
in
aux 0 lst)
let start = Position.of_lexical_position loc.loc_start in
match start with
| None -> ()
| Some start ->
let lid = source_excerpt loc in

let i = ref 0 in
let line = ref start.line in
let character = ref start.character in

let parse_word () : Position.t * [ `Length of int ] =
let left_pos = { Position.line = !line; character = !character } in
while
!i < String.length lid
&&
match lid.[!i] with
| '\n' | ' ' | '.' -> false
| _ -> true
do
incr character;
incr i
done;
(left_pos, `Length (!character - left_pos.character))
in

while !i < String.length lid do
match lid.[!i] with
| '\n' ->
incr line;
character := 0;
incr i
| ' ' | '.' ->
incr character;
incr i
| _ ->
let pos, `Length length = parse_word () in
let token_type, mods =
if !i = String.length lid then (rightmost_name, modifiers)
else (Token_type.module_, Token_modifiers_set.empty)
in
add_token' pos ~length token_type mods
done

let constructor_arguments (self : Ast_iterator.iterator)
(ca : Parsetree.constructor_arguments) =
Expand Down Expand Up @@ -872,10 +901,13 @@ let gen_new_id =
string_of_int x

let compute_tokens doc =
let+ parsetree =
Document.Merlin.with_pipeline_exn doc Mpipeline.reader_parsetree
let+ parsetree, source =
Document.Merlin.with_pipeline_exn doc (fun p ->
(Mpipeline.reader_parsetree p, Mpipeline.raw_source p))
in
let module Fold = Parsetree_fold () in
let module Fold = Parsetree_fold (struct
let source = Msource.text source
end) in
Fold.apply parsetree

let compute_encoded_tokens doc =
Expand Down
108 changes: 108 additions & 0 deletions ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
open Test.Import

type token =
{ delta_line : int
; delta_char : int
; len : int
; type_ : int
; mods : int
}

let tokens encoded_tokens =
Array.init
(Array.length encoded_tokens / 5)
~f:(fun i ->
let ix = i * 5 in
{ delta_line = encoded_tokens.(ix)
; delta_char = encoded_tokens.(ix + 1)
; len = encoded_tokens.(ix + 2)
; type_ = encoded_tokens.(ix + 3)
; mods = encoded_tokens.(ix + 4)
})

let modifiers ~(legend : string array) (encoded_mods : int) =
let rec loop encoded_mods i acc =
if encoded_mods = 0 then acc
else
let k = Stdlib.Int.logand encoded_mods 1 in
let new_val = Stdlib.Int.shift_right encoded_mods 1 in
if k = 0 then loop new_val (i + 1) acc
else loop new_val (i + 1) (legend.(k) :: acc)
in
loop encoded_mods 0 [] |> List.rev

let annotate_src_with_tokens ~(legend : SemanticTokensLegend.t)
~(encoded_tokens : int array) ~(annot_mods : bool) (src : string) : string =
let token_types = legend.SemanticTokensLegend.tokenTypes |> Array.of_list in
let token_mods =
legend.SemanticTokensLegend.tokenModifiers |> Array.of_list
in
let src_ix = ref 0 in
let tokens = Array.Iter.create (tokens encoded_tokens) in
let token = ref @@ Array.Iter.next_exn tokens in
let token_id = ref 0 in
let line = ref !token.delta_line in
let character = ref !token.delta_char in
let src_len = String.length src in
let b = Buffer.create src_len in
while !src_ix < src_len do
if !line = 0 && !character = 0 then (
Printf.bprintf
b
"<%s%s-%d>"
token_types.(!token.type_)
(if annot_mods then
"|" ^ String.concat ~sep:"," (modifiers ~legend:token_mods !token.mods)
else "")
!token_id;
Buffer.add_substring b src !src_ix !token.len;
src_ix := !src_ix + !token.len;
Printf.bprintf b "</%d>" !token_id;
match Array.Iter.next tokens with
| None ->
(* copy the rest of src *)
Buffer.add_substring b src !src_ix (src_len - !src_ix);
src_ix := src_len
| Some next_token ->
incr token_id;
character := next_token.delta_char - !token.len;
token := next_token;
line := !token.delta_line)
else
let ch = src.[!src_ix] in
(match ch with
| '\n' ->
decr line;
character := !token.delta_char
| _ -> decr character);
Buffer.add_char b ch;
incr src_ix
done;
Buffer.to_bytes b |> Bytes.to_string

(* for tests below *)
let legend =
SemanticTokensLegend.create ~tokenModifiers:[] ~tokenTypes:[ "var"; "mod" ]

let%expect_test "annotate single-line src" =
annotate_src_with_tokens
~legend
~encoded_tokens:[| 0; 4; 3; 0; 0 |]
~annot_mods:false
{|let foo = bar|}
|> print_endline;
[%expect {| let <var-0>foo</0> = bar |}]

let%expect_test "annotate multi-line src" =
annotate_src_with_tokens
~legend
~encoded_tokens:
[| 0; 4; 3; 0; 0; 0; 6; 3; 0; 0; 1; 4; 3; 0; 0; 0; 6; 3; 1; 0 |]
~annot_mods:false
{|let foo = bar
let jar = dar|}
|> print_endline;
[%expect
{|
let <var-0>foo</0> = <var-1>bar</1>
let <var-2>jar</2> = <mod-3>dar</3> |}]
8 changes: 8 additions & 0 deletions ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
open Test.Import

val annotate_src_with_tokens :
legend:SemanticTokensLegend.t
-> encoded_tokens:int array
-> annot_mods:bool
-> string
-> string
Loading