From 2bbdb99395302edb1d3b8c09060a2e9fd9b6b99d Mon Sep 17 00:00:00 2001 From: Ulugbek Abdullaev Date: Mon, 21 Nov 2022 13:08:01 +0500 Subject: [PATCH 1/3] refactor(semantic-hl): annotate src with tokens --- .../test/e2e-new/semantic_hl_helpers.ml | 108 +++++++++++++++ .../test/e2e-new/semantic_hl_helpers.mli | 8 ++ .../test/e2e-new/semantic_hl_tests.ml | 125 +++++++++++++----- ocaml-lsp-server/test/e2e-new/test.ml | 33 +++++ 4 files changed, 238 insertions(+), 36 deletions(-) create mode 100644 ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.ml create mode 100644 ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.mli diff --git a/ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.ml b/ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.ml new file mode 100644 index 000000000..7848f682c --- /dev/null +++ b/ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.ml @@ -0,0 +1,108 @@ +open Test.Import + +type token = + { delta_line : int + ; delta_char : int + ; len : int + ; type_ : int + ; mods : int + } + +let tokens encoded_tokens = + Array.init + (Array.length encoded_tokens / 5) + ~f:(fun i -> + let ix = i * 5 in + { delta_line = encoded_tokens.(ix) + ; delta_char = encoded_tokens.(ix + 1) + ; len = encoded_tokens.(ix + 2) + ; type_ = encoded_tokens.(ix + 3) + ; mods = encoded_tokens.(ix + 4) + }) + +let modifiers ~(legend : string array) (encoded_mods : int) = + let rec loop encoded_mods i acc = + if encoded_mods = 0 then acc + else + let k = Stdlib.Int.logand encoded_mods 1 in + let new_val = Stdlib.Int.shift_right encoded_mods 1 in + if k = 0 then loop new_val (i + 1) acc + else loop new_val (i + 1) (legend.(k) :: acc) + in + loop encoded_mods 0 [] |> List.rev + +let annotate_src_with_tokens ~(legend : SemanticTokensLegend.t) + ~(encoded_tokens : int array) ~(annot_mods : bool) (src : string) : string = + let token_types = legend.SemanticTokensLegend.tokenTypes |> Array.of_list in + let token_mods = + legend.SemanticTokensLegend.tokenModifiers |> Array.of_list + in + let src_ix = ref 0 in + let tokens = Array.Iter.create (tokens encoded_tokens) in + let token = ref @@ Array.Iter.next_exn tokens in + let token_id = ref 0 in + let line = ref !token.delta_line in + let character = ref !token.delta_char in + let src_len = String.length src in + let b = Buffer.create src_len in + while !src_ix < src_len do + if !line = 0 && !character = 0 then ( + Printf.bprintf + b + "<%s%s-%d>" + token_types.(!token.type_) + (if annot_mods then + "|" ^ String.concat ~sep:"," (modifiers ~legend:token_mods !token.mods) + else "") + !token_id; + Buffer.add_substring b src !src_ix !token.len; + src_ix := !src_ix + !token.len; + Printf.bprintf b "" !token_id; + match Array.Iter.next tokens with + | None -> + (* copy the rest of src *) + Buffer.add_substring b src !src_ix (src_len - !src_ix); + src_ix := src_len + | Some next_token -> + incr token_id; + character := next_token.delta_char - !token.len; + token := next_token; + line := !token.delta_line) + else + let ch = src.[!src_ix] in + (match ch with + | '\n' -> + decr line; + character := !token.delta_char + | _ -> decr character); + Buffer.add_char b ch; + incr src_ix + done; + Buffer.to_bytes b |> Bytes.to_string + +(* for tests below *) +let legend = + SemanticTokensLegend.create ~tokenModifiers:[] ~tokenTypes:[ "var"; "mod" ] + +let%expect_test "annotate single-line src" = + annotate_src_with_tokens + ~legend + ~encoded_tokens:[| 0; 4; 3; 0; 0 |] + ~annot_mods:false + {|let foo = bar|} + |> print_endline; + [%expect {| let foo = bar |}] + +let%expect_test "annotate multi-line src" = + annotate_src_with_tokens + ~legend + ~encoded_tokens: + [| 0; 4; 3; 0; 0; 0; 6; 3; 0; 0; 1; 4; 3; 0; 0; 0; 6; 3; 1; 0 |] + ~annot_mods:false + {|let foo = bar +let jar = dar|} + |> print_endline; + [%expect + {| + let foo = bar + let jar = dar |}] diff --git a/ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.mli b/ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.mli new file mode 100644 index 000000000..a8ef27121 --- /dev/null +++ b/ocaml-lsp-server/test/e2e-new/semantic_hl_helpers.mli @@ -0,0 +1,8 @@ +open Test.Import + +val annotate_src_with_tokens : + legend:SemanticTokensLegend.t + -> encoded_tokens:int array + -> annot_mods:bool + -> string + -> string diff --git a/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml b/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml index 6a06c80e8..494030a34 100644 --- a/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml +++ b/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml @@ -69,13 +69,18 @@ let client_capabilities = in ClientCapabilities.create ~textDocument () +type 'resp req_ctx = + { initializeResult : InitializeResult.t + ; resp : 'resp + } + let test : type resp. src:string -> (SemanticTokensParams.t -> resp Client.out_request) - -> (resp -> Yojson.Safe.t) + -> (resp req_ctx -> unit Fiber.t) -> unit = - fun ~src req json_of_resp -> + fun ~src req consume_resp -> let wait_for_diagnostics = Fiber.Ivar.create () in let handler = Client.Handler.make @@ -101,7 +106,9 @@ let test : (InitializeParams.create ~capabilities:client_capabilities ()) in let run () = - let* (_ : InitializeResult.t) = Client.initialized client in + let* (initializeResult : InitializeResult.t) = + Client.initialized client + in let uri = DocumentUri.of_path "test.ml" in let textDocument = TextDocumentItem.create ~uri ~languageId:"ocaml" ~version:0 ~text:src @@ -117,11 +124,7 @@ let test : let params = SemanticTokensParams.create ~textDocument () in Client.request client (req params) in - let* () = - json_of_resp resp - |> Yojson.Safe.pretty_to_string ~std:false - |> print_endline |> Fiber.return - in + let* () = consume_resp { initializeResult; resp } in let* () = Fiber.fork_and_join_unit (fun () -> Fiber.Ivar.read wait_for_diagnostics) @@ -132,38 +135,85 @@ let test : Fiber.fork_and_join_unit run_client run) let test_semantic_tokens_full src = - test - ~src - (fun p -> SemanticTokensFull p) - (fun resp -> - Option.map resp ~f:SemanticTokens.yojson_of_t - |> Option.value ~default:`Null) + let print_resp { initializeResult; resp } = + Fiber.return + @@ + match resp with + | None -> print_endline "empty response" + | Some { SemanticTokens.data; _ } -> + let legend = + match + initializeResult.InitializeResult.capabilities + .ServerCapabilities.semanticTokensProvider + with + | None -> failwith "no server capabilities for semantic tokens" + | Some (`SemanticTokensOptions { legend; _ }) -> legend + | Some (`SemanticTokensRegistrationOptions { legend; _ }) -> legend + in + print_endline + @@ Semantic_hl_helpers.annotate_src_with_tokens + ~legend + ~encoded_tokens:data + ~annot_mods:true + src + in + test ~src (fun p -> SemanticTokensFull p) print_resp let%expect_test "tokens for ocaml_lsp_server.ml" = test_semantic_tokens_full Semantic_hl_data.src0; [%expect {| - { - "data": [ - 1, 7, 3, 0, 2, 1, 7, 1, 1, 1, 2, 7, 3, 3, 1, 1, 6, 3, 10, 1, 0, 7, 6, 1, - 0, 1, 6, 3, 10, 1, 0, 17, 3, 1, 0, 0, 17, 6, 1, 0, 2, 6, 1, 8, 1, 0, 4, - 4, 1, 0, 2, 6, 1, 12, 1, 0, 4, 4, 1, 0, 0, 8, 1, 1, 0, 2, 7, 1, 1, 1, 0, - 4, 3, 1, 0, 2, 7, 3, 3, 1, 1, 6, 3, 10, 1, 0, 7, 6, 1, 0, 1, 6, 3, 10, 1, - 0, 17, 3, 1, 0, 0, 17, 6, 1, 0, 2, 6, 1, 8, 0, 0, 4, 2, 10, 0, 2, 6, 1, - 12, 2, 0, 2, 2, 10, 0, 0, 5, 1, 19, 0, 3, 12, 3, 4, 0, 1, 7, 1, 5, 1, 1, - 6, 3, 9, 0, 0, 6, 3, 0, 0, 0, 4, 1, 1, 0, 1, 6, 3, 9, 0, 0, 6, 3, 1, 0, - 4, 5, 1, 3, 1, 0, 4, 3, 0, 0, 0, 4, 3, 1, 0, 1, 4, 3, 10, 1, 0, 7, 6, 1, - 0, 1, 4, 3, 10, 1, 0, 20, 3, 1, 0, 0, 20, 6, 1, 0, 2, 4, 1, 12, 2, 0, 3, - 3, 8, 0, 0, 6, 1, 1, 0, 1, 8, 3, 8, 0, 1, 4, 3, 0, 0, 0, 4, 3, 10, 0, 0, - 4, 1, 8, 0, 0, 5, 1, 8, 0, 0, 2, 1, 12, 0, 0, 2, 13, 12, 0, 0, 14, 1, 19, - 0, 1, 4, 3, 0, 0, 0, 4, 3, 10, 0, 0, 13, 1, 8, 0, 0, 6, 13, 12, 0, 0, 14, - 1, 8, 0, 1, 4, 3, 0, 0, 0, 4, 3, 10, 0, 0, 16, 1, 8, 0, 0, 6, 1, 8, 0, 2, - 7, 3, 0, 2, 0, 5, 3, 0, 0, 0, 6, 3, 4, 0, 1, 9, 9, 0, 2, 1, 9, 1, 1, 1, - 0, 4, 6, 1, 0, 4, 7, 8, 0, 2, 0, 11, 3, 0, 0, 1, 7, 1, 5, 1, 1, 6, 3, 9, - 0, 0, 6, 3, 0, 0, 0, 4, 1, 1, 0, 1, 6, 3, 9, 0, 0, 6, 3, 1, 0 - ], - "resultId": "0" - } |}] + module Moo : sig + type t + + type koo = + | Foo of string + | Bar of [ `Int of int | `String of string ] + + val u : unit + + val f : unit -> t + end = struct + type t = int + + type koo = + | Foo of string + | Bar of [ `Int of int | `String of string ] + + let u = () + + let f () = 0 + end + + module type Bar = sig + type t = + { foo : Moo.t + ; bar : int + } + end + + type t = Moo.koo = + | Foo of string + | Bar of [ `BarInt of int | `BarString of string ] + + let f (foo : t) = + match foo with + | Moo.Foo s -> s ^ string_of_int 0 + | Moo.Bar (`BarInt i) -> string_of_int i + | Moo.Bar (`BarString s) -> s + + module Foo (Arg : Bar) = struct + module Inner_foo = struct + type t = string + end + end + + module Foo_inst = Foo (struct + type t = + { foo : Moo.t + ; bar : int + } + end) |}] let test_semantic_tokens_full_debug src = test @@ -176,7 +226,10 @@ let test_semantic_tokens_full_debug src = (SemanticTokensParams.yojson_of_t p |> Jsonrpc.Structured.t_of_yojson) }) - Fun.id + (fun { resp; _ } -> + resp + |> Yojson.Safe.pretty_to_string ~std:false + |> print_endline |> Fiber.return) let%expect_test "tokens for ocaml_lsp_server.ml" = test_semantic_tokens_full_debug Semantic_hl_data.src0; diff --git a/ocaml-lsp-server/test/e2e-new/test.ml b/ocaml-lsp-server/test/e2e-new/test.ml index 45be35b55..150b8788e 100644 --- a/ocaml-lsp-server/test/e2e-new/test.ml +++ b/ocaml-lsp-server/test/e2e-new/test.ml @@ -25,6 +25,39 @@ module Import = struct in List.rev (take [] n l) end + + module Array = struct + include Array + + module Iter : sig + type 'a t + + val create : 'a array -> 'a t + + val has_next : 'a t -> bool + + val next : 'a t -> 'a option + + val next_exn : 'a t -> 'a + end = struct + type 'a t = + { contents : 'a array + ; mutable ix : int + } + + let create contents = { contents; ix = 0 } + + let has_next t = t.ix < Array.length t.contents + + let next_exn t = + let { contents; ix } = t in + let v = contents.(ix) in + t.ix <- ix + 1; + v + + let next t = if has_next t then Some (next_exn t) else None + end + end end include Fiber.O From e9c4ecd19d3629ca10c8c710448312b36c508e83 Mon Sep 17 00:00:00 2001 From: Ulugbek Abdullaev Date: Mon, 21 Nov 2022 19:07:35 +0500 Subject: [PATCH 2/3] fix(semantic-hl): show longidents with spaces and infix names are incorrectly highlighted --- .../test/e2e-new/semantic_hl_tests.ml | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml b/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml index 494030a34..4ce78a35f 100644 --- a/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml +++ b/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml @@ -687,3 +687,39 @@ let%expect_test "tokens for ocaml_lsp_server.ml" = "modifiers": [] } ] |}] + +let%expect_test "FIXME: highlighting longidents with space between identifiers" + = + test_semantic_tokens_full + @@ String.trim {| +let foo = Bar.jar + +let joo = Bar. jar + |}; + [%expect + {| + let foo = Bar.jar + + let joo = Bar. jar |}] + +let%expect_test "FIXME: highlighting longidents with space between identifiers \ + and infix fns" = + test_semantic_tokens_full + @@ String.trim {| +Bar.(+) ;; + +Bar.( + ) ;; + +Bar. (+) ;; + +Bar. ( + ) ;; + |}; + [%expect + {| + Bar.(+) ;; + + Bar.( + ) ;; + + Bar. (+) ;; + + Bar. ( + ) ;; |}] From c67e535df44e653ca10e048b6bada92e3edd57db Mon Sep 17 00:00:00 2001 From: Ulugbek Abdullaev Date: Mon, 21 Nov 2022 19:09:42 +0500 Subject: [PATCH 3/3] fix(semantic-hl): fix longidents with spaces incorrect highlighting --- CHANGES.md | 4 + ocaml-lsp-server/src/semantic_highlighting.ml | 86 +++++++++++++------ .../test/e2e-new/semantic_hl_tests.ml | 31 +++++-- 3 files changed, 85 insertions(+), 36 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 5954b924c..27d2fed75 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,10 @@ - Support utf-8 position encoding clients (#919) +## Fixes + +- Fix semantic highlighting of "long identifiers," e.g., `Foo.Bar.x` (#TODO) + # 1.14.2 ## Fixes diff --git a/ocaml-lsp-server/src/semantic_highlighting.ml b/ocaml-lsp-server/src/semantic_highlighting.ml index b9ad9807c..ac2320c83 100644 --- a/ocaml-lsp-server/src/semantic_highlighting.ml +++ b/ocaml-lsp-server/src/semantic_highlighting.ml @@ -330,42 +330,71 @@ end = struct end (** To traverse OCaml parsetree and produce semantic tokens. *) -module Parsetree_fold () : sig +module Parsetree_fold (M : sig + val source : string +end) : sig val apply : Mreader.parsetree -> Tokens.t end = struct (* mutable state *) let tokens = Tokens.create () + let source_excerpt ({ loc_start; loc_end; _ } : Loc.t) = + let start_offset = loc_start.pos_cnum in + let end_offset = loc_end.pos_cnum in + String.sub M.source ~pos:start_offset ~len:(end_offset - start_offset) + let add_token loc token_type token_modifiers = Tokens.append_token tokens loc token_type token_modifiers let add_token' pos ~length token_type token_modifiers = Tokens.append_token' tokens pos ~length token_type token_modifiers - let lident ({ txt = lid; loc } : Longident.t Loc.loc) rightmost_name + (* TODO: make sure we follow specs when parsing - + https://v2.ocaml.org/manual/names.html#sss:refer-named *) + let lident ({ loc; _ } : Longident.t Loc.loc) rightmost_name ?(modifiers = Token_modifiers_set.empty) () = - let start : Position.t option = - Position.of_lexical_position loc.loc_start - in - Option.iter start ~f:(fun start -> - let lst = Longident.flatten lid in - let rec aux offset = function - | [] -> () - | [ constr_name ] -> - add_token' - { start with character = start.character + offset } - ~length:(String.length constr_name) - rightmost_name - modifiers - | mod_name :: rest -> - add_token' - { start with character = start.character + offset } - ~length:(String.length mod_name) - Token_type.module_ - Token_modifiers_set.empty; - aux (offset + String.length mod_name + 1) rest - in - aux 0 lst) + let start = Position.of_lexical_position loc.loc_start in + match start with + | None -> () + | Some start -> + let lid = source_excerpt loc in + + let i = ref 0 in + let line = ref start.line in + let character = ref start.character in + + let parse_word () : Position.t * [ `Length of int ] = + let left_pos = { Position.line = !line; character = !character } in + while + !i < String.length lid + && + match lid.[!i] with + | '\n' | ' ' | '.' -> false + | _ -> true + do + incr character; + incr i + done; + (left_pos, `Length (!character - left_pos.character)) + in + + while !i < String.length lid do + match lid.[!i] with + | '\n' -> + incr line; + character := 0; + incr i + | ' ' | '.' -> + incr character; + incr i + | _ -> + let pos, `Length length = parse_word () in + let token_type, mods = + if !i = String.length lid then (rightmost_name, modifiers) + else (Token_type.module_, Token_modifiers_set.empty) + in + add_token' pos ~length token_type mods + done let constructor_arguments (self : Ast_iterator.iterator) (ca : Parsetree.constructor_arguments) = @@ -872,10 +901,13 @@ let gen_new_id = string_of_int x let compute_tokens doc = - let+ parsetree = - Document.Merlin.with_pipeline_exn doc Mpipeline.reader_parsetree + let+ parsetree, source = + Document.Merlin.with_pipeline_exn doc (fun p -> + (Mpipeline.reader_parsetree p, Mpipeline.raw_source p)) in - let module Fold = Parsetree_fold () in + let module Fold = Parsetree_fold (struct + let source = Msource.text source + end) in Fold.apply parsetree let compute_encoded_tokens doc = diff --git a/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml b/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml index 4ce78a35f..46b1196d0 100644 --- a/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml +++ b/ocaml-lsp-server/test/e2e-new/semantic_hl_tests.ml @@ -688,8 +688,7 @@ let%expect_test "tokens for ocaml_lsp_server.ml" = } ] |}] -let%expect_test "FIXME: highlighting longidents with space between identifiers" - = +let%expect_test "highlighting longidents with space between identifiers" = test_semantic_tokens_full @@ String.trim {| let foo = Bar.jar @@ -700,10 +699,10 @@ let joo = Bar. jar {| let foo = Bar.jar - let joo = Bar. jar |}] + let joo = Bar. jar |}] -let%expect_test "FIXME: highlighting longidents with space between identifiers \ - and infix fns" = +let%expect_test "highlighting longidents with space between identifiers and \ + infix fns" = test_semantic_tokens_full @@ String.trim {| Bar.(+) ;; @@ -716,10 +715,24 @@ Bar. ( + ) ;; |}; [%expect {| - Bar.(+) ;; + Bar.(+) ;; - Bar.( + ) ;; + Bar.( + ) ;; - Bar. (+) ;; + Bar. (+) ;; - Bar. ( + ) ;; |}] + Bar. ( + ) ;; |}] + +let%expect_test "longidents in records" = + test_semantic_tokens_full + @@ String.trim + {| +module M = struct type r = { foo : int ; bar : string } end + +let x = { M . foo = 0 ; bar = "bar"} + |}; + [%expect + {| + module M = struct type r = { foo : int ; bar : string } end + + let x = { M . foo = 0 ; bar = "bar"} |}]