Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve uri parsing #739

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ possible and does not make any assumptions about IO.
(description "An LSP server for OCaml.")
(depends
yojson
uri
(re (>= 1.5.0))
(ppx_yojson_conv_lib (>= "v0.14"))
dune-rpc
Expand Down
2 changes: 1 addition & 1 deletion lsp/src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
(library
(name lsp)
(public_name lsp)
(libraries jsonrpc ppx_yojson_conv_lib dyn uutf yojson)
(libraries jsonrpc ppx_yojson_conv_lib dyn uutf yojson uri)
(lint
(pps ppx_yojson_conv)))

Expand Down
5 changes: 5 additions & 0 deletions lsp/src/import.ml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ module String = struct

let index = index_opt

let is_empty s = length s = 0

let rec check_prefix s ~prefix len i =
i = len || (s.[i] = prefix.[i] && check_prefix s ~prefix len (i + 1))

Expand All @@ -32,6 +34,9 @@ module String = struct
let prefix_len = length prefix in
len >= prefix_len && check_prefix s ~prefix prefix_len 0

let add_prefix_if_not_exists s ~prefix =
if is_prefix s ~prefix then s else prefix ^ s

let next_occurrence ~pattern text from =
let plen = String.length pattern in
let last = String.length text - plen in
Expand Down
120 changes: 91 additions & 29 deletions lsp/src/uri0.ml
Original file line number Diff line number Diff line change
@@ -1,31 +1,109 @@
(* This module is based on the [vscode-uri] implementation:
https://github.com/microsoft/vscode-uri/blob/main/src/uri.ts. It only
supports scheme, authority and path. Query, port and fragment are not
implemented *)

open Import

module Private = struct
let win32 = ref Sys.win32
end

type t = Uri_lexer.t =
{ scheme : string option
{ scheme : string
; authority : string
; path : string
}

let t_of_yojson json = Json.Conv.string_of_yojson json |> Uri_lexer.of_string
let backslash_to_slash =
String.map ~f:(function
| '\\' -> '/'
| c -> c)

let slash_to_backslash =
String.map ~f:(function
| '/' -> '\\'
| c -> c)

let of_path path =
let path = if !Private.win32 then backslash_to_slash path else path in
Uri_lexer.of_path path

let to_path { path; authority; scheme } =
let path =
let len = String.length path in
if len = 0 then "/"
else
let buff = Buffer.create 64 in
(if (not (String.is_empty authority)) && len > 1 && scheme = "file" then (
Buffer.add_string buff "//";
Buffer.add_string buff authority;
Buffer.add_string buff path)
else if len < 3 then Buffer.add_string buff path
else
let c0 = path.[0] in
let c1 = path.[1] in
let c2 = path.[2] in
if
c0 = '/'
&& ((c1 >= 'A' && c1 <= 'Z') || (c1 >= 'a' && c1 <= 'z'))
&& c2 = ':'
then (
Buffer.add_char buff (Char.lowercase_ascii c1);
Buffer.add_substring buff path 2 (String.length path - 2))
else Buffer.add_string buff path);
Buffer.contents buff
in
if !Private.win32 then slash_to_backslash path else path

let of_string = Uri_lexer.of_string

let encode ?(allow_slash = false) s =
let allowed_chars = if allow_slash then "/" else "" in
Uri.pct_encode ~component:(`Custom (`Generic, allowed_chars, "")) s

let to_string { scheme; authority; path } =
let b = Buffer.create 64 in
scheme
|> Option.iter (fun s ->
Buffer.add_string b s;
Buffer.add_char b ':');
Buffer.add_string b "//";
Buffer.add_string b authority;
if not (String.is_prefix path ~prefix:"/") then Buffer.add_char b '/';
Buffer.add_string b path;
Buffer.contents b
let buff = Buffer.create 64 in

if not (String.is_empty scheme) then (
Buffer.add_string buff scheme;
Buffer.add_char buff ':');

if authority = "file" || scheme = "file" then Buffer.add_string buff "//";

(*TODO: implement full logic:
https://github.com/microsoft/vscode-uri/blob/96acdc0be5f9d5f2640e1c1f6733bbf51ec95177/src/uri.ts#L605 *)
(if not (String.is_empty authority) then
let s = String.lowercase_ascii authority in
Buffer.add_string buff (encode s));

(if not (String.is_empty path) then
let encode = encode ~allow_slash:true in
let encoded_colon = "%3A" in
let len = String.length path in
if len >= 3 && path.[0] = '/' && path.[2] = ':' then (
let drive_letter = Char.lowercase_ascii path.[1] in
if drive_letter >= 'a' && drive_letter <= 'z' then (
Buffer.add_char buff '/';
Buffer.add_char buff drive_letter;
Buffer.add_string buff encoded_colon;
let s = String.sub path ~pos:3 ~len:(len - 3) in
Buffer.add_string buff (encode s)))
else if len >= 2 && path.[1] = ':' then (
let drive_letter = Char.lowercase_ascii path.[0] in
if drive_letter >= 'a' && drive_letter <= 'z' then (
Buffer.add_char buff drive_letter;
Buffer.add_string buff encoded_colon;
let s = String.sub path ~pos:2 ~len:(len - 2) in
Buffer.add_string buff (encode s)))
else Buffer.add_string buff (encode path));

Buffer.contents buff

let yojson_of_t t = `String (to_string t)

let t_of_yojson json = Json.Conv.string_of_yojson json |> of_string

let equal = ( = )

let compare (x : t) (y : t) = Stdlib.compare x y
Expand All @@ -35,23 +113,7 @@ let hash = Hashtbl.hash
let to_dyn { scheme; authority; path } =
let open Dyn in
record
[ ("scheme", (option string) scheme)
[ ("scheme", string scheme)
; ("authority", string authority)
; ("path", string path)
]

let to_path t =
let path =
t.path
|> String.replace_all ~pattern:"\\" ~with_:"/"
|> String.replace_all ~pattern:"%5C" ~with_:"/"
|> String.replace_all ~pattern:"%3A" ~with_:":"
|> String.replace_all ~pattern:"%20" ~with_:" "
|> String.replace_all ~pattern:"%3D" ~with_:"="
|> String.replace_all ~pattern:"%3F" ~with_:"?"
in
if !Private.win32 then path else Filename.concat "/" path

let of_path (path : string) =
let path = Uri_lexer.escape_path path in
{ path; scheme = Some "file"; authority = "" }
4 changes: 2 additions & 2 deletions lsp/src/uri_lexer.mli
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
type t =
{ scheme : string option
{ scheme : string
; authority : string
; path : string
}

val of_string : string -> t

val escape_path : string -> string
val of_path : string -> t
55 changes: 31 additions & 24 deletions lsp/src/uri_lexer.mll
Original file line number Diff line number Diff line change
@@ -1,38 +1,45 @@
{
type t =
{ scheme : string option
{ scheme : string
; authority : string
; path : string
}
}

rule path = parse
| '/'? { path1 (Buffer.create 12) lexbuf }
and path1 buf = parse
| '\\' { Buffer.add_char buf '/' ; path1 buf lexbuf }
| "%5" ['c' 'C'] { Buffer.add_char buf '/' ; path1 buf lexbuf }
| "%3" ['a' 'A'] { Buffer.add_char buf ':' ; path1 buf lexbuf }
| "%3" ['d' 'D'] { Buffer.add_char buf '=' ; path1 buf lexbuf }
| "%3" ['f' 'F'] { Buffer.add_char buf '?' ; path1 buf lexbuf }
| "%20" { Buffer.add_char buf ' ' ; path1 buf lexbuf }
| _ as c { Buffer.add_char buf c ; path1 buf lexbuf }
| eof { Buffer.contents buf }
rule uri = parse
([^':''/''?''#']+ as scheme ':') ?
("//" ([^'/''?''#']* as authority)) ?
([^'?''#']* as path)
{
let open Import in
let scheme = scheme |> Option.value ~default:"file" in
let authority =
authority |> Option.map Uri.pct_decode |> Option.value ~default:""
in
let path =
let path = path |> Uri.pct_decode in
match scheme with
| "http" | "https" | "file" ->
String.add_prefix_if_not_exists path ~prefix:"/"
| _ -> path
in
{ scheme; authority; path; }
}

and uri = parse
| ([^ ':']+) as scheme ':' { uri1 (Some scheme) lexbuf }
| "" { uri1 None lexbuf }
and uri1 scheme = parse
| "//" ([^ '/']* as authority) { uri2 scheme authority lexbuf }
| "" { uri2 scheme "" lexbuf }
and uri2 scheme authority = parse
| "" { { scheme ; authority ; path = path lexbuf } }
and path = parse
| "" { { scheme = "file"; authority = ""; path = "/" } }
| "//" ([^ '/']* as authority) (['/']_* as path) { { scheme = "file"; authority; path } }
| "//" ([^ '/']* as authority) { { scheme = "file"; authority; path = "/" } }
| ("/" _* as path) { { scheme = "file"; authority = ""; path } }
| (_* as path) { { scheme = "file"; authority = ""; path = "/" ^ path } }

{
let escape_path s =
let lexbuf = Lexing.from_string s in
path lexbuf

{
let of_string s =
let lexbuf = Lexing.from_string s in
uri lexbuf

let of_path s =
let lexbuf = Lexing.from_string s in
path lexbuf
}
Loading