Skip to content

Commit

Permalink
refactor: detect large files in Io functions
Browse files Browse the repository at this point in the history
`Io.read_all` and related functions read the contents of a file in a
string, which has a size limit (`Sys.max_string_length`) and can be an
issue in 32-bit systems. This makes an explicit check and raises a
`Code_error` in these situations.

Signed-off-by: Etienne Millon <[email protected]>
  • Loading branch information
emillon committed Jan 24, 2024
1 parent 8305a5b commit 81d9c1e
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 6 deletions.
19 changes: 15 additions & 4 deletions otherlibs/stdune/src/io.ml
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ struct
if r = len then Bytes.unsafe_to_string buf else Bytes.sub_string buf ~pos:0 ~len:r
;;

let read_all =
let read_all_unless_large =
(* We use 65536 because that is the size of OCaml's IO buffers. *)
let chunk_size = 65536 in
(* Generic function for channels such that seeking is unsupported or
Expand All @@ -286,7 +286,7 @@ struct
loop ()
in
try loop () with
| End_of_file -> Buffer.contents buffer
| End_of_file -> Ok (Buffer.contents buffer)
in
fun t ->
(* Optimisation for regular files: if the channel supports seeking, we
Expand All @@ -295,6 +295,7 @@ struct
regular files so this optimizations seems worth it. *)
match in_channel_length t with
| exception Sys_error _ -> read_all_generic t (Buffer.create chunk_size)
| n when n > Sys.max_string_length -> Error ()
| n ->
(* For some files [in_channel_length] returns an invalid value. For
instance for files in /proc it returns [0] and on Windows the
Expand All @@ -307,7 +308,7 @@ struct
end of the file *)
let s = eagerly_input_string t n in
(match input_char t with
| exception End_of_file -> s
| exception End_of_file -> Ok s
| c ->
(* The [+ chunk_size] is to make sure there is at least [chunk_size]
free space so that the first [Buffer.add_channel buffer t
Expand All @@ -318,7 +319,17 @@ struct
read_all_generic t buffer)
;;

let read_file ?binary fn = with_file_in fn ~f:read_all ?binary
let path_to_dyn path = String.to_dyn (Path.to_string path)

let read_file ?binary fn =
match with_file_in fn ~f:read_all_unless_large ?binary with
| Ok x -> x
| Error () ->
Code_error.raise
"read_file: file is larger than Sys.max_string_length"
[ "fn", path_to_dyn fn ]
;;

let lines_of_file fn = with_file_in fn ~f:input_lines ~binary:false
let zero_strings_of_file fn = with_file_in fn ~f:input_zero_separated ~binary:true

Expand Down
12 changes: 11 additions & 1 deletion otherlibs/stdune/src/io.mli
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,17 @@ val input_lines : in_channel -> string list
unrelated channels because it uses a statically-allocated global buffer. *)
val copy_channels : in_channel -> out_channel -> unit

val read_all : in_channel -> string
(** Try to read everything from a channel. Returns [Error ()] if the contents
are larger than [Sys.max_string_length]. This is generally a problem only
on 32-bit systems.
Overflow detection does not happen in the following cases:
- channel is not a file (for example, a pipe)
- if the detected size is unreliable (/proc)
- race condition with another process changing the size of the underlying
file.
In these cases, an exception might be raised by [Buffer] functions.
*)
val read_all_unless_large : in_channel -> (string, unit) result

include Io_intf.S with type path = Path.t
module String_path : Io_intf.S with type path = string
Expand Down
6 changes: 5 additions & 1 deletion test/blackbox-tests/utils/melc_stdlib_prefix.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ open Stdune

let command cmd args =
let p = Unix.open_process_args_in cmd (Array.of_list (cmd :: args)) in
let output = Io.read_all p in
let output =
match Io.read_all_unless_large p with
| Ok x -> x
| Error () -> assert false
in
match Unix.close_process_in p with
| WEXITED n when n = 0 -> Ok output
| WEXITED n -> Error n
Expand Down

0 comments on commit 81d9c1e

Please sign in to comment.