Skip to content

Commit

Permalink
Cache directory targets
Browse files Browse the repository at this point in the history
This adds support for caching directory targets.

Signed-off-by: Roman Leshchinskiy <[email protected]>
  • Loading branch information
Roman Leshchinskiy committed Dec 19, 2023
1 parent b56aadb commit b39b554
Show file tree
Hide file tree
Showing 10 changed files with 149 additions and 101 deletions.
147 changes: 91 additions & 56 deletions src/dune_cache/local.ml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ module Artifacts = struct
let entries =
Targets.Produced.foldi artifacts ~init:[] ~f:(fun target file_digest entries ->
let entry : Metadata_entry.t =
{ file_name = Path.Build.basename target; file_digest }
{ file_path = Path.Local.to_string target; file_digest }
in
entry :: entries)
|> List.rev
Expand All @@ -95,16 +95,20 @@ module Artifacts = struct
If any of the targets couldn't be stored in the temporary directory, then
the result is [Error] with the corresponding exception. Otherwise, the
result is [Ok ()]. *)
let store_targets_to ~temp_dir ~targets ~mode : unit Or_exn.t =
let store_targets_to ~temp_dir ~(targets : _ Targets.Produced.t) ~mode : unit Or_exn.t =
let portable_hardlink_or_copy =
match (mode : Dune_cache_storage.Mode.t) with
| Hardlink -> Io.portable_hardlink
| Copy -> fun ~src ~dst -> Io.copy_file ~src ~dst ()
in
Result.try_with (fun () ->
(* CR-someday rleshchinskiy: We recreate the directory structure here but it might be
simpler to just use file digests instead of file names and no subdirectories. *)
Path.Local.Map.iteri targets.dirs ~f:(fun path _ ->
Path.mkdir_p (Path.append_local temp_dir path));
Targets.Produced.iteri targets ~f:(fun path _ ->
let path_in_build_dir = Path.build path in
let path_in_temp_dir = Path.relative temp_dir (Path.basename path_in_build_dir) in
let path_in_build_dir = Path.build (Path.Build.append_local targets.root path) in
let path_in_temp_dir = Path.append_local temp_dir path in
portable_hardlink_or_copy ~src:path_in_build_dir ~dst:path_in_temp_dir))
;;

Expand All @@ -117,7 +121,7 @@ module Artifacts = struct
let open Fiber.O in
Fiber.collect_errors (fun () ->
Targets.Produced.parallel_map targets ~f:(fun path { Target.executable } ->
let file = Path.relative temp_dir (Path.Build.basename path) in
let file = Path.append_local temp_dir path in
compute_digest ~executable file))
>>| Result.map_error ~f:(function
| exn :: _ -> exn.Exn_with_backtrace.exn
Expand All @@ -130,8 +134,7 @@ module Artifacts = struct
artifacts
~init:Store_result.empty
~f:(fun target digest results ->
let file_name = Path.Build.basename target in
let path_in_temp_dir = Path.relative temp_dir file_name in
let path_in_temp_dir = Path.append_local temp_dir target in
let path_in_cache = file_path ~file_digest:digest in
let store_using_hardlinks () =
match
Expand All @@ -143,7 +146,9 @@ module Artifacts = struct
(* We end up here if the cache already contains an entry for this
artifact. We deduplicate by keeping only one copy, in the
cache. *)
let path_in_build_dir = Path.build target in
let path_in_build_dir =
Path.build (Path.Build.append_local artifacts.root target)
in
(match
Path.unlink_no_err path_in_temp_dir;
(* At first, we deduplicate the temporary file. Doing this
Expand Down Expand Up @@ -219,63 +224,93 @@ module Artifacts = struct
Store_artifacts_result.of_store_result ~artifacts result)
;;

let create_all_or_nothing ~create ~destroy targets =
Targets.Produced.foldi targets ~init:(Ok []) ~f:(fun target digest ->
function
| Ok created ->
(match create target digest with
| Error error ->
List.iter created ~f:destroy;
Error error
| Ok () -> Ok (target :: created))
| Error _ as error -> error)
|> Result.map ~f:(fun _ -> ())
;;
module File_restore = struct
exception E of Digest.t Targets.Produced.t Restore_result.t

module Unwind : sig
type t

val make : unit -> t
val push : t -> (unit -> unit) -> unit
val unwind : t -> unit
end = struct
type t = (unit -> unit) list ref

let make () = ref []
let push t f = t := f :: !t

let unwind t =
List.iter !t ~f:(fun f ->
try f () with
| _ -> ());
t := []
;;
end

type file_restore_error =
| Not_found
| Other of exn
let hardlink ~src ~dst =
try link_even_if_there_are_too_many_links_already ~src ~dst with
| Unix.Unix_error (Unix.ENOENT, _, _) -> raise_notrace (E Not_found_in_cache)
| exn -> raise_notrace (E (Error exn))
;;

let copy ~src ~dst =
try Io.copy_file ~src ~dst () with
| Sys_error _ -> raise_notrace (E Not_found_in_cache)
;;

let create_all_or_none
(mode : Dune_cache_storage.Mode.t)
(artifacts : _ Targets.Produced.t)
=
let unwind = Unwind.make () in
let rec mk_dir (dir : Path.Local.t) =
(match Path.Local.parent dir with
| Some parent when not (Path.Local.is_root parent) -> mk_dir parent
| Some _ | None -> ());
let path = Path.build (Path.Build.append_local artifacts.root dir) in
if not (Path.exists path)
then (
Path.mkdir_p path;
Unwind.push unwind (fun () -> Path.rmdir path))
in
let mk_file file file_digest =
let target = Path.Build.append_local artifacts.root file in
let dst = Path.build target in
let src = file_path ~file_digest in
(match mode with
| Hardlink -> hardlink ~src ~dst
| Copy -> copy ~src ~dst);
Unwind.push unwind (fun () -> Path.Build.unlink_no_err target)
in
try
Path.Local.Map.iteri artifacts.dirs ~f:(fun dir _ -> mk_dir dir);
Targets.Produced.iteri artifacts ~f:mk_file
with
| exn ->
Unwind.unwind unwind;
reraise exn
;;
end

let restore ~mode ~rule_digest ~target_dir =
Restore_result.bind (list ~rule_digest) ~f:(fun (entries : Metadata_entry.t list) ->
let artifacts =
Filename.Map.of_list_map_exn
Path.Local.Map.of_list_map_exn
entries
~f:(fun { Metadata_entry.file_name; file_digest } -> file_name, file_digest)
~f:(fun { Metadata_entry.file_path; file_digest } ->
Path.Local.of_string file_path, file_digest)
|> Targets.Produced.of_files target_dir
in
match
create_all_or_nothing
artifacts
~destroy:Path.Build.unlink_no_err
~create:(fun path_in_build_dir file_digest ->
let path_in_cache = file_path ~file_digest in
match (mode : Dune_cache_storage.Mode.t) with
| Hardlink ->
(match
link_even_if_there_are_too_many_links_already
~src:path_in_cache
~dst:(Path.build path_in_build_dir)
with
| exception Unix.Unix_error (Unix.ENOENT, _, _) ->
Error (Not_found : file_restore_error)
| exception exn -> Error (Other exn)
| () -> Ok ())
| Copy ->
(match
Io.copy_file ~src:path_in_cache ~dst:(Path.build path_in_build_dir) ()
with
| exception Sys_error _ -> Error Not_found
| () -> Ok ()))
try
File_restore.create_all_or_none mode artifacts;
Restored artifacts
with
| Ok () -> Restored artifacts
| Error Not_found ->
(* We reach this point when one of the entries mentioned in the
metadata is missing. The trimmer will eventually delete such
"broken" metadata, so it is reasonable to consider that this
[rule_digest] is not found in the cache. *)
Not_found_in_cache
| Error (Other e) -> Error e)
| File_restore.E result ->
(* If [result] is [Not_found_in_cache] then one of the entries mentioned in
the metadata is missing. The trimmer will eventually delete such "broken"
metadata, so it is reasonable to consider that this [rule_digest] is not
found in the cache. *)
result)
;;
end

Expand Down
12 changes: 6 additions & 6 deletions src/dune_cache_storage/dune_cache_storage.ml
Original file line number Diff line number Diff line change
Expand Up @@ -215,22 +215,22 @@ end
module Artifacts = struct
module Metadata_entry = struct
type t =
{ file_name : string
{ file_path : string
; file_digest : Digest.t
}

let equal x y =
Digest.equal x.file_digest y.file_digest && String.equal x.file_name y.file_name
Digest.equal x.file_digest y.file_digest && String.equal x.file_path y.file_path
;;

let to_sexp { file_name; file_digest } =
Sexp.List [ Atom file_name; Atom (Digest.to_string file_digest) ]
let to_sexp { file_path; file_digest } =
Sexp.List [ Atom file_path; Atom (Digest.to_string file_digest) ]
;;

let of_sexp = function
| Sexp.List [ Atom file_name; Atom file_digest ] ->
| Sexp.List [ Atom file_path; Atom file_digest ] ->
(match Digest.from_hex file_digest with
| Some file_digest -> Ok { file_name; file_digest }
| Some file_digest -> Ok { file_path; file_digest }
| None ->
Error
(Failure
Expand Down
2 changes: 1 addition & 1 deletion src/dune_cache_storage/dune_cache_storage.mli
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ end
module Artifacts : sig
module Metadata_entry : sig
type t =
{ file_name : string
{ file_path : string (** Can have more than one component for directory targets *)
; file_digest : Digest.t
}
end
Expand Down
2 changes: 0 additions & 2 deletions src/dune_engine/build_system.ml
Original file line number Diff line number Diff line change
Expand Up @@ -517,10 +517,8 @@ end = struct
let rule_digest =
compute_rule_digest rule ~deps ~action ~sandbox_mode ~execution_parameters
in
(* CR-someday amokhov: Add support for rules with directory targets. *)
let can_go_in_shared_cache =
action.can_go_in_shared_cache
&& Filename.Set.is_empty targets.dirs
&& (not
(always_rerun
|| is_action_dynamic
Expand Down
19 changes: 11 additions & 8 deletions src/dune_shared_cache/dune_shared_cache.ml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ struct
(* If this function fails to store the rule to the shared cache, it returns
[None] because we don't want this to be a catastrophic error. We simply log
this incident and continue without saving the rule to the shared cache. *)
let try_to_store_to_shared_cache ~mode ~rule_digest ~action ~targets
let try_to_store_to_shared_cache ~mode ~rule_digest ~action ~produced_targets
: Digest.t Targets.Produced.t option Fiber.t
=
let open Fiber.O in
Expand All @@ -136,15 +136,18 @@ struct
]
in
let update_cached_digests ~targets_and_digests =
Targets.Produced.iteri targets_and_digests ~f:Cached_digest.set
Targets.Produced.iteri targets_and_digests ~f:(fun path digest ->
Cached_digest.set (Path.Build.append_local targets_and_digests.root path) digest)
in
match
(* CR-soon rleshchinskiy: Don't drop directory targets here. *)
Targets.Produced.drop_dirs targets
|> Targets.Produced.map_with_errors ~all_errors:false ~f:(fun target () ->
match Dune_cache.Local.Target.create target with
| Some t -> Ok t
| None -> Error ())
Targets.Produced.map_with_errors
produced_targets
~all_errors:false
~f:(fun target () ->
match Dune_cache.Local.Target.create target with
| Some t -> Ok t
| None -> Error ())
with
| Error _ -> Fiber.return None
| Ok targets ->
Expand Down Expand Up @@ -299,7 +302,7 @@ struct
when can_go_in_shared_cache ->
let open Fiber.O in
let+ produced_targets_with_digests =
try_to_store_to_shared_cache ~mode ~rule_digest ~targets:produced_targets ~action
try_to_store_to_shared_cache ~mode ~rule_digest ~produced_targets ~action
in
(match produced_targets_with_digests with
| Some produced_targets_with_digests -> produced_targets_with_digests
Expand Down
41 changes: 28 additions & 13 deletions src/dune_targets/dune_targets.ml
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,26 @@ module Produced = struct
Ok { root = validated.root; files; dirs }
;;

let of_files root files = { root; files; dirs = Path.Local.Map.empty }
let drop_dirs t = { t with dirs = Path.Local.Map.empty }
let of_files root files =
let f file payload t =
let parent = Path.Local.parent_exn file in
if Path.Local.is_root parent
then
{ t with
files = Filename.Map.add_exn t.files (Path.Local.to_string file) payload
}
else (
let fn = Path.Local.basename file in
{ t with
dirs =
Path.Local.Map.update t.dirs parent ~f:(fun files ->
let files = Option.value files ~default:Filename.Map.empty in
Some (Filename.Map.add_exn files fn payload))
})
in
let init = { root; files = Filename.Map.empty; dirs = Path.Local.Map.empty } in
Path.Local.Map.foldi files ~init ~f
;;

let all_files_seq { root = _; files; dirs } =
Seq.append
Expand Down Expand Up @@ -325,23 +343,21 @@ module Produced = struct
Filename.Map.exists files ~f || Path.Local.Map.exists dirs ~f:(String.Map.exists ~f)
;;

let foldi { root; files; dirs } ~init ~f =
let foldi { root = _; files; dirs } ~init ~f =
let acc =
Filename.Map.foldi files ~init ~f:(fun file acc ->
f (Path.Build.relative root file) acc)
f (Path.Local.of_string file) acc)
in
Path.Local.Map.foldi dirs ~init:acc ~f:(fun dir filenames acc ->
let dir = Path.Build.append_local root dir in
String.Map.foldi filenames ~init:acc ~f:(fun filename payload acc ->
f (Path.Build.relative dir filename) payload acc))
f (Path.Local.relative dir filename) payload acc))
;;

let iteri { root; files; dirs } ~f =
Filename.Map.iteri files ~f:(fun file acc -> f (Path.Build.relative root file) acc);
let iteri { root = _; files; dirs } ~f =
Filename.Map.iteri files ~f:(fun file acc -> f (Path.Local.of_string file) acc);
Path.Local.Map.iteri dirs ~f:(fun dir filenames ->
let dir = Path.Build.append_local root dir in
String.Map.iteri filenames ~f:(fun filename payload ->
f (Path.Build.relative dir filename) payload))
f (Path.Local.relative dir filename) payload))
;;

module Path_traversal = Fiber.Make_map_traversals (Path.Local.Map)
Expand All @@ -353,12 +369,11 @@ module Produced = struct
Fiber.fork_and_join
(fun () ->
Filename_traversal.parallel_map files ~f:(fun file ->
f (Path.Build.relative root file)))
f (Path.Local.of_string file)))
(fun () ->
Path_traversal.parallel_map dirs ~f:(fun dir files ->
let dir = Path.Build.append_local root dir in
Filename_traversal.parallel_map files ~f:(fun file payload ->
f (Path.Build.relative dir file) payload)))
f (Path.Local.relative dir file) payload)))
in
{ root; files; dirs }
;;
Expand Down
13 changes: 5 additions & 8 deletions src/dune_targets/dune_targets.mli
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,8 @@ module Produced : sig
and collecting all contained files. *)
val of_validated : Validated.t -> (unit t, Error.t) result

(** Populates only the [files] field, leaving [dirs] empty. *)
val of_files : Path.Build.t -> 'a Filename.Map.t -> 'a t

(** Drop all directory targets, leaving only file targets. *)
val drop_dirs : 'a t -> 'a t
(** Construct from a set of files in the root directory. *)
val of_files : Path.Build.t -> 'a Path.Local.Map.t -> 'a t

(** Union of [t.files] and all files in [t.dirs] as [Seq.t] for efficient traversal.
The resulting [Path.Local.t]s are relative to [t.root]. *)
Expand All @@ -117,9 +114,9 @@ module Produced : sig

val equal : 'a t -> 'a t -> equal:('a -> 'a -> bool) -> bool
val exists : 'a t -> f:('a -> bool) -> bool
val foldi : 'a t -> init:'acc -> f:(Path.Build.t -> 'a -> 'acc -> 'acc) -> 'acc
val iteri : 'a t -> f:(Path.Build.t -> 'a -> unit) -> unit
val parallel_map : 'a t -> f:(Path.Build.t -> 'a -> 'b Fiber.t) -> 'b t Fiber.t
val foldi : 'a t -> init:'acc -> f:(Path.Local.t -> 'a -> 'acc -> 'acc) -> 'acc
val iteri : 'a t -> f:(Path.Local.t -> 'a -> unit) -> unit
val parallel_map : 'a t -> f:(Path.Local.t -> 'a -> 'b Fiber.t) -> 'b t Fiber.t

(** Aggregate all content digests. *)
val digest : Digest.t t -> Digest.t
Expand Down
Loading

0 comments on commit b39b554

Please sign in to comment.