From f356dc6c474e93ae783be633d762bf4611515b94 Mon Sep 17 00:00:00 2001 From: Alain Mebsout Date: Wed, 17 Jul 2024 16:13:31 +0200 Subject: [PATCH 1/4] Build: add camlp-stream dependency to stdlib-unix library --- manifest/externals.ml | 2 ++ manifest/product_octez.ml | 1 + opam/octez-libs.opam | 1 + opam/virtual/octez-deps.opam | 1 + src/lib_stdlib_unix/dune | 3 ++- 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/manifest/externals.ml b/manifest/externals.ml index 8e358b2f66c7..054e5021028b 100644 --- a/manifest/externals.ml +++ b/manifest/externals.ml @@ -48,6 +48,8 @@ let bigstringaf = external_lib "bigstringaf" V.(at_least "0.5.0") let bisect_ppx = opam_only "bisect_ppx" V.(at_least "2.7.0") +let camlp_streams = external_lib "camlp-streams" V.(at_least "5.0.1") + let camlzip = external_lib "camlzip" V.(at_least "1.11" && less_than "1.12") let caqti = external_lib "caqti" V.True diff --git a/manifest/product_octez.ml b/manifest/product_octez.ml index a55881a7bb40..f9083674def2 100644 --- a/manifest/product_octez.ml +++ b/manifest/product_octez.ml @@ -1463,6 +1463,7 @@ let octez_stdlib_unix = conf_libev; uri; progress; + camlp_streams; ] let _octez_stdlib_unix_test = diff --git a/opam/octez-libs.opam b/opam/octez-libs.opam index 98db7bf5fcf1..9ee0d3a65dbe 100644 --- a/opam/octez-libs.opam +++ b/opam/octez-libs.opam @@ -57,6 +57,7 @@ depends: [ "mtime" { >= "2.0.0" } "conf-libev" "progress" { >= "0.1.0" } + "camlp-streams" { >= "5.0.1" } "uutf" "lwt-exit" "ppxlib" diff --git a/opam/virtual/octez-deps.opam b/opam/virtual/octez-deps.opam index 95c1da8bc263..2a239aa3f366 100644 --- a/opam/virtual/octez-deps.opam +++ b/opam/virtual/octez-deps.opam @@ -21,6 +21,7 @@ depends: [ "bigstring" "bigstringaf" { >= "0.5.0" } "bisect_ppx" { >= "2.7.0" } + "camlp-streams" { >= "5.0.1" } "camlzip" { >= "1.11" & < "1.12" } "caqti" "caqti-driver-sqlite3" { >= "2.0.1" } diff --git a/src/lib_stdlib_unix/dune b/src/lib_stdlib_unix/dune index 835054e6159d..26ef36efbe79 100644 --- a/src/lib_stdlib_unix/dune +++ b/src/lib_stdlib_unix/dune @@ -22,7 +22,8 @@ mtime mtime.clock.os uri - progress) + progress + camlp-streams) (flags (:standard) -open Tezos_error_monad -- GitLab From 7a9a9dcbb999c633f328a655e716204c04a1d09f Mon Sep 17 00:00:00 2001 From: Alain Mebsout Date: Wed, 17 Jul 2024 16:20:52 +0200 Subject: [PATCH 2/4] lib_stdlib_unix: move file copy utilities (non-lwt) --- src/lib_smart_rollup_node/snapshot_utils.ml | 95 ++-------------- src/lib_smart_rollup_node/snapshot_utils.mli | 7 -- src/lib_smart_rollup_node/snapshots.ml | 9 +- src/lib_stdlib_unix/utils.ml | 107 +++++++++++++++---- src/lib_stdlib_unix/utils.mli | 56 ++++++---- 5 files changed, 134 insertions(+), 140 deletions(-) diff --git a/src/lib_smart_rollup_node/snapshot_utils.ml b/src/lib_smart_rollup_node/snapshot_utils.ml index 2cdc2089bbdd..b096e2dd22b4 100644 --- a/src/lib_smart_rollup_node/snapshot_utils.ml +++ b/src/lib_smart_rollup_node/snapshot_utils.ml @@ -127,47 +127,15 @@ let read_snapshot_metadata (module Reader : READER_INPUT) = assert (snapshot_version = version) ; Data_encoding.Binary.of_bytes_exn snaphsot_metadata_encoding metadata_bytes -let list_files dir ~include_file f = - let rec list_files_in_dir stream - ((dir, relative_dir, dir_handle) as current_dir_info) = - match Unix.readdir dir_handle with - | "." | ".." -> list_files_in_dir stream current_dir_info - | basename -> - let full_path = Filename.concat dir basename in - let relative_path = Filename.concat relative_dir basename in - let stream = - if Sys.is_directory full_path then - let sub_dir_handle = Unix.opendir full_path in - list_files_in_dir stream (full_path, relative_path, sub_dir_handle) - else if include_file ~relative_path then - Stream.icons (f ~full_path ~relative_path) stream - else stream - in - list_files_in_dir stream current_dir_info - | exception End_of_file -> - Unix.closedir dir_handle ; - stream - in - let dir_handle = Unix.opendir dir in - list_files_in_dir Stream.sempty (dir, "", dir_handle) - -let total_bytes_to_export dir ~include_file = - let file_stream = - list_files dir ~include_file @@ fun ~full_path ~relative_path:_ -> - let {Unix.st_size; _} = Unix.lstat full_path in - st_size - in - let total = ref 0 in - Stream.iter (fun size -> total := !total + size) file_stream ; - !total - let create (module Reader : READER) (module Writer : WRITER) metadata ~dir ~include_file ~dest = let module Archive_writer = Tar.Make (struct include Reader include Writer end) in - let total = total_bytes_to_export dir ~include_file in + let total = + Tezos_stdlib_unix.Utils.directory_contents_size dir ~include_file + in let progress_bar = Progress_bar.progress_bar ~counter:`Bytes @@ -195,7 +163,8 @@ let create (module Reader : READER) (module Writer : WRITER) metadata ~dir raise e in let file_stream = - list_files dir ~include_file @@ fun ~full_path ~relative_path -> + Tezos_stdlib_unix.Utils.list_files dir ~include_file + @@ fun ~full_path ~relative_path -> let {Unix.st_perm; st_size; st_mtime; _} = Unix.lstat full_path in let header = Tar.Header.make @@ -222,58 +191,6 @@ let create (module Reader : READER) (module Writer : WRITER) metadata ~dir Writer.close_out out_chan ; raise e -let rec create_dir ?(perm = 0o755) dir = - let stat = - try Some (Unix.stat dir) with Unix.Unix_error (ENOENT, _, _) -> None - in - match stat with - | Some {st_kind = S_DIR; _} -> () - | Some _ -> Stdlib.failwith "Not a directory" - | None -> ( - create_dir ~perm (Filename.dirname dir) ; - try Unix.mkdir dir perm - with Unix.Unix_error (EEXIST, _, _) -> - (* This is the case where the directory has been created at the same - time. *) - ()) - -let copy_file ~src ~dst = - let in_chan = open_in src in - let out_chan = open_out dst in - try - let buffer_size = 64 * 1024 in - let buf = Bytes.create buffer_size in - let rec copy () = - let read_bytes = input in_chan buf 0 buffer_size in - output out_chan buf 0 read_bytes ; - if read_bytes > 0 then copy () - in - copy () ; - flush out_chan ; - close_in in_chan ; - close_out out_chan - with e -> - close_in in_chan ; - close_out out_chan ; - raise e - -(* TODO: https://gitlab.com/tezos/tezos/-/issues/6857 - Use Lwt_utils_unix.copy_dir instead when file descriptors issue is fixed. *) -let copy_dir ?(perm = 0o755) src dst = - create_dir ~perm dst ; - let files = - list_files src ~include_file:(fun ~relative_path:_ -> true) - @@ fun ~full_path ~relative_path -> - let dst_file = Filename.concat dst relative_path in - (full_path, dst_file) - in - Stream.iter - (fun (src, dst) -> - let dst_dir = Filename.dirname dst in - create_dir ~perm dst_dir ; - copy_file ~src ~dst) - files - let extract (module Reader : READER) (module Writer : WRITER) metadata_check ~snapshot_file ~dest = let open Lwt_result_syntax in @@ -292,7 +209,7 @@ let extract (module Reader : READER) (module Writer : WRITER) metadata_check end) in let out_channel_of_header (header : Tar.Header.t) = let path = Filename.concat dest header.file_name in - create_dir (Filename.dirname path) ; + Tezos_stdlib_unix.Utils.create_dir (Filename.dirname path) ; Writer.open_out path in let in_chan = Reader.open_in snapshot_file in diff --git a/src/lib_smart_rollup_node/snapshot_utils.mli b/src/lib_smart_rollup_node/snapshot_utils.mli index 11ed298ba9f4..79e47d3a4ec0 100644 --- a/src/lib_smart_rollup_node/snapshot_utils.mli +++ b/src/lib_smart_rollup_node/snapshot_utils.mli @@ -72,10 +72,3 @@ val compress : snapshot_file:string -> string (** [read_metadata reader ~snapshot_file] reads the metadata from the snapshot file without extracting it. *) val read_metadata : reader -> snapshot_file:string -> snapshot_metadata - -(** [copy_file ~src ~dst] copies the file [src] to [dst]. *) -val copy_file : src:string -> dst:string -> unit - -(** [copy_dir ?perm src dst] copies the content of directory [src] in the - directory [dst] (created with [perm], [0o755] by default). *) -val copy_dir : ?perm:int -> string -> string -> unit diff --git a/src/lib_smart_rollup_node/snapshots.ml b/src/lib_smart_rollup_node/snapshots.ml index a4439d09a9af..b86340f44dc6 100644 --- a/src/lib_smart_rollup_node/snapshots.ml +++ b/src/lib_smart_rollup_node/snapshots.ml @@ -787,21 +787,24 @@ let export_compact cctxt ~no_checks ~compression ~data_dir ~dest ~filename = @@ Context.export_snapshot context first_block.context ~path:tmp_context_dir in let ( // ) = Filename.concat in + (* TODO: https://gitlab.com/tezos/tezos/-/issues/6857 + Use Lwt_utils_unix.copy_dir instead when file descriptors issue is fixed. *) let copy_dir a = let dir = data_dir // a in if Sys.file_exists dir && Sys.is_directory dir then - copy_dir dir (tmp_dir // a) + Tezos_stdlib_unix.Utils.copy_dir dir (tmp_dir // a) in let copy_file a = let path = data_dir // a in - if Sys.file_exists path then copy_file ~src:path ~dst:(tmp_dir // a) + if Sys.file_exists path then + Tezos_stdlib_unix.Utils.copy_file ~src:path ~dst:(tmp_dir // a) in Format.eprintf "Acquiring process lock@." ; let* () = Utils.with_lockfile (Node_context.processing_lockfile_path ~data_dir) @@ fun () -> Format.eprintf "Copying data@." ; - Snapshot_utils.copy_dir store_dir tmp_store_dir ; + Tezos_stdlib_unix.Utils.copy_dir store_dir tmp_store_dir ; copy_file "metadata" ; return_unit in diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index f5b20c671b78..02a6bf2998ed 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -1,25 +1,8 @@ (*****************************************************************************) (* *) -(* Open Source License *) -(* Copyright (c) 2018 Nomadic Labs *) -(* *) -(* Permission is hereby granted, free of charge, to any person obtaining a *) -(* copy of this software and associated documentation files (the "Software"),*) -(* to deal in the Software without restriction, including without limitation *) -(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) -(* and/or sell copies of the Software, and to permit persons to whom the *) -(* Software is furnished to do so, subject to the following conditions: *) -(* *) -(* The above copyright notice and this permission notice shall be included *) -(* in all copies or substantial portions of the Software. *) -(* *) -(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) -(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) -(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) -(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) -(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) -(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) -(* DEALINGS IN THE SOFTWARE. *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2018-2024 Nomadic Labs *) +(* SPDX-FileCopyrightText: 2024 Functori *) (* *) (*****************************************************************************) @@ -38,3 +21,87 @@ let display_progress ?(refresh_rate = (1, 1)) msgf = let display_progress_end () = if Unix.isatty Unix.stderr then Format.eprintf "@." + +let list_files dir ?(include_file = fun ~relative_path:_ -> true) f = + let rec list_files_in_dir stream + ((dir, relative_dir, dir_handle) as current_dir_info) = + match Unix.readdir dir_handle with + | "." | ".." -> list_files_in_dir stream current_dir_info + | basename -> + let full_path = Filename.concat dir basename in + let relative_path = Filename.concat relative_dir basename in + let stream = + if Sys.is_directory full_path then + let sub_dir_handle = Unix.opendir full_path in + list_files_in_dir stream (full_path, relative_path, sub_dir_handle) + else if include_file ~relative_path then + Stream.icons (f ~full_path ~relative_path) stream + else stream + in + list_files_in_dir stream current_dir_info + | exception End_of_file -> + Unix.closedir dir_handle ; + stream + in + let dir_handle = Unix.opendir dir in + list_files_in_dir Stream.sempty (dir, "", dir_handle) + +let directory_contents_size ?include_file dir = + let file_stream = + list_files dir ?include_file @@ fun ~full_path ~relative_path:_ -> + let {Unix.st_size; _} = Unix.lstat full_path in + st_size + in + let total = ref 0 in + Stream.iter (fun size -> total := !total + size) file_stream ; + !total + +let rec create_dir ?(perm = 0o755) dir = + let stat = + try Some (Unix.stat dir) with Unix.Unix_error (ENOENT, _, _) -> None + in + match stat with + | Some {st_kind = S_DIR; _} -> () + | Some _ -> Stdlib.failwith "Not a directory" + | None -> ( + create_dir ~perm (Filename.dirname dir) ; + try Unix.mkdir dir perm + with Unix.Unix_error (EEXIST, _, _) -> + (* This is the case where the directory has been created at the same + time. *) + ()) + +let copy_file ~src ~dst = + let in_chan = open_in src in + let out_chan = open_out dst in + try + let buffer_size = 64 * 1024 in + let buf = Bytes.create buffer_size in + let rec copy () = + let read_bytes = input in_chan buf 0 buffer_size in + output out_chan buf 0 read_bytes ; + if read_bytes > 0 then copy () + in + copy () ; + flush out_chan ; + close_in in_chan ; + close_out out_chan + with e -> + close_in in_chan ; + close_out out_chan ; + raise e + +let copy_dir ?(perm = 0o755) src dst = + create_dir ~perm dst ; + let files = + list_files src ~include_file:(fun ~relative_path:_ -> true) + @@ fun ~full_path ~relative_path -> + let dst_file = Filename.concat dst relative_path in + (full_path, dst_file) + in + Stream.iter + (fun (src, dst) -> + let dst_dir = Filename.dirname dst in + create_dir ~perm dst_dir ; + copy_file ~src ~dst) + files diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index 23f8e6bb64f0..70d8408de993 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -1,35 +1,20 @@ (*****************************************************************************) (* *) -(* Open Source License *) -(* Copyright (c) 2018 Nomadic Labs *) -(* *) -(* Permission is hereby granted, free of charge, to any person obtaining a *) -(* copy of this software and associated documentation files (the "Software"),*) -(* to deal in the Software without restriction, including without limitation *) -(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) -(* and/or sell copies of the Software, and to permit persons to whom the *) -(* Software is furnished to do so, subject to the following conditions: *) -(* *) -(* The above copyright notice and this permission notice shall be included *) -(* in all copies or substantial portions of the Software. *) -(* *) -(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) -(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) -(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) -(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) -(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) -(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) -(* DEALINGS IN THE SOFTWARE. *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2018-2024 Nomadic Labs *) +(* SPDX-FileCopyrightText: 2024 Functori *) (* *) (*****************************************************************************) +(** {2 Displaying progress} *) + (** Print over the current [stdout] line. Takes a message formatting function of the form [(fun m -> m )]. Message formatting occurs only when the line is actually printed (i.e. when [(fst refresh_rate) mod (snd refresh_rate) = 0]). [refresh_rate] defaults to always printing the supplied message. - {2 Examples:} + {3 Examples:} - [display_progress (fun m -> m "Loading... %d/100" percent)]. @@ -44,3 +29,32 @@ val display_progress : (** Finalizes progress display *) val display_progress_end : unit -> unit + +(** {2 Files manipulation} *) + +(** [list_files dir ~include_file f] lists the files in directory [dir] which + satisfy predicate [include_file] (by default all files) and applies [f] on + each element. The result is returned as a stream. *) +val list_files : + string -> + ?include_file:(relative_path:string -> bool) -> + (full_path:string -> relative_path:string -> 'a) -> + 'a Stream.t + +(** [directory_contents_size ~include_file] returns the total size of contents + of directory [dir] which satisfy the predicate [include_file]. *) +val directory_contents_size : + ?include_file:(relative_path:string -> bool) -> string -> int + +(** [create_dir ~perm path] creates directory [path] with permissions [perm] if + it does not exist. All directories in [path] are created if necessary, à la + [mkdir -p]. *) +val create_dir : ?perm:int -> string -> unit + +(** [copy_file ~src ~dst] copies the file [src] to [dst]. *) +val copy_file : src:string -> dst:string -> unit + +(** [copy_dir ?perm src dst] copies the content of + directory [src] in the directory [dst] (created with [perm], [0o755] by + default). *) +val copy_dir : ?perm:int -> string -> string -> unit -- GitLab From aa51f07f08b94405202b1f9fceb0ceb28c75cca1 Mon Sep 17 00:00:00 2001 From: Alain Mebsout Date: Wed, 17 Jul 2024 15:50:23 +0200 Subject: [PATCH 3/4] Rollup node: copy_dir utility can display a progress bar --- src/lib_stdlib_unix/utils.ml | 23 ++++++++++++++++++++--- src/lib_stdlib_unix/utils.mli | 8 +++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index 02a6bf2998ed..db205ce8923e 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -71,7 +71,7 @@ let rec create_dir ?(perm = 0o755) dir = time. *) ()) -let copy_file ~src ~dst = +let copy_file ~count_progress ~src ~dst = let in_chan = open_in src in let out_chan = open_out dst in try @@ -80,6 +80,7 @@ let copy_file ~src ~dst = let rec copy () = let read_bytes = input in_chan buf 0 buffer_size in output out_chan buf 0 read_bytes ; + count_progress read_bytes ; if read_bytes > 0 then copy () in copy () ; @@ -91,8 +92,22 @@ let copy_file ~src ~dst = close_out out_chan ; raise e -let copy_dir ?(perm = 0o755) src dst = +let copy_dir ?(perm = 0o755) ?progress src dst = create_dir ~perm dst ; + let maybe_report_progress = + match progress with + | None -> fun f -> f (fun _ -> ()) + | Some (message, color) -> + let total = + directory_contents_size src ~include_file:(fun ~relative_path:_ -> + true) + in + let progress_bar = + Progress_bar.progress_bar ~counter:`Bytes ~message ~color total + in + fun f -> Progress_bar.with_reporter progress_bar f + in + maybe_report_progress @@ fun count_progress -> let files = list_files src ~include_file:(fun ~relative_path:_ -> true) @@ fun ~full_path ~relative_path -> @@ -103,5 +118,7 @@ let copy_dir ?(perm = 0o755) src dst = (fun (src, dst) -> let dst_dir = Filename.dirname dst in create_dir ~perm dst_dir ; - copy_file ~src ~dst) + copy_file ~count_progress ~src ~dst) files + +let copy_file = copy_file ~count_progress:(fun _ -> ()) diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index 70d8408de993..ba8ac9b1dc3d 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -54,7 +54,9 @@ val create_dir : ?perm:int -> string -> unit (** [copy_file ~src ~dst] copies the file [src] to [dst]. *) val copy_file : src:string -> dst:string -> unit -(** [copy_dir ?perm src dst] copies the content of +(** [copy_dir ?perm ~progress:(message, color) src dst] copies the content of directory [src] in the directory [dst] (created with [perm], [0o755] by - default). *) -val copy_dir : ?perm:int -> string -> string -> unit + default). If [progress] is provided, a progress bar is displayed on terminal + outputs with the given message and color. *) +val copy_dir : + ?perm:int -> ?progress:string * Terminal.Color.t -> string -> string -> unit -- GitLab From 0ba7def97e54ccf20b9c6e90d0d7e06155269922 Mon Sep 17 00:00:00 2001 From: Alain Mebsout Date: Wed, 24 Jul 2024 15:16:30 +0200 Subject: [PATCH 4/4] lib_stdlib_unix: simplify list files functions --- src/lib_smart_rollup_node/snapshot_utils.ml | 40 +++++++++----- src/lib_stdlib_unix/utils.ml | 58 +++++++++------------ src/lib_stdlib_unix/utils.mli | 24 ++++----- 3 files changed, 63 insertions(+), 59 deletions(-) diff --git a/src/lib_smart_rollup_node/snapshot_utils.ml b/src/lib_smart_rollup_node/snapshot_utils.ml index b096e2dd22b4..42d273d8c0f7 100644 --- a/src/lib_smart_rollup_node/snapshot_utils.ml +++ b/src/lib_smart_rollup_node/snapshot_utils.ml @@ -133,8 +133,19 @@ let create (module Reader : READER) (module Writer : WRITER) metadata ~dir include Reader include Writer end) in + let files = + Tezos_stdlib_unix.Utils.list_files dir + |> List.filter (fun relative_path -> include_file ~relative_path) + in let total = - Tezos_stdlib_unix.Utils.directory_contents_size dir ~include_file + List.fold_left + (fun total relative_path -> + let {Unix.st_size; _} = + Unix.lstat (Filename.concat dir relative_path) + in + total + st_size) + 0 + files in let progress_bar = Progress_bar.progress_bar @@ -163,18 +174,21 @@ let create (module Reader : READER) (module Writer : WRITER) metadata ~dir raise e in let file_stream = - Tezos_stdlib_unix.Utils.list_files dir ~include_file - @@ fun ~full_path ~relative_path -> - let {Unix.st_perm; st_size; st_mtime; _} = Unix.lstat full_path in - let header = - Tar.Header.make - ~file_mode:st_perm - ~mod_time:(Int64.of_float st_mtime) - relative_path - (Int64.of_int st_size) - in - let writer = write_file full_path in - (header, writer) + List.rev_map + (fun relative_path -> + let full_path = Filename.concat dir relative_path in + let {Unix.st_perm; st_size; st_mtime; _} = Unix.lstat full_path in + let header = + Tar.Header.make + ~file_mode:st_perm + ~mod_time:(Int64.of_float st_mtime) + relative_path + (Int64.of_int st_size) + in + let writer = write_file full_path in + (header, writer)) + files + |> Stream.of_list in let out_chan = Writer.open_out dest in try diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index db205ce8923e..70a594eeb2c5 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -22,39 +22,37 @@ let display_progress ?(refresh_rate = (1, 1)) msgf = let display_progress_end () = if Unix.isatty Unix.stderr then Format.eprintf "@." -let list_files dir ?(include_file = fun ~relative_path:_ -> true) f = - let rec list_files_in_dir stream +let fold_files dir f acc = + let rec list_files_in_dir acc ((dir, relative_dir, dir_handle) as current_dir_info) = match Unix.readdir dir_handle with - | "." | ".." -> list_files_in_dir stream current_dir_info + | "." | ".." -> list_files_in_dir acc current_dir_info | basename -> let full_path = Filename.concat dir basename in let relative_path = Filename.concat relative_dir basename in - let stream = + let acc = if Sys.is_directory full_path then let sub_dir_handle = Unix.opendir full_path in - list_files_in_dir stream (full_path, relative_path, sub_dir_handle) - else if include_file ~relative_path then - Stream.icons (f ~full_path ~relative_path) stream - else stream + list_files_in_dir acc (full_path, relative_path, sub_dir_handle) + else f relative_path acc in - list_files_in_dir stream current_dir_info + list_files_in_dir acc current_dir_info | exception End_of_file -> Unix.closedir dir_handle ; - stream + acc in let dir_handle = Unix.opendir dir in - list_files_in_dir Stream.sempty (dir, "", dir_handle) + list_files_in_dir acc (dir, "", dir_handle) -let directory_contents_size ?include_file dir = - let file_stream = - list_files dir ?include_file @@ fun ~full_path ~relative_path:_ -> - let {Unix.st_size; _} = Unix.lstat full_path in - st_size - in - let total = ref 0 in - Stream.iter (fun size -> total := !total + size) file_stream ; - !total +let list_files dir = fold_files dir List.cons [] |> List.rev + +let directory_contents_size dir = + fold_files + dir + (fun relative_path total -> + let {Unix.st_size; _} = Unix.lstat (Filename.concat dir relative_path) in + total + st_size) + 0 let rec create_dir ?(perm = 0o755) dir = let stat = @@ -98,27 +96,21 @@ let copy_dir ?(perm = 0o755) ?progress src dst = match progress with | None -> fun f -> f (fun _ -> ()) | Some (message, color) -> - let total = - directory_contents_size src ~include_file:(fun ~relative_path:_ -> - true) - in + let total = directory_contents_size src in let progress_bar = Progress_bar.progress_bar ~counter:`Bytes ~message ~color total in fun f -> Progress_bar.with_reporter progress_bar f in maybe_report_progress @@ fun count_progress -> - let files = - list_files src ~include_file:(fun ~relative_path:_ -> true) - @@ fun ~full_path ~relative_path -> - let dst_file = Filename.concat dst relative_path in - (full_path, dst_file) - in - Stream.iter - (fun (src, dst) -> + fold_files + src + (fun relative_path () -> + let src = Filename.concat src relative_path in + let dst = Filename.concat dst relative_path in let dst_dir = Filename.dirname dst in create_dir ~perm dst_dir ; copy_file ~count_progress ~src ~dst) - files + () let copy_file = copy_file ~count_progress:(fun _ -> ()) diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index ba8ac9b1dc3d..ebb14f741140 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -32,19 +32,17 @@ val display_progress_end : unit -> unit (** {2 Files manipulation} *) -(** [list_files dir ~include_file f] lists the files in directory [dir] which - satisfy predicate [include_file] (by default all files) and applies [f] on - each element. The result is returned as a stream. *) -val list_files : - string -> - ?include_file:(relative_path:string -> bool) -> - (full_path:string -> relative_path:string -> 'a) -> - 'a Stream.t - -(** [directory_contents_size ~include_file] returns the total size of contents - of directory [dir] which satisfy the predicate [include_file]. *) -val directory_contents_size : - ?include_file:(relative_path:string -> bool) -> string -> int +(** [list_files dir] lists the files in directory [dir] and returns a list of + file names with relative (to [dir]) paths. *) +val list_files : string -> string list + +(** [fold_files dir f acc] applies [f] on all files in [dir] (recursively) and + accumulated on [acc]. *) +val fold_files : string -> (string -> 'a -> 'a) -> 'a -> 'a + +(** [directory_contents_size dir] returns the total size of contents of + directory [dir]. *) +val directory_contents_size : string -> int (** [create_dir ~perm path] creates directory [path] with permissions [perm] if it does not exist. All directories in [path] are created if necessary, à la -- GitLab