diff --git a/manifest/externals.ml b/manifest/externals.ml index 8e358b2f66c71deaa6fe1f6ecaf5e8de7d0a1cac..054e5021028beb10500bc40ad675f23bcf7a298c 100644 --- a/manifest/externals.ml +++ b/manifest/externals.ml @@ -48,6 +48,8 @@ let bigstringaf = external_lib "bigstringaf" V.(at_least "0.5.0") let bisect_ppx = opam_only "bisect_ppx" V.(at_least "2.7.0") +let camlp_streams = external_lib "camlp-streams" V.(at_least "5.0.1") + let camlzip = external_lib "camlzip" V.(at_least "1.11" && less_than "1.12") let caqti = external_lib "caqti" V.True diff --git a/manifest/product_octez.ml b/manifest/product_octez.ml index a55881a7bb401e04af04d5a0af566d077e71544a..f9083674def28fceb630cb85ea994cd03be77290 100644 --- a/manifest/product_octez.ml +++ b/manifest/product_octez.ml @@ -1463,6 +1463,7 @@ let octez_stdlib_unix = conf_libev; uri; progress; + camlp_streams; ] let _octez_stdlib_unix_test = diff --git a/opam/octez-libs.opam b/opam/octez-libs.opam index 98db7bf5fcf1e001ba8392b063f955bb87716e19..9ee0d3a65dbeafb1fef1ffdfcd5e4ae69bc63a3b 100644 --- a/opam/octez-libs.opam +++ b/opam/octez-libs.opam @@ -57,6 +57,7 @@ depends: [ "mtime" { >= "2.0.0" } "conf-libev" "progress" { >= "0.1.0" } + "camlp-streams" { >= "5.0.1" } "uutf" "lwt-exit" "ppxlib" diff --git a/opam/virtual/octez-deps.opam b/opam/virtual/octez-deps.opam index 95c1da8bc2630d5b826d2bcf69a5aac438d1571b..2a239aa3f366556c491184fa8fe5f17ad9282da5 100644 --- a/opam/virtual/octez-deps.opam +++ b/opam/virtual/octez-deps.opam @@ -21,6 +21,7 @@ depends: [ "bigstring" "bigstringaf" { >= "0.5.0" } "bisect_ppx" { >= "2.7.0" } + "camlp-streams" { >= "5.0.1" } "camlzip" { >= "1.11" & < "1.12" } "caqti" "caqti-driver-sqlite3" { >= "2.0.1" } diff --git a/src/lib_smart_rollup_node/snapshot_utils.ml b/src/lib_smart_rollup_node/snapshot_utils.ml index 2cdc2089bbddb7d051db5e4f05c5db6e432f8a38..42d273d8c0f74bc8516569374bf640ac17227ead 100644 --- a/src/lib_smart_rollup_node/snapshot_utils.ml +++ b/src/lib_smart_rollup_node/snapshot_utils.ml @@ -127,47 +127,26 @@ let read_snapshot_metadata (module Reader : READER_INPUT) = assert (snapshot_version = version) ; Data_encoding.Binary.of_bytes_exn snaphsot_metadata_encoding metadata_bytes -let list_files dir ~include_file f = - let rec list_files_in_dir stream - ((dir, relative_dir, dir_handle) as current_dir_info) = - match Unix.readdir dir_handle with - | "." | ".." -> list_files_in_dir stream current_dir_info - | basename -> - let full_path = Filename.concat dir basename in - let relative_path = Filename.concat relative_dir basename in - let stream = - if Sys.is_directory full_path then - let sub_dir_handle = Unix.opendir full_path in - list_files_in_dir stream (full_path, relative_path, sub_dir_handle) - else if include_file ~relative_path then - Stream.icons (f ~full_path ~relative_path) stream - else stream - in - list_files_in_dir stream current_dir_info - | exception End_of_file -> - Unix.closedir dir_handle ; - stream - in - let dir_handle = Unix.opendir dir in - list_files_in_dir Stream.sempty (dir, "", dir_handle) - -let total_bytes_to_export dir ~include_file = - let file_stream = - list_files dir ~include_file @@ fun ~full_path ~relative_path:_ -> - let {Unix.st_size; _} = Unix.lstat full_path in - st_size - in - let total = ref 0 in - Stream.iter (fun size -> total := !total + size) file_stream ; - !total - let create (module Reader : READER) (module Writer : WRITER) metadata ~dir ~include_file ~dest = let module Archive_writer = Tar.Make (struct include Reader include Writer end) in - let total = total_bytes_to_export dir ~include_file in + let files = + Tezos_stdlib_unix.Utils.list_files dir + |> List.filter (fun relative_path -> include_file ~relative_path) + in + let total = + List.fold_left + (fun total relative_path -> + let {Unix.st_size; _} = + Unix.lstat (Filename.concat dir relative_path) + in + total + st_size) + 0 + files + in let progress_bar = Progress_bar.progress_bar ~counter:`Bytes @@ -195,17 +174,21 @@ let create (module Reader : READER) (module Writer : WRITER) metadata ~dir raise e in let file_stream = - list_files dir ~include_file @@ fun ~full_path ~relative_path -> - let {Unix.st_perm; st_size; st_mtime; _} = Unix.lstat full_path in - let header = - Tar.Header.make - ~file_mode:st_perm - ~mod_time:(Int64.of_float st_mtime) - relative_path - (Int64.of_int st_size) - in - let writer = write_file full_path in - (header, writer) + List.rev_map + (fun relative_path -> + let full_path = Filename.concat dir relative_path in + let {Unix.st_perm; st_size; st_mtime; _} = Unix.lstat full_path in + let header = + Tar.Header.make + ~file_mode:st_perm + ~mod_time:(Int64.of_float st_mtime) + relative_path + (Int64.of_int st_size) + in + let writer = write_file full_path in + (header, writer)) + files + |> Stream.of_list in let out_chan = Writer.open_out dest in try @@ -222,58 +205,6 @@ let create (module Reader : READER) (module Writer : WRITER) metadata ~dir Writer.close_out out_chan ; raise e -let rec create_dir ?(perm = 0o755) dir = - let stat = - try Some (Unix.stat dir) with Unix.Unix_error (ENOENT, _, _) -> None - in - match stat with - | Some {st_kind = S_DIR; _} -> () - | Some _ -> Stdlib.failwith "Not a directory" - | None -> ( - create_dir ~perm (Filename.dirname dir) ; - try Unix.mkdir dir perm - with Unix.Unix_error (EEXIST, _, _) -> - (* This is the case where the directory has been created at the same - time. *) - ()) - -let copy_file ~src ~dst = - let in_chan = open_in src in - let out_chan = open_out dst in - try - let buffer_size = 64 * 1024 in - let buf = Bytes.create buffer_size in - let rec copy () = - let read_bytes = input in_chan buf 0 buffer_size in - output out_chan buf 0 read_bytes ; - if read_bytes > 0 then copy () - in - copy () ; - flush out_chan ; - close_in in_chan ; - close_out out_chan - with e -> - close_in in_chan ; - close_out out_chan ; - raise e - -(* TODO: https://gitlab.com/tezos/tezos/-/issues/6857 - Use Lwt_utils_unix.copy_dir instead when file descriptors issue is fixed. *) -let copy_dir ?(perm = 0o755) src dst = - create_dir ~perm dst ; - let files = - list_files src ~include_file:(fun ~relative_path:_ -> true) - @@ fun ~full_path ~relative_path -> - let dst_file = Filename.concat dst relative_path in - (full_path, dst_file) - in - Stream.iter - (fun (src, dst) -> - let dst_dir = Filename.dirname dst in - create_dir ~perm dst_dir ; - copy_file ~src ~dst) - files - let extract (module Reader : READER) (module Writer : WRITER) metadata_check ~snapshot_file ~dest = let open Lwt_result_syntax in @@ -292,7 +223,7 @@ let extract (module Reader : READER) (module Writer : WRITER) metadata_check end) in let out_channel_of_header (header : Tar.Header.t) = let path = Filename.concat dest header.file_name in - create_dir (Filename.dirname path) ; + Tezos_stdlib_unix.Utils.create_dir (Filename.dirname path) ; Writer.open_out path in let in_chan = Reader.open_in snapshot_file in diff --git a/src/lib_smart_rollup_node/snapshot_utils.mli b/src/lib_smart_rollup_node/snapshot_utils.mli index 11ed298ba9f4a684eeafbc729d34d21be2006f06..79e47d3a4ec0194be49d22b7c7ead50358d82bd0 100644 --- a/src/lib_smart_rollup_node/snapshot_utils.mli +++ b/src/lib_smart_rollup_node/snapshot_utils.mli @@ -72,10 +72,3 @@ val compress : snapshot_file:string -> string (** [read_metadata reader ~snapshot_file] reads the metadata from the snapshot file without extracting it. *) val read_metadata : reader -> snapshot_file:string -> snapshot_metadata - -(** [copy_file ~src ~dst] copies the file [src] to [dst]. *) -val copy_file : src:string -> dst:string -> unit - -(** [copy_dir ?perm src dst] copies the content of directory [src] in the - directory [dst] (created with [perm], [0o755] by default). *) -val copy_dir : ?perm:int -> string -> string -> unit diff --git a/src/lib_smart_rollup_node/snapshots.ml b/src/lib_smart_rollup_node/snapshots.ml index a4439d09a9afa8c6a27b5d17e023f0cb5bf2e8f2..b86340f44dc6c21db8cf2f8c7d23fa426b5c0b9c 100644 --- a/src/lib_smart_rollup_node/snapshots.ml +++ b/src/lib_smart_rollup_node/snapshots.ml @@ -787,21 +787,24 @@ let export_compact cctxt ~no_checks ~compression ~data_dir ~dest ~filename = @@ Context.export_snapshot context first_block.context ~path:tmp_context_dir in let ( // ) = Filename.concat in + (* TODO: https://gitlab.com/tezos/tezos/-/issues/6857 + Use Lwt_utils_unix.copy_dir instead when file descriptors issue is fixed. *) let copy_dir a = let dir = data_dir // a in if Sys.file_exists dir && Sys.is_directory dir then - copy_dir dir (tmp_dir // a) + Tezos_stdlib_unix.Utils.copy_dir dir (tmp_dir // a) in let copy_file a = let path = data_dir // a in - if Sys.file_exists path then copy_file ~src:path ~dst:(tmp_dir // a) + if Sys.file_exists path then + Tezos_stdlib_unix.Utils.copy_file ~src:path ~dst:(tmp_dir // a) in Format.eprintf "Acquiring process lock@." ; let* () = Utils.with_lockfile (Node_context.processing_lockfile_path ~data_dir) @@ fun () -> Format.eprintf "Copying data@." ; - Snapshot_utils.copy_dir store_dir tmp_store_dir ; + Tezos_stdlib_unix.Utils.copy_dir store_dir tmp_store_dir ; copy_file "metadata" ; return_unit in diff --git a/src/lib_stdlib_unix/dune b/src/lib_stdlib_unix/dune index 835054e6159d756b483f120ac1cfcd76f16e6a91..26ef36efbe7989f6311632c1616036f44a5b50e6 100644 --- a/src/lib_stdlib_unix/dune +++ b/src/lib_stdlib_unix/dune @@ -22,7 +22,8 @@ mtime mtime.clock.os uri - progress) + progress + camlp-streams) (flags (:standard) -open Tezos_error_monad diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index f5b20c671b783c2eab3de26ed394ec09686db292..70a594eeb2c50e721249b6f7aa4bb71f7295b808 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -1,25 +1,8 @@ (*****************************************************************************) (* *) -(* Open Source License *) -(* Copyright (c) 2018 Nomadic Labs *) -(* *) -(* Permission is hereby granted, free of charge, to any person obtaining a *) -(* copy of this software and associated documentation files (the "Software"),*) -(* to deal in the Software without restriction, including without limitation *) -(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) -(* and/or sell copies of the Software, and to permit persons to whom the *) -(* Software is furnished to do so, subject to the following conditions: *) -(* *) -(* The above copyright notice and this permission notice shall be included *) -(* in all copies or substantial portions of the Software. *) -(* *) -(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) -(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) -(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) -(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) -(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) -(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) -(* DEALINGS IN THE SOFTWARE. *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2018-2024 Nomadic Labs *) +(* SPDX-FileCopyrightText: 2024 Functori *) (* *) (*****************************************************************************) @@ -38,3 +21,96 @@ let display_progress ?(refresh_rate = (1, 1)) msgf = let display_progress_end () = if Unix.isatty Unix.stderr then Format.eprintf "@." + +let fold_files dir f acc = + let rec list_files_in_dir acc + ((dir, relative_dir, dir_handle) as current_dir_info) = + match Unix.readdir dir_handle with + | "." | ".." -> list_files_in_dir acc current_dir_info + | basename -> + let full_path = Filename.concat dir basename in + let relative_path = Filename.concat relative_dir basename in + let acc = + if Sys.is_directory full_path then + let sub_dir_handle = Unix.opendir full_path in + list_files_in_dir acc (full_path, relative_path, sub_dir_handle) + else f relative_path acc + in + list_files_in_dir acc current_dir_info + | exception End_of_file -> + Unix.closedir dir_handle ; + acc + in + let dir_handle = Unix.opendir dir in + list_files_in_dir acc (dir, "", dir_handle) + +let list_files dir = fold_files dir List.cons [] |> List.rev + +let directory_contents_size dir = + fold_files + dir + (fun relative_path total -> + let {Unix.st_size; _} = Unix.lstat (Filename.concat dir relative_path) in + total + st_size) + 0 + +let rec create_dir ?(perm = 0o755) dir = + let stat = + try Some (Unix.stat dir) with Unix.Unix_error (ENOENT, _, _) -> None + in + match stat with + | Some {st_kind = S_DIR; _} -> () + | Some _ -> Stdlib.failwith "Not a directory" + | None -> ( + create_dir ~perm (Filename.dirname dir) ; + try Unix.mkdir dir perm + with Unix.Unix_error (EEXIST, _, _) -> + (* This is the case where the directory has been created at the same + time. *) + ()) + +let copy_file ~count_progress ~src ~dst = + let in_chan = open_in src in + let out_chan = open_out dst in + try + let buffer_size = 64 * 1024 in + let buf = Bytes.create buffer_size in + let rec copy () = + let read_bytes = input in_chan buf 0 buffer_size in + output out_chan buf 0 read_bytes ; + count_progress read_bytes ; + if read_bytes > 0 then copy () + in + copy () ; + flush out_chan ; + close_in in_chan ; + close_out out_chan + with e -> + close_in in_chan ; + close_out out_chan ; + raise e + +let copy_dir ?(perm = 0o755) ?progress src dst = + create_dir ~perm dst ; + let maybe_report_progress = + match progress with + | None -> fun f -> f (fun _ -> ()) + | Some (message, color) -> + let total = directory_contents_size src in + let progress_bar = + Progress_bar.progress_bar ~counter:`Bytes ~message ~color total + in + fun f -> Progress_bar.with_reporter progress_bar f + in + maybe_report_progress @@ fun count_progress -> + fold_files + src + (fun relative_path () -> + let src = Filename.concat src relative_path in + let dst = Filename.concat dst relative_path in + let dst_dir = Filename.dirname dst in + create_dir ~perm dst_dir ; + copy_file ~count_progress ~src ~dst) + () + +let copy_file = copy_file ~count_progress:(fun _ -> ()) diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index 23f8e6bb64f0a84815685574367892da6616edd8..ebb14f7411400431b21e3a8cdfbde7ef8a915bd2 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -1,35 +1,20 @@ (*****************************************************************************) (* *) -(* Open Source License *) -(* Copyright (c) 2018 Nomadic Labs *) -(* *) -(* Permission is hereby granted, free of charge, to any person obtaining a *) -(* copy of this software and associated documentation files (the "Software"),*) -(* to deal in the Software without restriction, including without limitation *) -(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) -(* and/or sell copies of the Software, and to permit persons to whom the *) -(* Software is furnished to do so, subject to the following conditions: *) -(* *) -(* The above copyright notice and this permission notice shall be included *) -(* in all copies or substantial portions of the Software. *) -(* *) -(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) -(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) -(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) -(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) -(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) -(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) -(* DEALINGS IN THE SOFTWARE. *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2018-2024 Nomadic Labs *) +(* SPDX-FileCopyrightText: 2024 Functori *) (* *) (*****************************************************************************) +(** {2 Displaying progress} *) + (** Print over the current [stdout] line. Takes a message formatting function of the form [(fun m -> m )]. Message formatting occurs only when the line is actually printed (i.e. when [(fst refresh_rate) mod (snd refresh_rate) = 0]). [refresh_rate] defaults to always printing the supplied message. - {2 Examples:} + {3 Examples:} - [display_progress (fun m -> m "Loading... %d/100" percent)]. @@ -44,3 +29,32 @@ val display_progress : (** Finalizes progress display *) val display_progress_end : unit -> unit + +(** {2 Files manipulation} *) + +(** [list_files dir] lists the files in directory [dir] and returns a list of + file names with relative (to [dir]) paths. *) +val list_files : string -> string list + +(** [fold_files dir f acc] applies [f] on all files in [dir] (recursively) and + accumulated on [acc]. *) +val fold_files : string -> (string -> 'a -> 'a) -> 'a -> 'a + +(** [directory_contents_size dir] returns the total size of contents of + directory [dir]. *) +val directory_contents_size : string -> int + +(** [create_dir ~perm path] creates directory [path] with permissions [perm] if + it does not exist. All directories in [path] are created if necessary, à la + [mkdir -p]. *) +val create_dir : ?perm:int -> string -> unit + +(** [copy_file ~src ~dst] copies the file [src] to [dst]. *) +val copy_file : src:string -> dst:string -> unit + +(** [copy_dir ?perm ~progress:(message, color) src dst] copies the content of + directory [src] in the directory [dst] (created with [perm], [0o755] by + default). If [progress] is provided, a progress bar is displayed on terminal + outputs with the given message and color. *) +val copy_dir : + ?perm:int -> ?progress:string * Terminal.Color.t -> string -> string -> unit