From 3ebb430c48054be3e13b9ebcd7f6bfa5a99e3f59 Mon Sep 17 00:00:00 2001
From: Romain <romain@nomadic-labs.com>
Date: Wed, 21 May 2025 14:28:04 +0200
Subject: [PATCH 1/3] declare not compatible with ocaml 4.12

---
 .gitlab-ci.yml | 2 +-
 CHANGES.md     | 8 ++++++++
 tezt.opam      | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index cca74823..74bb94c8 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -17,7 +17,7 @@ include: 'https://gitlab.com/nomadic-labs/gitlab-ocaml-ci-example/-/raw/main/.gi
   tags: [gcp]
   parallel:
     matrix:
-    - OCAML_COMPILER: ["4.12.1", "4.13.1", "4.14.2", "5.1.1"]
+    - OCAML_COMPILER: ["4.13.1", "4.14.2", "5.1.1", "5.2.1"]
       DUNE_BUILD_TARGETS: "@all"
       DUNE_TEST_TARGETS: "@runtest @fmt"
       DUNE_DOC_TARGETS: [""]
diff --git a/CHANGES.md b/CHANGES.md
index 1867bbeb..0a68b93a 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,13 @@
 # Changelog
 
+# Changelog
+
+## Development Version
+
+### Breaking Changes
+
+- Tezt is no longer available for OCaml versions earlier than 4.13.
+
 ## Version 4.2.0
 
 ### New Features
diff --git a/tezt.opam b/tezt.opam
index f79ff68c..45eab2bb 100644
--- a/tezt.opam
+++ b/tezt.opam
@@ -7,7 +7,7 @@ dev-repo: "git+https://gitlab.com/nomadic-labs/tezt.git"
 license: "MIT"
 depends: [
   "dune" { >= "3.0" }
-  "ocaml" { >= "4.12" }
+  "ocaml" { >= "4.13" }
   "re" { >= "1.7.2" }
   "lwt" { >= "5.6.0" }
   "base-unix"
-- 
GitLab


From 8aa163ed4934fdc8ce6e3ce3b9c7bc3bf6724ec9 Mon Sep 17 00:00:00 2001
From: Romain <romain@nomadic-labs.com>
Date: Tue, 29 Apr 2025 12:33:24 +0200
Subject: [PATCH 2/3] add Scheduler.get_runner_pid

---
 CHANGES.md                  | 4 ++++
 lib_scheduler/scheduler.ml  | 5 +++++
 lib_scheduler/scheduler.mli | 5 +++++
 3 files changed, 14 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 0a68b93a..3abfffea 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -4,6 +4,10 @@
 
 ## Development Version
 
+### New Features
+
+- Added `get_runner_pid` to the `tezt.scheduler` library.
+
 ### Breaking Changes
 
 - Tezt is no longer available for OCaml versions earlier than 4.13.
diff --git a/lib_scheduler/scheduler.ml b/lib_scheduler/scheduler.ml
index f0ebc035..f09875cb 100644
--- a/lib_scheduler/scheduler.ml
+++ b/lib_scheduler/scheduler.ml
@@ -1297,3 +1297,8 @@ let stop () = stopped := true
 let clear () = Queue.clear task_queue
 
 let get_current_worker_context () = !Worker.current_child_state
+
+let get_runner_pid (scheduler_context : scheduler_context) =
+  match scheduler_context.status with
+  | Alive {pid; _} -> Some pid
+  | Dead _ | Burried -> None
diff --git a/lib_scheduler/scheduler.mli b/lib_scheduler/scheduler.mli
index 93c96a60..a6e31bde 100644
--- a/lib_scheduler/scheduler.mli
+++ b/lib_scheduler/scheduler.mli
@@ -64,6 +64,11 @@ type scheduler_context
     Returns [None] if not currently in a worker process. *)
 val get_current_worker_context : unit -> worker_context option
 
+(** Get the PID of a runner process.
+
+    Returns [None] if the runner is not running. *)
+val get_runner_pid : scheduler_context -> int option
+
 (** {2 Messages} *)
 
 module Message : sig
-- 
GitLab


From f8fb243354372fba3e7ac81a1fb7f37fe796bf0d Mon Sep 17 00:00:00 2001
From: Romain <romain@nomadic-labs.com>
Date: Tue, 29 Apr 2025 15:35:04 +0200
Subject: [PATCH 3/3] add --record-mem-peak and --mem-poll-frequency

---
 CHANGES.md        |  10 ++++
 lib/main.ml       | 136 ++++++++++++++++++++++++++++++++++++++++++++--
 lib_core/cli.ml   |  18 ++++++
 lib_core/cli.mli  |   6 ++
 lib_core/test.ml  |  60 +++++++++++++++-----
 lib_core/test.mli |  11 +++-
 6 files changed, 221 insertions(+), 20 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 3abfffea..25abc4cb 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -8,10 +8,20 @@
 
 - Added `get_runner_pid` to the `tezt.scheduler` library.
 
+- Added `--record-mem-peak`.
+  It causes Tezt to regularly measure the memory usage of tests
+  and to record the peak in reports when using `--record`.
+  Only works with `-j N` with `N` being at least 2.
+
+- Added `--mem-poll-frequency`, which controls how often memory usage is measured
+  when using `--record-mem-peak`.
+
 ### Breaking Changes
 
 - Tezt is no longer available for OCaml versions earlier than 4.13.
 
+- `Test.test_result` contains a new field: `peak_memory_usage`.
+
 ## Version 4.2.0
 
 ### New Features
diff --git a/lib/main.ml b/lib/main.ml
index 544dab25..ecc35aa6 100644
--- a/lib/main.ml
+++ b/lib/main.ml
@@ -34,6 +34,98 @@ let () =
 
 let start_time = Unix.gettimeofday ()
 
+module Memory_usage : sig
+  val get_recursively_for_pid : int -> (int, string) result
+end = struct
+  let kb = 1000
+
+  (* Get the memory usage of a single process (not recursively)
+     by reading from /proc/PID/smaps.
+     More specifically, we read the Pss field, which gives a more representative memory usage
+     as it divides shared memory by the number of processes that share this memory. *)
+  (* https://stackoverflow.com/questions/131303/how-can-i-measure-the-actual-memory-usage-of-an-application-or-process *)
+  let get_total_pss pid =
+    let ch = open_in ("/proc/" ^ string_of_int pid ^ "/smaps") in
+    Fun.protect ~finally:(fun () -> close_in ch) @@ fun () ->
+    let rec sum_pss acc =
+      match input_line ch with
+      | exception End_of_file -> acc
+      | line ->
+          let parse_error () =
+            failwith (Printf.sprintf "failed to parse %S" line)
+          in
+          let line = String.trim line in
+          let value =
+            if not (String.starts_with ~prefix:"Pss:" line) then 0
+            else if not (String.ends_with ~suffix:"kB" line) then parse_error ()
+            else
+              let value =
+                String.sub
+                  line
+                  (String.length "Pss:")
+                  (String.length line - String.length "Pss:"
+                 - String.length "kB")
+                |> String.trim
+              in
+              match int_of_string_opt value with
+              | None -> parse_error ()
+              | Some value -> value * kb
+          in
+          sum_pss (acc + value)
+    in
+    sum_pss 0
+
+  (* Read all lines from an input channel and return them as a list. *)
+  let rec input_lines ?(acc = []) ch =
+    match input_line ch with
+    | exception End_of_file -> List.rev acc
+    | line -> input_lines ~acc:(line :: acc) ch
+
+  (* Read /proc/PID/task/*/children to get the list of children of a process.
+     Returns the list of PIDs of all children. *)
+  let get_children pid =
+    (* I think a task is the same as a thread. *)
+    let task_path = "/proc/" ^ string_of_int pid ^ "/task" in
+    let parse_tid line =
+      match int_of_string_opt (String.trim line) with
+      | None ->
+          failwith (sf "failed to parse %S from %s as a TID" line task_path)
+      | Some pid -> pid
+    in
+    let task_ids =
+      Sys.readdir task_path |> Array.to_list |> List.map parse_tid
+    in
+    let get_task_children tid =
+      let task_children_path =
+        task_path ^ "/" ^ string_of_int tid ^ "/children"
+      in
+      let ch = open_in task_children_path in
+      Fun.protect ~finally:(fun () -> close_in ch) @@ fun () ->
+      let parse_pid line =
+        match int_of_string_opt line with
+        | None ->
+            failwith
+              (sf "failed to parse %S from %s as a PID" line task_children_path)
+        | Some pid -> pid
+      in
+      input_lines ch
+      |> List.map (String.split_on_char ' ')
+      |> List.flatten |> List.map String.trim
+      |> List.filter (( <> ) "")
+      |> List.map parse_pid
+    in
+    List.flatten (List.map get_task_children task_ids)
+
+  let get_recursively_for_pid pid =
+    let rec get acc pid =
+      let acc = acc + get_total_pss pid in
+      let children = get_children pid in
+      List.fold_left get acc children
+    in
+    try Ok (get 0 pid)
+    with Sys_error message | Failure message -> Error message
+end
+
 module Scheduler : Test.SCHEDULER = struct
   (* Use the [Scheduler] module from the [tezt.scheduler] library,
      with an alias [S] so that we don't confuse it with the [Scheduler] module
@@ -68,7 +160,8 @@ module Scheduler : Test.SCHEDULER = struct
     | _ -> raise (S.Message.Failed_to_decode (value, "used_seed"))
 
   let test_result_type : Test.test_result S.Message.typ =
-    let encode ({test_result; seed} : Test.test_result) : S.Message.value =
+    let encode ({test_result; seed; peak_memory_usage} : Test.test_result) :
+        S.Message.value =
       Block
         [|
           (match test_result with
@@ -76,12 +169,15 @@ module Scheduler : Test.SCHEDULER = struct
           | Failed error_message -> String error_message
           | Aborted -> Int 1);
           encode_used_seed seed;
+          (match peak_memory_usage with
+          | None -> Int (-1)
+          | Some peak_memory_usage -> Int peak_memory_usage);
         |]
     in
     let decode (value : S.Message.value) : Test.test_result =
       let fail () = raise (S.Message.Failed_to_decode (value, "test_result")) in
       match value with
-      | Block [|test_result; seed|] ->
+      | Block [|test_result; seed; Int peak_memory_usage|] ->
           {
             test_result =
               (match test_result with
@@ -90,6 +186,8 @@ module Scheduler : Test.SCHEDULER = struct
               | Int 1 -> Aborted
               | _ -> fail ());
             seed = decode_used_seed seed;
+            peak_memory_usage =
+              (if peak_memory_usage >= 0 then Some peak_memory_usage else None);
           }
       | _ -> fail ()
     in
@@ -144,7 +242,17 @@ module Scheduler : Test.SCHEDULER = struct
       | Some _ -> Some Cli.Options.cleanup_timeout
     in
     let warn_after_timeout_timer : S.Timer.t option ref = ref None in
-    let on_start _ctx =
+    let measure_memory_usage_timer : S.Timer.t option ref = ref None in
+    let peak_memory_usage = ref None in
+    let on_start ctx =
+      let measure_memory_usage_once pid =
+        match Memory_usage.get_recursively_for_pid pid with
+        | Ok bytes -> (
+            match !peak_memory_usage with
+            | None -> peak_memory_usage := Some bytes
+            | Some old -> peak_memory_usage := Some (max old bytes))
+        | Error _ -> ()
+      in
       let rec warn_after_timeout delay =
         warn_after_timeout_timer :=
           Some
@@ -152,6 +260,18 @@ module Scheduler : Test.SCHEDULER = struct
               Log.warn "Test is still running: %S" test_title ;
               warn_after_timeout delay )
       in
+      let rec measure_memory_usage delay =
+        measure_memory_usage_timer :=
+          Some
+            ( S.Timer.on_delay delay @@ fun () ->
+              match S.get_runner_pid ctx with
+              | None -> ()
+              | Some pid ->
+                  measure_memory_usage_once pid ;
+                  measure_memory_usage delay )
+      in
+      if Cli.Reports.record_mem_peak then
+        measure_memory_usage (1. /. Cli.Options.mem_poll_frequency) ;
       if Cli.Options.warn_after_timeout > 0. then
         warn_after_timeout Cli.Options.warn_after_timeout
     in
@@ -162,9 +282,11 @@ module Scheduler : Test.SCHEDULER = struct
     in
     let on_finish result =
       Option.iter S.Timer.cancel !warn_after_timeout_timer ;
+      Option.iter S.Timer.cancel !measure_memory_usage_timer ;
       let test_result : Test.test_result =
         match result with
-        | Ok test_result -> test_result
+        | Ok test_result ->
+            {test_result with peak_memory_usage = !peak_memory_usage}
         | Error error_message ->
             Log.error "%s" error_message ;
             let seed : Test.used_seed =
@@ -172,7 +294,11 @@ module Scheduler : Test.SCHEDULER = struct
               | None -> Used_fixed
               | Some seed -> Used_random seed
             in
-            {test_result = Failed error_message; seed}
+            {
+              test_result = Failed error_message;
+              seed;
+              peak_memory_usage = !peak_memory_usage;
+            }
       in
       on_response (Test_result test_result)
     in
diff --git a/lib_core/cli.ml b/lib_core/cli.ml
index 1d63ebc1..ff1fc17d 100644
--- a/lib_core/cli.ml
+++ b/lib_core/cli.ml
@@ -255,6 +255,15 @@ module Options = struct
         "Force tests declared with ~seed:Random to initialize the \
          pseudo-random number generator with this seed."
       ()
+
+  let mem_poll_frequency =
+    Clap.default_float
+      ~long:"mem-poll-frequency"
+      ~placeholder:"HERTZ"
+      ~description:
+        "How many times per second memory usage of tests shall be measured \
+         when using --record-mem-peak."
+      10.
 end
 
 module Logs = struct
@@ -399,6 +408,15 @@ module Reports = struct
          for each test."
       ()
 
+  let record_mem_peak =
+    Clap.flag
+      ~section
+      ~set_long:"record-mem-peak"
+      ~description:
+        "Include peak memory usage of tests in --record files. Only work with \
+         -j and useless without --record."
+      false
+
   let from_records =
     Clap.list_string
       ~section
diff --git a/lib_core/cli.mli b/lib_core/cli.mli
index 2c66cf1d..7aa5733c 100644
--- a/lib_core/cli.mli
+++ b/lib_core/cli.mli
@@ -100,6 +100,9 @@ module Options : sig
 
   (** [--seed] *)
   val seed : int option
+
+  (** [--mem-poll-frequency] *)
+  val mem_poll_frequency : float
 end
 
 module Logs : sig
@@ -168,6 +171,9 @@ module Reports : sig
   (** [--record] *)
   val record : string option
 
+  (** [--record-mem-peak] *)
+  val record_mem_peak : bool
+
   (** [--from-record] *)
   val from_records : string list
 
diff --git a/lib_core/test.ml b/lib_core/test.ml
index a982cc49..59ccc1d6 100644
--- a/lib_core/test.ml
+++ b/lib_core/test.ml
@@ -176,8 +176,10 @@ type test = {
   mutable session_successful_runs : Summed_durations.t;
   mutable session_failed_runs : Summed_durations.t;
   mutable session_retries : int;
+  mutable session_peak_memory_usage : int option;
   mutable past_records_successful_runs : Summed_durations.t;
   mutable past_records_failed_runs : Summed_durations.t;
+  mutable past_records_peak_memory_usage : int option;
   mutable result : Log.test_result option;
 }
 
@@ -185,7 +187,11 @@ type t = test
 
 type used_seed = Used_fixed | Used_random of int
 
-type test_result = {test_result : Log.test_result; seed : used_seed}
+type test_result = {
+  test_result : Log.test_result;
+  seed : used_seed;
+  peak_memory_usage : int option;
+}
 
 (* This reference should be only modified by [really_run] before and after each test. *)
 let current_test = ref None
@@ -369,7 +375,7 @@ let really_run ~sleep ~clean_up ~temp_start ~temp_stop ~temp_clean_up test =
   (* Flush logs. *)
   Log.flush_file () ;
   current_test := None ;
-  return {test_result; seed}
+  return {test_result; seed; peak_memory_usage = None}
 
 let rec really_run_with_retry ~sleep ~clean_up ~temp_start ~temp_stop
     ~temp_clean_up remaining_retry_count test =
@@ -793,6 +799,12 @@ let display_time_summary () =
   String_map.iter print_time_for_file tests_by_file ;
   ()
 
+let max_peak a b =
+  match (a, b) with
+  | None, None -> None
+  | Some x, None | None, Some x -> Some x
+  | Some x, Some y -> Some (max x y)
+
 module Record = struct
   type test = {
     file : string;
@@ -800,20 +812,25 @@ module Record = struct
     tags : string list;
     successful_runs : Summed_durations.t;
     failed_runs : Summed_durations.t;
+    peak_memory_usage : int option;
   }
 
   let encode_obj fields =
     `O (List.filter (function _, `Null -> false | _ -> true) fields)
 
-  let encode_test {file; title; tags; successful_runs; failed_runs} : JSON.u =
+  let encode_test
+      {file; title; tags; successful_runs; failed_runs; peak_memory_usage} :
+      JSON.u =
     encode_obj
-      [
-        ("file", `String file);
-        ("title", `String title);
-        ("tags", `A (List.map (fun tag -> `String tag) tags));
-        ("successful_runs", Summed_durations.encode successful_runs);
-        ("failed_runs", Summed_durations.encode failed_runs);
-      ]
+      (("file", `String file)
+      :: ("title", `String title)
+      :: ("tags", `A (List.map (fun tag -> `String tag) tags))
+      :: ("successful_runs", Summed_durations.encode successful_runs)
+      :: ("failed_runs", Summed_durations.encode failed_runs)
+      ::
+      (match peak_memory_usage with
+      | None -> []
+      | Some bytes -> [("peak_memory_usage", `Float (float bytes))]))
 
   let decode_test (json : JSON.t) : test =
     {
@@ -823,6 +840,7 @@ module Record = struct
       successful_runs =
         Summed_durations.decode JSON.(json |-> "successful_runs");
       failed_runs = Summed_durations.decode JSON.(json |-> "failed_runs");
+      peak_memory_usage = JSON.(json |-> "peak_memory_usage" |> as_int_opt);
     }
 
   type t = test list
@@ -855,8 +873,10 @@ module Record = struct
              session_retries = _;
              session_successful_runs;
              session_failed_runs;
+             session_peak_memory_usage;
              past_records_successful_runs = _;
              past_records_failed_runs = _;
+             past_records_peak_memory_usage = _;
              result = _;
            } ->
     {
@@ -865,6 +885,7 @@ module Record = struct
       tags;
       successful_runs = session_successful_runs;
       failed_runs = session_failed_runs;
+      peak_memory_usage = session_peak_memory_usage;
     }
 
   (* Read a record and update the time information of registered tests
@@ -881,7 +902,11 @@ module Record = struct
               test.past_records_successful_runs + recorded_test.successful_runs) ;
           test.past_records_failed_runs <-
             Summed_durations.(
-              test.past_records_failed_runs + recorded_test.failed_runs)
+              test.past_records_failed_runs + recorded_test.failed_runs) ;
+          test.past_records_peak_memory_usage <-
+            max_peak
+              test.past_records_peak_memory_usage
+              recorded_test.peak_memory_usage
     in
     List.iter update_test record
 
@@ -896,7 +921,11 @@ module Record = struct
       | Some test ->
           test.session_successful_runs <-
             Summed_durations.(
-              test.session_successful_runs + recorded_test.successful_runs)
+              test.session_successful_runs + recorded_test.successful_runs) ;
+          test.session_peak_memory_usage <-
+            max_peak
+              test.session_peak_memory_usage
+              recorded_test.peak_memory_usage
     in
     List.iter update_test record
 end
@@ -1131,8 +1160,10 @@ let register ~__FILE__ ~title ~tags ?(seed = Fixed 0) body =
       session_successful_runs = Summed_durations.zero;
       session_failed_runs = Summed_durations.zero;
       session_retries = 0;
+      session_peak_memory_usage = None;
       past_records_successful_runs = Summed_durations.zero;
       past_records_failed_runs = Summed_durations.zero;
+      past_records_peak_memory_usage = None;
       result = None;
     }
   in
@@ -1356,7 +1387,8 @@ let run_with_scheduler scheduler =
         | None -> None
         | Some (test, test_instance) ->
             let start = Unix.gettimeofday () in
-            let on_response (Scheduler.Test_result {test_result; seed}) =
+            let on_response
+                (Scheduler.Test_result {test_result; seed; peak_memory_usage}) =
               test.result <- Some test_result ;
               let time = Unix.gettimeofday () -. start in
               (match test_result with
@@ -1369,6 +1401,8 @@ let run_with_scheduler scheduler =
                 ~iteration:test_instance.iteration
                 test_result
                 test.title ;
+              test.session_peak_memory_usage <-
+                max_peak test.session_peak_memory_usage peak_memory_usage ;
               match test_result with
               | Successful ->
                   test.session_successful_runs <-
diff --git a/lib_core/test.mli b/lib_core/test.mli
index a2e56ccc..99d8e64d 100644
--- a/lib_core/test.mli
+++ b/lib_core/test.mli
@@ -162,8 +162,15 @@ val before_test_run : (unit -> unit) -> unit
       or from the [--seed] command-line argument. *)
 type used_seed = Used_fixed | Used_random of int
 
-(** Data that a test sends to the scheduler after it is done. *)
-type test_result = {test_result : Log.test_result; seed : used_seed}
+(** Data that a test sends to the scheduler after it is done.
+
+    [peak_memory_usage] is in bytes.
+    It is only set if memory usage could be measured. *)
+type test_result = {
+  test_result : Log.test_result;
+  seed : used_seed;
+  peak_memory_usage : int option;
+}
 
 module type SCHEDULER = sig
   (** Signature of schedulers to pass to {!run_with_scheduler}. *)
-- 
GitLab