From 983a9a8364bb999af54edbefb651e7aec3b5cc84 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Tue, 15 Jul 2025 11:02:19 +0100 Subject: [PATCH 1/4] Dal_node: Move L1 crawler status to DAL node services This is needed because the RPC services will use the type of the L1 crawler status for defining the `/synchronized` endpoint. --- src/{lib_dal_node => lib_dal_node_services}/l1_crawler_status.ml | 0 src/{lib_dal_node => lib_dal_node_services}/l1_crawler_status.mli | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/{lib_dal_node => lib_dal_node_services}/l1_crawler_status.ml (100%) rename src/{lib_dal_node => lib_dal_node_services}/l1_crawler_status.mli (100%) diff --git a/src/lib_dal_node/l1_crawler_status.ml b/src/lib_dal_node_services/l1_crawler_status.ml similarity index 100% rename from src/lib_dal_node/l1_crawler_status.ml rename to src/lib_dal_node_services/l1_crawler_status.ml diff --git a/src/lib_dal_node/l1_crawler_status.mli b/src/lib_dal_node_services/l1_crawler_status.mli similarity index 100% rename from src/lib_dal_node/l1_crawler_status.mli rename to src/lib_dal_node_services/l1_crawler_status.mli -- GitLab From f54a2ac339c56a03af06fd6ff46a279f2ac8aff2 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Tue, 15 Jul 2025 11:10:48 +0100 Subject: [PATCH 2/4] Dal_node: Add /synchronized RPC for the current synchronisation status of the DAL node --- src/lib_dal_node/RPC_server.ml | 9 ++++ .../l1_crawler_status.ml | 50 +++++++++++++++++++ .../l1_crawler_status.mli | 3 ++ src/lib_dal_node_services/services.ml | 16 ++++++ src/lib_dal_node_services/services.mli | 14 +++++- 5 files changed, 90 insertions(+), 2 deletions(-) diff --git a/src/lib_dal_node/RPC_server.ml b/src/lib_dal_node/RPC_server.ml index 45033f835327..8ade78b22304 100644 --- a/src/lib_dal_node/RPC_server.ml +++ b/src/lib_dal_node/RPC_server.ml @@ -714,6 +714,11 @@ module Health = struct return {status = Up; checks} end +module Synchronized = struct + let get_synchronized ctxt () () = 
Lwt_result_syntax.return @@ Node_context.get_l1_crawler_status ctxt +end + let add_service registerer service handler directory = registerer directory service handler @@ -858,6 +863,10 @@ let register : Tezos_rpc.Directory.register0 Services.health (Health.get_health ctxt) + |> add_service + Tezos_rpc.Directory.register0 + Services.synchronized + (Synchronized.get_synchronized ctxt) |> add_service Tezos_rpc.Directory.opt_register0 Services.get_last_processed_level diff --git a/src/lib_dal_node_services/l1_crawler_status.ml b/src/lib_dal_node_services/l1_crawler_status.ml index ff65966a9074..6963cbd4352b 100644 --- a/src/lib_dal_node_services/l1_crawler_status.ml +++ b/src/lib_dal_node_services/l1_crawler_status.ml @@ -14,6 +14,56 @@ type t = | L1_unreachable | Unknown +let encoding = + let open Data_encoding in + union + [ + case + ~title:"Catching_up" + (Tag 0) + (obj2 + (req "status" (constant "Catching_up")) + (req "levels_to_process" int32)) + (function + | Catching_up {levels_to_process} -> Some ((), levels_to_process) + | _ -> None) + (fun ((), levels_to_process) -> Catching_up {levels_to_process}); + case + ~title:"Synced" + (Tag 1) + (obj1 (req "status" (constant "Synced"))) + (function Synced -> Some () | _ -> None) + (fun () -> Synced); + case + ~title:"Lagging" + (Tag 2) + (obj2 + (req "status" (constant "Lagging")) + (req "levels_to_process" int32)) + (function + | Lagging {levels_to_process} -> Some ((), levels_to_process) + | _ -> None) + (fun ((), levels_to_process) -> Lagging {levels_to_process}); + case + ~title:"L1_bootstrapping" + (Tag 3) + (obj1 (req "status" (constant "L1_bootstrapping"))) + (function L1_bootstrapping -> Some () | _ -> None) + (fun () -> L1_bootstrapping); + case + ~title:"L1_unreachable" + (Tag 4) + (obj1 (req "status" (constant "L1_unreachable"))) + (function L1_unreachable -> Some () | _ -> None) + (fun () -> L1_unreachable); + case + ~title:"Unknown" + (Tag 5) + (obj1 (req "status" (constant "Unknown"))) + (function Unknown 
-> Some () | _ -> None) + (fun () -> Unknown); + ] + let catching_up_lagging_or_synced_status ~delta_kind ~head_level ~last_processed_level = let last_finalized = Int32.sub head_level 2l in diff --git a/src/lib_dal_node_services/l1_crawler_status.mli b/src/lib_dal_node_services/l1_crawler_status.mli index 478d87728667..59909feb4c26 100644 --- a/src/lib_dal_node_services/l1_crawler_status.mli +++ b/src/lib_dal_node_services/l1_crawler_status.mli @@ -24,6 +24,9 @@ type t = | L1_unreachable (** The DAL node is unable to reach the L1 node. *) | Unknown (** The crawler's status is unknown. *) +(** Encoding of the L1 crawler status. *) +val encoding : t Data_encoding.t + (** [catching_up_or_synced_status ~head_level ~last_processed_level] returns the appropriate status of the crawler: [Catching_up] if the [last_processed_level] is more than 2 levels behind [head_level], diff --git a/src/lib_dal_node_services/services.ml b/src/lib_dal_node_services/services.ml index 0b65872d978a..17c9afecc24d 100644 --- a/src/lib_dal_node_services/services.ml +++ b/src/lib_dal_node_services/services.ml @@ -135,6 +135,22 @@ let health : ~output:Health.encoding Tezos_rpc.Path.(open_root / "health") +let synchronized : + < meth : [`GET] + ; input : unit + ; output : L1_crawler_status.t + ; prefix : unit + ; params : unit + ; query : unit > + service = + Tezos_rpc.Service.get_service + ~description: + "Returns the current synchronization status of the DAL node with the L1 \ + node." 
+ ~query:Tezos_rpc.Query.empty + ~output:L1_crawler_status.encoding + Tezos_rpc.Path.(open_root / "synchronized") + let post_slot : < meth : [`POST] ; input : string diff --git a/src/lib_dal_node_services/services.mli b/src/lib_dal_node_services/services.mli index 4dd4b2c22adb..506b8fae4ba9 100644 --- a/src/lib_dal_node_services/services.mli +++ b/src/lib_dal_node_services/services.mli @@ -39,8 +39,7 @@ type 'rpc service = ; output : 'output > (* This RPC aims to be used by a user to check whether its DAL node - behaves as expected. It does not aim to provide a clear diagnostic, - this is left to another RPC (not implemented yet). *) + behaves as expected. *) val health : < meth : [`GET] ; input : unit @@ -50,6 +49,17 @@ val health : ; query : unit > service +(* This RPC reports the current synchronization status of the DAL node + with respect to the L1 node. *) +val synchronized : + < meth : [`GET] + ; input : unit + ; output : L1_crawler_status.t + ; prefix : unit + ; params : unit + ; query : unit > + service + (** This RPC should be used by a slot producer. It allows to produce a commitment, a commitment proof and the shards from a slot. 
A padding is added if the slot is not of the expected size -- GitLab From 35b6710548669652b85540d6e8e97545155f8acf Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Thu, 17 Jul 2025 15:36:02 +0100 Subject: [PATCH 3/4] Dal_node: Add /monitor/synchronized to be a monitoring RPC of the L1 crawler status --- src/lib_dal_node/RPC_server.ml | 13 +++++++++++++ src/lib_dal_node/node_context.ml | 9 ++++++++- src/lib_dal_node/node_context.mli | 10 ++++++++++ src/lib_dal_node/store_cleanup.ml | 4 ++++ src/lib_dal_node_services/services.ml | 16 ++++++++++++++++ src/lib_dal_node_services/services.mli | 11 +++++++++++ ...ha- Testing DAL node (dal node list RPCs).out | 6 ++++++ ...2-- Testing DAL node (dal node list RPCs).out | 6 ++++++ ...3-- Testing DAL node (dal node list RPCs).out | 6 ++++++ 9 files changed, 80 insertions(+), 1 deletion(-) diff --git a/src/lib_dal_node/RPC_server.ml b/src/lib_dal_node/RPC_server.ml index 8ade78b22304..193bc1589fbf 100644 --- a/src/lib_dal_node/RPC_server.ml +++ b/src/lib_dal_node/RPC_server.ml @@ -717,6 +717,15 @@ end module Synchronized = struct let get_synchronized ctxt () () = Lwt_result_syntax.return @@ Node_context.get_l1_crawler_status ctxt + + let get_monitor_synchronized ctxt () () = + let l1_crawler_status_input = + Node_context.get_l1_crawler_status_input ctxt + in + let stream, stopper = Lwt_watcher.create_stream l1_crawler_status_input in + let next () = Lwt_stream.get stream in + let shutdown () = Lwt_watcher.shutdown stopper in + Tezos_rpc.Answer.return_stream {next; shutdown} end let add_service registerer service handler directory = @@ -867,6 +876,10 @@ let register : Tezos_rpc.Directory.register0 Services.synchronized (Synchronized.get_synchronized ctxt) + |> add_service + Tezos_rpc.Directory.gen_register0 + Services.monitor_synchronized + (Synchronized.get_monitor_synchronized ctxt) |> add_service Tezos_rpc.Directory.opt_register0 Services.get_last_processed_level diff --git a/src/lib_dal_node/node_context.ml 
b/src/lib_dal_node/node_context.ml index 7cafc11a5b48..9df2d0880dcc 100644 --- a/src/lib_dal_node/node_context.ml +++ b/src/lib_dal_node/node_context.ml @@ -43,6 +43,7 @@ type t = { (* the highest finalized level the DAL node is aware of (except at start-up, where it is the highest level the node is aware of) *) mutable l1_crawler_status : L1_crawler_status.t; + l1_crawler_status_input : L1_crawler_status.t Lwt_watcher.input; disable_shard_validation : bool; ignore_pkhs : Signature.Public_key_hash.Set.t; } @@ -68,16 +69,22 @@ let init config ~network_name profile_ctxt cryptobox profile_ctxt; last_finalized_level; l1_crawler_status = Unknown; + l1_crawler_status_input = Lwt_watcher.create_input (); disable_shard_validation; ignore_pkhs; } let get_tezos_node_cctxt ctxt = ctxt.tezos_node_cctxt -let set_l1_crawler_status ctxt status = ctxt.l1_crawler_status <- status +let set_l1_crawler_status ctxt status = + if ctxt.l1_crawler_status <> status then ( + ctxt.l1_crawler_status <- status ; + Lwt_watcher.notify ctxt.l1_crawler_status_input status) let get_l1_crawler_status ctxt = ctxt.l1_crawler_status +let get_l1_crawler_status_input ctxt = ctxt.l1_crawler_status_input + let may_reconstruct ~reconstruct slot_id t = let open Lwt_result_syntax in let p = diff --git a/src/lib_dal_node/node_context.mli b/src/lib_dal_node/node_context.mli index 4e7d46d108e0..c62416da02ec 100644 --- a/src/lib_dal_node/node_context.mli +++ b/src/lib_dal_node/node_context.mli @@ -105,6 +105,16 @@ val may_reconstruct : context. *) val get_l1_crawler_status : t -> L1_crawler_status.t +(** [get_l1_crawler_status_input ctxt] returns the watcher input used to + broadcast L1 crawler status updates. + + This input can be used with [Lwt_watcher.notify] to push new + [L1_crawler_status.t] values to all subscribers (e.g., RPC clients + monitoring synchronization status). Each call to the monitoring RPC + creates a new stream from this watcher, and receives updates pushed + through this input. 
*) +val get_l1_crawler_status_input : t -> L1_crawler_status.t Lwt_watcher.input + (** Updates the status of the L1 crawler with the given value. *) val set_l1_crawler_status : t -> L1_crawler_status.t -> unit diff --git a/src/lib_dal_node/store_cleanup.ml b/src/lib_dal_node/store_cleanup.ml index 2fcd0cc769df..59189b73a1ac 100644 --- a/src/lib_dal_node/store_cleanup.ml +++ b/src/lib_dal_node/store_cleanup.ml @@ -187,6 +187,10 @@ let clean_up_store_and_catch_up_for_no_refutation_support ctxt let*! () = Block_handler.remove_old_level_stored_data proto_parameters ctxt level in + L1_crawler_status.catching_up_or_synced_status + ~head_level + ~last_processed_level:level + |> Node_context.set_l1_crawler_status ctxt ; cleanup @@ Int32.succ level in let start_level = Int32.succ last_processed_level in diff --git a/src/lib_dal_node_services/services.ml b/src/lib_dal_node_services/services.ml index 17c9afecc24d..d16fb8b72d92 100644 --- a/src/lib_dal_node_services/services.ml +++ b/src/lib_dal_node_services/services.ml @@ -151,6 +151,22 @@ let synchronized : ~output:L1_crawler_status.encoding Tezos_rpc.Path.(open_root / "synchronized") +let monitor_synchronized : + < meth : [`GET] + ; input : unit + ; output : L1_crawler_status.t + ; prefix : unit + ; params : unit + ; query : unit > + service = + Tezos_rpc.Service.get_service + ~description: + "Returns the stream of synchronization statuses of the DAL node with the \ + L1 node." 
+ ~query:Tezos_rpc.Query.empty + ~output:L1_crawler_status.encoding + Tezos_rpc.Path.(open_root / "monitor" / "synchronized") + let post_slot : < meth : [`POST] ; input : string diff --git a/src/lib_dal_node_services/services.mli b/src/lib_dal_node_services/services.mli index 506b8fae4ba9..d056be7bd99f 100644 --- a/src/lib_dal_node_services/services.mli +++ b/src/lib_dal_node_services/services.mli @@ -60,6 +60,17 @@ val synchronized : ; query : unit > service +(* This RPC monitors the synchronization statuses of the DAL node + with respect to the L1 node. *) +val monitor_synchronized : + < meth : [`GET] + ; input : unit + ; output : L1_crawler_status.t + ; prefix : unit + ; params : unit + ; query : unit > + service + (** This RPC should be used by a slot producer. It allows to produce a commitment, a commitment proof and the shards from a slot. A padding is added if the slot is not of the expected size diff --git a/tezt/tests/expected/dal.ml/Alpha- Testing DAL node (dal node list RPCs).out b/tezt/tests/expected/dal.ml/Alpha- Testing DAL node (dal node list RPCs).out index 86a97492a67b..b92ee7020708 100644 --- a/tezt/tests/expected/dal.ml/Alpha- Testing DAL node (dal node list RPCs).out +++ b/tezt/tests/expected/dal.ml/Alpha- Testing DAL node (dal node list RPCs).out @@ -24,6 +24,9 @@ Available services: Fetch shard as bytes - GET /levels//slots//status Return the status for the given slot. + - GET /monitor/synchronized + Returns the stream of synchronization statuses of the DAL node with the + L1 node. + p2p/ - POST /p2p/connect Connect to a new peer. @@ -123,6 +126,9 @@ Available services: the character provided as padding query parameter (defaults to \000). If the slot_index query parameter is provided, the DAL node checks that its profile allows to publish data on the given slot index. + - GET /synchronized + Returns the current synchronization status of the DAL node with the L1 + node. 
- GET /version version diff --git a/tezt/tests/expected/dal.ml/R022-- Testing DAL node (dal node list RPCs).out b/tezt/tests/expected/dal.ml/R022-- Testing DAL node (dal node list RPCs).out index 86a97492a67b..b92ee7020708 100644 --- a/tezt/tests/expected/dal.ml/R022-- Testing DAL node (dal node list RPCs).out +++ b/tezt/tests/expected/dal.ml/R022-- Testing DAL node (dal node list RPCs).out @@ -24,6 +24,9 @@ Available services: Fetch shard as bytes - GET /levels//slots//status Return the status for the given slot. + - GET /monitor/synchronized + Returns the stream of synchronization statuses of the DAL node with the + L1 node. + p2p/ - POST /p2p/connect Connect to a new peer. @@ -123,6 +126,9 @@ Available services: the character provided as padding query parameter (defaults to \000). If the slot_index query parameter is provided, the DAL node checks that its profile allows to publish data on the given slot index. + - GET /synchronized + Returns the current synchronization status of the DAL node with the L1 + node. - GET /version version diff --git a/tezt/tests/expected/dal.ml/S023-- Testing DAL node (dal node list RPCs).out b/tezt/tests/expected/dal.ml/S023-- Testing DAL node (dal node list RPCs).out index 86a97492a67b..b92ee7020708 100644 --- a/tezt/tests/expected/dal.ml/S023-- Testing DAL node (dal node list RPCs).out +++ b/tezt/tests/expected/dal.ml/S023-- Testing DAL node (dal node list RPCs).out @@ -24,6 +24,9 @@ Available services: Fetch shard as bytes - GET /levels//slots//status Return the status for the given slot. + - GET /monitor/synchronized + Returns the stream of synchronization statuses of the DAL node with the + L1 node. + p2p/ - POST /p2p/connect Connect to a new peer. @@ -123,6 +126,9 @@ Available services: the character provided as padding query parameter (defaults to \000). If the slot_index query parameter is provided, the DAL node checks that its profile allows to publish data on the given slot index. 
+ - GET /synchronized + Returns the current synchronization status of the DAL node with the L1 + node. - GET /version version -- GitLab From d0916d5f9f684d4513c579d56b1ddd9a47063ff5 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Thu, 17 Jul 2025 09:45:05 +0100 Subject: [PATCH 4/4] Docs: Added entry in CHANGELOG --- docs/CHANGES.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/CHANGES.rst b/docs/CHANGES.rst index d7bf646881b8..868c3295d960 100644 --- a/docs/CHANGES.rst +++ b/docs/CHANGES.rst @@ -362,6 +362,16 @@ DAL node - A command line option ``--config-file`` has been added, allowing to have a configuration file out of the data directory. (MR :gl:`!18464`) +- Added two new RPCs to track the synchronization status between the DAL node and the L1 node: + + + ``GET /synchronized``: returns the current synchronization status. + + + ``GET /monitor/synchronized``: provides a streamed view of the status as it changes + over time. + + These endpoints help operators monitor whether the DAL node is catching up, lagging, + or fully synchronized with the L1 chain. (MR :gl:`!18686`) + Grafazos -------- -- GitLab