diff --git a/src/lib_gossipsub/gossipsub_intf.ml b/src/lib_gossipsub/gossipsub_intf.ml index e26db1a7a5b627547ce86df4579f9ff2ec1bcb2a..99d455732e31bcabd4526baa007b12b140161413 100644 --- a/src/lib_gossipsub/gossipsub_intf.ml +++ b/src/lib_gossipsub/gossipsub_intf.ml @@ -203,21 +203,20 @@ type ('topic, 'peer, 'message_id, 'span) limits = { it's been [fanout_ttl] since we've published to a topic that we're not subscribed to, then we don't track that topic anymore, that is, we delete it from the fanout map. *) - heartbeat_interval : 'span; - (** [heartbeat_interval] controls the time between heartbeats. *) + heartbeat_interval : 'span; (** The time between heartbeats. *) backoff_cleanup_ticks : int; - (** [backoff_cleanup_ticks] is the number of heartbeat ticks setting the - frequency at which the backoffs are checked and potentially cleared. *) + (** The number of heartbeat ticks setting the frequency at which the + backoffs are checked and potentially cleared. *) score_cleanup_ticks : int; - (** [score_cleanup_ticks] is the number of heartbeat ticks setting the - frequency at which the scores are checked and potentially cleared. *) + (** The number of heartbeat ticks setting the frequency at which the + scores are checked and potentially cleared. *) degree_low : int; - (** [degree_low] sets the lower bound on the number of peers we keep in a + (** The lower bound on the number of peers we keep in a topic mesh. If we have fewer than [degree_low] peers, the heartbeat will attempt to graft some more into the mesh at the next heartbeat. *) degree_high : int; - (** [degree_high] sets the upper bound on the number of peers we keep in a - topic mesh. If there are more than [degree_high] peers, the heartbeat will select + (** The upper bound on the number of peers we keep in a topic mesh. If + there are more than [degree_high] peers, the heartbeat will select some to prune from the mesh at the next heartbeat. *) degree_score : int; (** [degree_score] affects how peers are selected when pruning a mesh due @@ -226,13 +225,12 @@ type ('topic, 'peer, 'message_id, 'span) limits = { (* TODO: https://gitlab.com/tezos/tezos/-/issues/5052 [degree_score] must not exceed [degree_optimal - degree_out]. *) degree_out : int; - (** [degree_out] is the quota for the number of outbound connections to - maintain in a topic mesh. When the mesh is pruned due to over - subscription, we make sure that we have outbound connections to at - least [degree_out] of the survivor peers. This prevents Sybil - attackers from overwhelming our mesh with incoming connections. - [degree_out] must be set below {degree_low}, and must not exceed - [degree_optimal / 2]. *) + (** The number of outbound connections to maintain in a topic mesh. When + the mesh is pruned due to over subscription, we make sure that we have + outbound connections to at least [degree_out] of the survivor + peers. This prevents Sybil attackers from overwhelming our mesh with + incoming connections. [degree_out] must be set below {degree_low}, and + must not exceed [degree_optimal / 2]. *) degree_lazy : int; (** [degree_lazy] affects how many peers the local peer will emit gossip to at each heartbeat. The local peer will send gossip to at least @@ -245,9 +243,8 @@ type ('topic, 'peer, 'message_id, 'span) limits = { [gossip_factor] * (total number of non-mesh/non-fanout peers), or [degree_lazy] peers, whichever is greater. *) history_length : int; - (** [history_length] controls the size of the message cache used for - gossip. The message cache will remember messages for [history_length] - heartbeats. *) + (** The size of the message cache used for gossip. The message cache will + remember messages for [history_length] heartbeats. *) history_gossip_length : int; (** [history_gossip_length] controls how many cached message ids the local peer will advertise in IHave gossip messages. When asked for its seen @@ -257,6 +254,17 @@ type ('topic, 'peer, 'message_id, 'span) limits = { avoid advertising messages that will be expired by the time they're requested. [history_gossip_length] must be less than or equal to [history_length]. *) + opportunistic_graft_ticks : int64; + (** The number of heartbeat ticks setting the frequency at which to + attempt to improve the mesh with opportunistic grafting. Every + [opportunistic_graft_ticks], if the median score of the mesh peers + falls below the {opportunistic_graft_threshold}, then the local peer + will select some high-scoring mesh peers to graft. *) + opportunistic_graft_peers : int; + (** The number of peers to opportunistically graft. *) + opportunistic_graft_threshold : float; + (** The median mesh score threshold before triggering opportunistic + grafting; this should have a small positive value. *) score_parameters : ('topic, 'span) score_parameters; (** score-specific parameters. *) } diff --git a/src/lib_gossipsub/test/test_gossipsub.ml b/src/lib_gossipsub/test/test_gossipsub.ml index d4ad3a263d2f2232ef9e080e0d8d5c1a8b81793d..09da3735d764d304b1c64d8ab92ece9d04098c5b 100644 --- a/src/lib_gossipsub/test/test_gossipsub.ml +++ b/src/lib_gossipsub/test/test_gossipsub.ml @@ -78,6 +78,9 @@ let default_limits = gossip_factor = 0.25; history_length = 5; history_gossip_length = 3; + opportunistic_graft_ticks = 60L; + opportunistic_graft_peers = 2; + opportunistic_graft_threshold = 1.; score_parameters; } diff --git a/src/lib_gossipsub/test/test_gossipsub_shared.ml b/src/lib_gossipsub/test/test_gossipsub_shared.ml index 84dc5a3b8ac917c196c65aa33e88a34974d4490e..18d950998cab589d71bd538a74d606626aa374b7 100644 --- a/src/lib_gossipsub/test/test_gossipsub_shared.ml +++ b/src/lib_gossipsub/test/test_gossipsub_shared.ml @@ -218,6 +218,9 @@ let pp_limits fmtr gossip_factor; history_length; history_gossip_length; + opportunistic_graft_ticks; + opportunistic_graft_peers; + opportunistic_graft_threshold; score_parameters; } = l @@ -250,6 +253,9 @@ let pp_limits fmtr gossip_factor = %f;@;\ history_length = %d;@;\ history_gossip_length = %d;@;\ + opportunistic_graft_ticks = %Ld;@;\ + opportunistic_graft_peers = %d;@;\ + opportunistic_graft_threshold = %f;@;\ score_parameters= %a }@]" max_recv_ihave_per_heartbeat max_sent_iwant_per_heartbeat @@ -282,6 +288,9 @@ let pp_limits fmtr gossip_factor history_length history_gossip_length + opportunistic_graft_ticks + opportunistic_graft_peers + opportunistic_graft_threshold pp_score_parameters score_parameters diff --git a/src/lib_gossipsub/tezos_gossipsub.ml b/src/lib_gossipsub/tezos_gossipsub.ml index cd55a5167550661f801faf8d3bd72c31bb73e227..bfce7e6a21c425bb4bc465f5a10080d1e3d04e33 100644 --- a/src/lib_gossipsub/tezos_gossipsub.ml +++ b/src/lib_gossipsub/tezos_gossipsub.ml @@ -350,6 +350,13 @@ module Make (C : AUTOMATON_CONFIG) : let gossip_threshold state = state.limits.gossip_threshold + let opportunistic_graft_ticks state = state.limits.opportunistic_graft_ticks + + let opportunistic_graft_peers state = state.limits.opportunistic_graft_peers + + let opportunistic_graft_threshold state = + state.limits.opportunistic_graft_threshold + let mesh state = state.mesh let fanout state = state.fanout @@ -1172,22 +1179,26 @@ module Make (C : AUTOMATON_CONFIG) : return () (* Update mesh for grafted and pruned peers. Note that in the Go - implementation this update is done on-the-fly. The semantics is however - the same, given that a peer cannot be grafted and then pruned or - vice-versa. The reasoning for why that is the case is as follows. There - are three blocks of updates in the code above: + implementation this update is done on-the-fly. Contrary to the Go + implementation, we explicitly ensure that a peer cannot be grafted and + then pruned or vice-versa. The reasoning for why that is the case is as + follows. There are four blocks of updates in the code above: + 1) graft peers if [d_mesh < degree_low] 2) prune peers if [d_mesh > degree_high] 3) graft peers if [d_mesh > degree_low] and not enough outbound peers - ([d_mesh] denotes [Peer.Set.cardinal peers]) + ([d_mesh] denotes [Peer.Set.cardinal peers]) + 4) graft peers with opportunistic grafting, and remove them from the + previously pruned peers (if they were indeed meant to be pruned). - Condition 1) is mutually exclusive with the other two. + The condition for 1) is mutually exclusive with ones for 2) and 3). Now, a peer p cannot be pruned in 2) and then grafted in 3) because in 2): A) Either we have enough outbound peers, then we don't execute 3). B) Or we don't have enough outbound peers, so p is not outbound and in 3) - we only graft outbound peers. *) - (* TODO: https://gitlab.com/tezos/tezos/-/issues/4967 - Revisit this comment if opportunistic grafting is implemented. *) + we only graft outbound peers. + + Finally, block 4) ensures the property explicitly. + *) let update_mesh mesh ~to_graft ~to_prune = let update f = Peer.Map.fold (fun peer topicset mesh -> @@ -1249,6 +1260,10 @@ module Make (C : AUTOMATON_CONFIG) : let*! degree_high in let*! degree_score in let*! degree_out in + let*! heartbeat_ticks in + let*! opportunistic_graft_ticks in + let*! opportunistic_graft_peers in + let*! opportunistic_graft_threshold in let has_outbound_connection = peer_has_outbound_connection connections ~default:false @@ -1289,6 +1304,50 @@ module Make (C : AUTOMATON_CONFIG) : (to_prune, peers) in + let opportunistic_grafting topic peers = + if Int64.rem heartbeat_ticks opportunistic_graft_ticks = 0L then + let num_peers = List.length peers in + if num_peers > 1 then + (* Opportunistic grafting works as follows: we check the median score + of peers in the mesh; if this score is below the + [opportunistic_graft_threshold], we select a few peers at random + with score over the median. + + The intention is to (slowly) improve an underperforming mesh by + introducing good scoring peers that may have been gossiping at + us. This allows us to get out of sticky situations where we are + stuck with poor peers and also recover from churn of good peers. *) + + (* Compute the median peer score in the mesh. *) + let median_score = + let sorted_scores = + peers |> List.rev_map get_score |> Array.of_list + in + Array.sort Score.compare sorted_scores ; + sorted_scores.(num_peers / 2) + in + if Score.(median_score < of_float opportunistic_graft_threshold) + then + let peers_set = Peer.Set.of_list peers in + let filter peer connection score = + let in_mesh = Peer.Set.mem peer peers_set in + let backed_off = exists_backoff topic peer backoff in + let above_median = Score.(score > median_score) in + (not in_mesh) && (not backed_off) && (not connection.direct) + && above_median + in + select_connections_peers + connections + scores + rng + topic + ~filter + ~max:opportunistic_graft_peers + else [] + else [] + else [] + in + (* Keep the first [degree_score] peers by score and the remaining up to [degree_optimal] randomly, under the constraint that we keep [degree_out] peers in the mesh (if we have that many). *) @@ -1405,7 +1464,7 @@ module Make (C : AUTOMATON_CONFIG) : (* Do we have enough outbound peers? *) let num_peers = Peer.Set.cardinal peers in - let to_graft = + let to_graft, peers = if num_peers >= degree_low then let num_outbound = Peer.Set.fold @@ -1425,14 +1484,44 @@ module Make (C : AUTOMATON_CONFIG) : && Score.(score >= zero) && has_outbound_connection peer in - select_connections_peers connections scores rng topic ~filter ~max - |> add_to_peers_topic_set to_graft topic - else to_graft - else to_graft + let new_peers = + select_connections_peers + connections + scores + rng + topic + ~filter + ~max + in + let to_graft = add_to_peers_topic_set to_graft topic new_peers in + (to_graft, new_peers) + else (to_graft, Peer.Set.elements peers) + else (to_graft, Peer.Set.elements peers) + in + + (* Attempt opportunistic grafting. *) + let to_graft, to_prune = + let peers_to_graft = opportunistic_grafting topic peers in + let to_graft = add_to_peers_topic_set to_graft topic peers_to_graft in + (* Do not actually prune the [peers_to_graft]. *) + let to_prune = + List.fold_left + (fun to_prune peer_to_graft -> + Peer.Map.update + peer_to_graft + (function + | None -> None + | Some topicset -> + let topicset = Topic.Set.remove topic topicset in + if Topic.Set.is_empty topicset then None + else Some topicset) + to_prune) + to_prune + peers_to_graft + in + (to_graft, to_prune) in - (* TODO: https://gitlab.com/tezos/tezos/-/issues/4967 - Try to improve the mesh with opportunistic grafting *) (`To_prune to_prune, `To_graft to_graft, noPX_peers) in