Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/super catchup stuck at the tip #9132

Merged
merged 10 commits into from
Jun 29, 2021
89 changes: 70 additions & 19 deletions src/lib/ledger_catchup/super_catchup.ml
Original file line number Diff line number Diff line change
Expand Up @@ -141,52 +141,83 @@ let verify_transition ~logger ~consensus_constants ~trust_system ~frontier
~consensus_constants ~unprocessed_transition_cache
enveloped_initially_validated_transition
in
let state_hash =
External_transition.Validation.forget_validation_with_hash
transition_with_hash
|> With_hash.hash |> State_hash.to_yojson
in
let open Deferred.Let_syntax in
match cached_initially_validated_transition_result with
| Ok x ->
[%log trace]
~metadata:[("state_hash", state_hash)]
"initial_validate: validation is successful" ;
Deferred.return @@ Ok (`Building_path x)
| Error (`In_frontier hash) ->
[%log trace]
"transition queried during ledger catchup has already been seen" ;
~metadata:[("state_hash", state_hash)]
"initial_validate: transition queried during ledger catchup has \
already been seen" ;
Deferred.return @@ Ok (`In_frontier hash)
| Error (`In_process consumed_state) -> (
[%log trace]
"transition queried during ledger catchup is still in process in one \
of the components in transition_frontier" ;
~metadata:[("state_hash", state_hash)]
"initial_validate: transition queried during ledger catchup is still \
in process in one of the components in transition_frontier" ;
match%map Ivar.read consumed_state with
| `Failed ->
[%log trace] "transition queried during ledger catchup failed" ;
[%log trace]
~metadata:[("state_hash", state_hash)]
"initial_validate: transition queried during ledger catchup failed" ;
Error (Error.of_string "Previous transition failed")
| `Success hash ->
[%log trace]
~metadata:[("state_hash", state_hash)]
"initial_validate: transition queried during ledger catchup is \
added to frontier" ;
Ok (`In_frontier hash) )
| Error (`Verifier_error error) ->
[%log warn]
~metadata:[("error", Error_json.error_to_yojson error)]
"verifier threw an error while verifying transiton queried during \
ledger catchup: $error" ;
~metadata:
[ ("error", Error_json.error_to_yojson error)
; ("state_hash", state_hash) ]
"initial_validate: verifier threw an error while verifying transiton \
queried during ledger catchup: $error" ;
Deferred.Or_error.fail (Error.tag ~tag:"verifier threw an error" error)
| Error `Invalid_proof ->
let%map () =
Trust_system.record_envelope_sender trust_system logger sender
( Trust_system.Actions.Gossiped_invalid_transition
, Some ("invalid proof", []) )
in
[%log warn]
~metadata:[("state_hash", state_hash)]
"initial_validate: invalid proof" ;
Error (Error.of_string "invalid proof")
| Error `Invalid_genesis_protocol_state ->
let%map () =
Trust_system.record_envelope_sender trust_system logger sender
( Trust_system.Actions.Gossiped_invalid_transition
, Some ("invalid genesis protocol state", []) )
in
[%log warn]
~metadata:[("state_hash", state_hash)]
"initial_validate: invalid genesis protocol state" ;
Error (Error.of_string "invalid genesis protocol state")
| Error `Invalid_delta_transition_chain_proof ->
[%log warn]
~metadata:[("state_hash", state_hash)]
"initial_validate: invalid delta transition chain proof" ;
let%map () =
Trust_system.record_envelope_sender trust_system logger sender
( Trust_system.Actions.Gossiped_invalid_transition
, Some ("invalid delta transition chain witness", []) )
in
Error (Error.of_string "invalid delta transition chain witness")
| Error `Invalid_protocol_version ->
[%log warn]
~metadata:[("state_hash", state_hash)]
"initial_validate: invalid protocol version" ;
let transition =
External_transition.Validation.forget_validation transition_with_hash
in
Expand All @@ -208,6 +239,9 @@ let verify_transition ~logger ~consensus_constants ~trust_system ~frontier
in
Error (Error.of_string "invalid protocol version")
| Error `Mismatched_protocol_version ->
[%log warn]
~metadata:[("state_hash", state_hash)]
"initial_validate: mismatch protocol version" ;
let transition =
External_transition.Validation.forget_validation transition_with_hash
in
Expand All @@ -227,6 +261,9 @@ let verify_transition ~logger ~consensus_constants ~trust_system ~frontier
in
Error (Error.of_string "mismatched protocol version")
| Error `Disconnected ->
[%log warn]
~metadata:[("state_hash", state_hash)]
"initial_validate: disconnected chain" ;
Deferred.Or_error.fail @@ Error.of_string "disconnected chain"

let find_map_ok ?how xs ~f =
Expand Down Expand Up @@ -515,14 +552,20 @@ let initial_validate ~(precomputed_values : Precomputed_values.t) ~logger
~unprocessed_transition_cache transition =
let verification_start_time = Core.Time.now () in
let open Deferred.Result.Let_syntax in
let state_hash =
Envelope.Incoming.data transition |> With_hash.hash |> State_hash.to_yojson
in
[%log debug]
~metadata:[("state_hash", state_hash)]
"initial_validate: start processing $state_hash" ;
let%bind tv =
let open Deferred.Let_syntax in
match%bind Initial_validate_batcher.verify batcher transition with
| Ok (Ok tv) ->
return (Ok {transition with data= tv})
| Ok (Error ()) ->
let s = "proof failed to verify" in
[%log warn] "%s" s ;
let s = "initial_validate: proof failed to verify" in
[%log warn] ~metadata:[("state_hash", state_hash)] "%s" s ;
let%map () =
match transition.sender with
| Local ->
Expand All @@ -535,8 +578,11 @@ let initial_validate ~(precomputed_values : Precomputed_values.t) ~logger
Error (`Error (Error.of_string s))
| Error e ->
[%log warn]
~metadata:[("error", Error_json.error_to_yojson e)]
"verification of blockchain snark failed but it was our fault" ;
~metadata:
[ ("error", Error_json.error_to_yojson e)
; ("state_hash", state_hash) ]
"initial_validate: verification of blockchain snark failed but it \
was our fault" ;
return (Error `Couldn't_reach_verifier)
in
let verification_end_time = Core.Time.now () in
Expand All @@ -546,8 +592,9 @@ let initial_validate ~(precomputed_values : Precomputed_values.t) ~logger
, `Float
Core.Time.(
Span.to_sec @@ diff verification_end_time verification_start_time)
) ]
"verification of proofs complete" ;
)
; ("state_hash", state_hash) ]
"initial_validate: verification of proofs complete" ;
verify_transition ~logger
~consensus_constants:precomputed_values.consensus_constants ~trust_system
~frontier ~unprocessed_transition_cache tv
Expand Down Expand Up @@ -600,7 +647,11 @@ let create_node ~downloader t x =
in
upon (Ivar.read node.result) (fun _ ->
Downloader.cancel downloader (h, blockchain_length) ) ;
Hashtbl.incr t.states (Node.State.enum node.state) ;
Hashtbl.update t.states (Node.State.enum node.state) ~f:(function
| None ->
State_hash.Set.singleton node.state_hash
| Some hashes ->
State_hash.Set.add hashes node.state_hash ) ;
Hashtbl.set t.nodes ~key:h ~data:node ;
( try check_invariant ~downloader t
with e ->
Expand Down Expand Up @@ -1053,8 +1104,10 @@ let run ~logger ~trust_system ~verifier ~network ~frontier
$reason" ;
Mina_metrics.(
Counter.inc Rejected_blocks.no_common_ancestor
(Float.of_int @@ (1 + List.length children_transitions)))
)
(Float.of_int @@ (1 + List.length children_transitions))) ;
List.iter forest ~f:(fun tree ->
Rose_tree.iter subtree ~f:Cached.invalidate_with_failure
) )
| Ok (root, state_hashes) ->
[%log' debug t.logger]
~metadata:
Expand Down Expand Up @@ -1084,9 +1137,7 @@ let run ~logger ~trust_system ~verifier ~network ~frontier
List.iter forest
~f:
(Rose_tree.iter ~f:(fun c ->
let node =
create_node ~downloader t (`Initial_validated c)
in
let node = create_node ~downloader t (`Verify c) in
run_node node |> ignore )) ;
List.fold state_hashes
~init:(root.state_hash, root.blockchain_length)
Expand Down
4 changes: 3 additions & 1 deletion src/lib/mina_commands/mina_commands.ml
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,9 @@ let get_status ~flag t =
in
match Transition_frontier.catchup_tree frontier with
| Full full ->
Some (Hashtbl.to_alist full.states)
Some
(List.map (Hashtbl.to_alist full.states) ~f:(fun (state, hashes) ->
(state, State_hash.Set.length hashes) ))
| _ ->
None
in
Expand Down
35 changes: 28 additions & 7 deletions src/lib/transition_frontier/full_catchup_tree.ml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ end
ancestor is at most k *)
type t =
{ nodes: Node.t State_hash.Table.t
; states: int Node.State.Enum.Table.t
; states: State_hash.Set.t Node.State.Enum.Table.t
; logger: Logger.t }

(* mutable root: Node.t ; *)
Expand All @@ -137,9 +137,17 @@ let tear_down {nodes; states; _} =
Hashtbl.clear states

let set_state t (node : Node.t) s =
Hashtbl.decr t.states (Node.State.enum node.state) ;
Hashtbl.update t.states (Node.State.enum node.state) ~f:(function
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe refactor these two out into their own functions

| None ->
State_hash.Set.empty
| Some hashes ->
State_hash.Set.remove hashes node.state_hash ) ;
node.state <- s ;
Hashtbl.incr t.states (Node.State.enum s)
Hashtbl.update t.states (Node.State.enum node.state) ~f:(function
| None ->
State_hash.Set.singleton node.state_hash
| Some hashes ->
State_hash.Set.add hashes node.state_hash )

let finish t (node : Node.t) b =
let s, r =
Expand All @@ -151,9 +159,14 @@ let finish t (node : Node.t) b =

let to_yojson =
let module T = struct
type t = (Node.State.Enum.t * int) list [@@deriving to_yojson]
type t = (Node.State.Enum.t * (int * State_hash.t list)) list
[@@deriving to_yojson]
end in
fun (t : t) -> T.to_yojson (Hashtbl.to_alist t.states)
fun (t : t) ->
T.to_yojson
@@ List.map (Hashtbl.to_alist t.states) ~f:(fun (state, hashes) ->
( state
, (State_hash.Set.length hashes, State_hash.Set.to_list hashes) ) )

let max_catchup_chain_length (t : t) =
(* Find the longest directed path *)
Expand Down Expand Up @@ -195,7 +208,11 @@ let create_node_full t b : unit =
; parent= Breadcrumb.parent_hash b
; result= Ivar.create_full (Ok `Added_to_frontier) }
in
Hashtbl.incr t.states (Node.State.enum node.state) ;
Hashtbl.update t.states (Node.State.enum node.state) ~f:(function
| None ->
State_hash.Set.singleton h
| Some hashes ->
State_hash.Set.add hashes h ) ;
Hashtbl.add_exn t.nodes ~key:h ~data:node

let breadcrumb_added (t : t) b =
Expand All @@ -218,7 +235,11 @@ let breadcrumb_added (t : t) b =

let remove_node' t (node : Node.t) =
Hashtbl.remove t.nodes node.state_hash ;
Hashtbl.decr t.states (Node.State.enum node.state) ;
Hashtbl.update t.states (Node.State.enum node.state) ~f:(function
| None ->
State_hash.Set.empty
| Some hashes ->
State_hash.Set.remove hashes node.state_hash ) ;
Ivar.fill_if_empty node.result (Error node.attempts) ;
match node.state with
| Root _ | Failed | Finished ->
Expand Down