From 66c9395fbf17642bcd7712f937da6d42c5d4835e Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Tue, 30 Jul 2024 18:57:23 -0700 Subject: [PATCH 1/6] Added total generation timeout for signatures --- chain-signatures/contract/src/config/impls.rs | 1 + chain-signatures/contract/src/config/mod.rs | 4 ++ .../node/src/protocol/signature.rs | 42 ++++++++++++------- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/chain-signatures/contract/src/config/impls.rs b/chain-signatures/contract/src/config/impls.rs index 4ded7e0ff..36c7d5b58 100644 --- a/chain-signatures/contract/src/config/impls.rs +++ b/chain-signatures/contract/src/config/impls.rs @@ -68,6 +68,7 @@ impl Default for SignatureConfig { fn default() -> Self { Self { generation_timeout: secs_to_ms(45), + generation_timeout_total: secs_to_ms(200), garbage_timeout: hours_to_ms(24), other: Default::default(), diff --git a/chain-signatures/contract/src/config/mod.rs b/chain-signatures/contract/src/config/mod.rs index 6dfd7bea8..12449b905 100644 --- a/chain-signatures/contract/src/config/mod.rs +++ b/chain-signatures/contract/src/config/mod.rs @@ -83,6 +83,10 @@ pub struct PresignatureConfig { pub struct SignatureConfig { /// Timeout for signature generation in milliseconds. pub generation_timeout: u64, + /// Timeout for signature generation in milliseconds. This is the total timeout for + /// the signature generation process. Mainly used to include the whole generation of + /// signatures including their retries up till this timeout. + pub generation_timeout_total: u64, /// Garbage collection timeout in milliseconds for signatures generated. pub garbage_timeout: u64, diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index cb4aab7f3..0ea282bb2 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -133,6 +133,7 @@ pub struct SignatureGenerator { pub sign_request_timestamp: Instant, pub generator_timestamp: Instant, pub timeout: Duration, + pub timeout_total: Duration, } impl SignatureGenerator { @@ -146,7 +147,7 @@ impl SignatureGenerator { epsilon: Scalar, delta: Scalar, sign_request_timestamp: Instant, - timeout: u64, + cfg: &ProtocolConfig, ) -> Self { Self { protocol, @@ -158,15 +159,22 @@ impl SignatureGenerator { delta, sign_request_timestamp, generator_timestamp: Instant::now(), - timeout: Duration::from_millis(timeout), + timeout: Duration::from_millis(cfg.signature.generation_timeout), + timeout_total: Duration::from_millis(cfg.signature.generation_timeout_total), } } pub fn poke(&mut self) -> Result>, ProtocolError> { + if self.sign_request_timestamp.elapsed() > self.timeout_total { + return Err(ProtocolError::Other( + anyhow::anyhow!("signature protocol timeout completely").into(), + )); + } + if self.generator_timestamp.elapsed() > self.timeout { - tracing::info!(self.presignature_id, "signature protocol timed out"); + tracing::warn!(self.presignature_id, "signature protocol timed out"); return Err(ProtocolError::Other( - anyhow::anyhow!("signature protocol timed out").into(), + anyhow::anyhow!("signature protocol timeout").into(), )); } @@ -253,7 +261,7 @@ impl SignatureManager { public_key: PublicKey, presignature: Presignature, req: GenerationRequest, - timeout: u64, + cfg: &ProtocolConfig, ) -> Result { let participants = participants.keys_vec(); let GenerationRequest { @@ -286,7 +294,7 @@ impl SignatureManager { epsilon, delta, sign_request_timestamp, - timeout, + cfg, )) } @@ -296,7 +304,7 @@ impl SignatureManager { req: GenerationRequest, presignature: Presignature, participants: &Participants, - timeout: u64, + cfg: &ProtocolConfig, ) -> Result<(), InitializationError> { tracing::info!(receipt_id = %receipt_id, participants = ?participants.keys_vec(), "restarting failed protocol to generate signature"); let generator = Self::generate_internal( @@ -305,7 +313,7 @@ impl SignatureManager { self.public_key, presignature, req, - timeout, + cfg, )?; self.generators.insert(receipt_id, generator); Ok(()) @@ -322,7 +330,7 @@ impl SignatureManager { epsilon: Scalar, delta: Scalar, sign_request_timestamp: Instant, - timeout: u64, + cfg: &ProtocolConfig, ) -> Result<(), InitializationError> { tracing::info!( %receipt_id, @@ -343,7 +351,7 @@ impl SignatureManager { delta, sign_request_timestamp, }, - timeout, + cfg, )?; self.generators.insert(receipt_id, generator); Ok(()) @@ -404,7 +412,7 @@ impl SignatureManager { delta, sign_request_timestamp: Instant::now(), }, - cfg.signature.generation_timeout, + cfg, )?; let generator = entry.insert(generator); Ok(&mut generator.protocol) @@ -424,8 +432,8 @@ impl SignatureManager { let action = match generator.poke() { Ok(action) => action, Err(err) => { - tracing::warn!(?err, "signature failed to be produced; pushing request back into failed queue"); - if generator.proposer == self.me { + if generator.proposer == self.me && generator.sign_request_timestamp.elapsed() < generator.timeout_total { + tracing::warn!(?err, "signature failed to be produced; pushing request back into failed queue"); // only retry the signature generation if it was initially proposed by us. We do not // want any nodes to be proposing the same signature multiple times. self.failed.push_back(( @@ -438,6 +446,9 @@ impl SignatureManager { sign_request_timestamp: generator.sign_request_timestamp }, )); + } else { + self.completed.insert(*receipt_id, Instant::now()); + tracing::warn!(?err, "signature failed to be produced; trashing request"); } break false; } @@ -555,9 +566,10 @@ impl SignatureManager { failed_req, presignature, &sig_participants, - cfg.signature.generation_timeout, + cfg, ) { tracing::warn!(%receipt_id, presig_id, ?err, "failed to retry signature generation: trashing presignature"); + self.completed.insert(receipt_id, Instant::now()); continue; } @@ -584,7 +596,7 @@ impl SignatureManager { my_request.epsilon, my_request.delta, my_request.time_added, - cfg.signature.generation_timeout, + cfg, ) { tracing::warn!(%receipt_id, presig_id, ?err, "failed to start signature generation: trashing presignature"); continue; From 1cd175ad94841ac6cd8f33248b7da06b8387f3c6 Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Tue, 30 Jul 2024 18:58:25 -0700 Subject: [PATCH 2/6] Added explicit insertion ordering for requests --- .../node/src/protocol/cryptography.rs | 6 ++- .../node/src/protocol/signature.rs | 45 +++++++++++++++---- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index 9c6a241ed..12f08c811 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -438,8 +438,10 @@ impl CryptographicProtocol for RunningState { crate::metrics::SIGN_QUEUE_SIZE .with_label_values(&[my_account_id.as_str()]) .set(sign_queue.len() as i64); - sign_queue.organize(self.threshold, &stable, ctx.me().await, &my_account_id); - let my_requests = sign_queue.my_requests(ctx.me().await); + let me = ctx.me().await; + sign_queue.organize(self.threshold, &stable, me, &my_account_id); + + let my_requests = sign_queue.my_requests(me); crate::metrics::SIGN_QUEUE_MINE_SIZE .with_label_values(&[my_account_id.as_str()]) .set(my_requests.len() as i64); diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index 0ea282bb2..f762c1b77 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -35,10 +35,43 @@ pub struct SignRequest { pub time_added: Instant, } +/// Type that preserves the insertion order of requests. +#[derive(Default)] +pub struct ParticipantRequests { + requests: HashMap, + order: VecDeque, +} + +impl ParticipantRequests { + fn insert(&mut self, receipt_id: ReceiptId, request: SignRequest) { + self.requests.insert(receipt_id, request); + self.order.push_back(receipt_id); + } + + fn contains_key(&self, receipt_id: &ReceiptId) -> bool { + self.requests.contains_key(receipt_id) + } + + pub fn len(&self) -> usize { + self.requests.len() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn pop_front(&mut self) -> Option<(ReceiptId, SignRequest)> { + let receipt_id = self.order.pop_front()?; + self.requests + .remove(&receipt_id) + .map(|req| (receipt_id, req)) + } +} + #[derive(Default)] pub struct SignQueue { unorganized_requests: Vec, - requests: HashMap>, + requests: HashMap, } impl SignQueue { @@ -116,7 +149,7 @@ impl SignQueue { participant_requests.contains_key(&receipt_id) } - pub fn my_requests(&mut self, me: Participant) -> &mut HashMap { + pub fn my_requests(&mut self, me: Participant) -> &mut ParticipantRequests { self.requests.entry(me).or_default() } } @@ -524,7 +557,7 @@ impl SignatureManager { &mut self, threshold: usize, stable: &Participants, - my_requests: &mut HashMap, + my_requests: &mut ParticipantRequests, presignature_manager: &mut PresignatureManager, cfg: &ProtocolConfig, ) { @@ -580,11 +613,7 @@ impl SignatureManager { } } - let Some(receipt_id) = my_requests.keys().next().cloned() else { - failed_presigs.push(presignature); - continue; - }; - let Some(my_request) = my_requests.remove(&receipt_id) else { + let Some((receipt_id, my_request)) = my_requests.pop_front() else { failed_presigs.push(presignature); continue; }; From 18a23dda5d20064e6509f58ae59daf4a25a1c596 Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Tue, 30 Jul 2024 19:04:07 -0700 Subject: [PATCH 3/6] Cleanup indexer --- chain-signatures/node/src/indexer.rs | 114 +++++++++++++++------------ 1 file changed, 63 insertions(+), 51 deletions(-) diff --git a/chain-signatures/node/src/indexer.rs b/chain-signatures/node/src/indexer.rs index 101f64298..e84f1803f 100644 --- a/chain-signatures/node/src/indexer.rs +++ b/chain-signatures/node/src/indexer.rs @@ -3,7 +3,6 @@ use crate::gcp::GcpService; use crate::kdf; use crate::protocol::{SignQueue, SignRequest}; use crate::types::LatestBlockHeight; -use anyhow::Context as _; use crypto_shared::{derive_epsilon, ScalarExt}; use k256::Scalar; use near_account_id::AccountId; @@ -150,62 +149,75 @@ async fn handle_block( let mut pending_requests = Vec::new(); for action in block.actions().cloned().collect::>() { if action.receiver_id() == ctx.mpc_contract_id { - let receipt = - anyhow::Context::with_context(block.receipt_by_id(&action.receipt_id()), || { - format!( - "indexer unable to find block for receipt_id={}", - action.receipt_id() - ) - })?; + let Some(receipt) = block.receipt_by_id(&action.receipt_id()) else { + let err = format!( + "indexer unable to find block for receipt_id={}", + action.receipt_id() + ); + tracing::warn!("{err}"); + anyhow::bail!(err); + }; let ExecutionStatus::SuccessReceiptId(receipt_id) = receipt.status() else { continue; }; - if let Some(function_call) = action.as_function_call() { - if function_call.method_name() == "sign" { - if let Ok(arguments) = - serde_json::from_slice::<'_, SignArguments>(function_call.args()) - { - if receipt.logs().is_empty() { - tracing::warn!("`sign` did not produce entropy"); + let Some(function_call) = action.as_function_call() else { + continue; + }; + if function_call.method_name() == "sign" { + let arguments = + match serde_json::from_slice::<'_, SignArguments>(function_call.args()) { + Ok(arguments) => arguments, + Err(err) => { + tracing::warn!(%err, "failed to parse `sign` arguments"); continue; } - let payload = Scalar::from_bytes(arguments.request.payload) - .context("Payload cannot be converted to scalar, not in k256 field")?; - let Ok(entropy) = serde_json::from_str::<'_, [u8; 32]>(&receipt.logs()[1]) - else { - tracing::warn!( - "`sign` did not produce entropy correctly: {:?}", - receipt.logs()[0] - ); - continue; - }; - let epsilon = - derive_epsilon(&action.predecessor_id(), &arguments.request.path); - let delta = kdf::derive_delta(receipt_id, entropy); - tracing::info!( - receipt_id = %receipt_id, - caller_id = receipt.predecessor_id().to_string(), - our_account = ctx.node_account_id.to_string(), - payload = hex::encode(arguments.request.payload), - key_version = arguments.request.key_version, - entropy = hex::encode(entropy), - "indexed new `sign` function call" - ); - let request = ContractSignRequest { - payload, - path: arguments.request.path, - key_version: arguments.request.key_version, - }; - pending_requests.push(SignRequest { - receipt_id, - request, - epsilon, - delta, - entropy, - time_added: Instant::now(), - }); - } + }; + + if receipt.logs().is_empty() { + tracing::warn!("`sign` did not produce entropy"); + continue; } + + let Some(payload) = Scalar::from_bytes(arguments.request.payload) else { + tracing::warn!( + "`sign` did not produce payload correctly: {:?}", + arguments.request.payload, + ); + continue; + }; + + let Ok(entropy) = serde_json::from_str::<'_, [u8; 32]>(&receipt.logs()[1]) else { + tracing::warn!( + "`sign` did not produce entropy correctly: {:?}", + receipt.logs()[0] + ); + continue; + }; + let epsilon = derive_epsilon(&action.predecessor_id(), &arguments.request.path); + let delta = kdf::derive_delta(receipt_id, entropy); + tracing::info!( + receipt_id = %receipt_id, + caller_id = receipt.predecessor_id().to_string(), + our_account = ctx.node_account_id.to_string(), + payload = hex::encode(arguments.request.payload), + key_version = arguments.request.key_version, + entropy = hex::encode(entropy), + "indexed new `sign` function call" + ); + let request = ContractSignRequest { + payload, + path: arguments.request.path, + key_version: arguments.request.key_version, + }; + pending_requests.push(SignRequest { + receipt_id, + request, + epsilon, + delta, + entropy, + // TODO: use indexer timestamp instead. + time_added: Instant::now(), + }); } } } From 218f7c499680297ebe95376c0c0b32846f4e8c7b Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Tue, 30 Jul 2024 19:32:13 -0700 Subject: [PATCH 4/6] Add back failed presigs on bad parameters --- .../node/src/protocol/signature.rs | 94 +++++++++++-------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index f762c1b77..ef827d10d 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -295,7 +295,7 @@ impl SignatureManager { presignature: Presignature, req: GenerationRequest, cfg: &ProtocolConfig, - ) -> Result { + ) -> Result { let participants = participants.keys_vec(); let GenerationRequest { proposer, @@ -311,18 +311,22 @@ impl SignatureManager { k: k * delta.invert().unwrap(), sigma: (sigma + epsilon * k) * delta.invert().unwrap(), }; - let protocol = Box::new(cait_sith::sign( - &participants, - me, - derive_key(public_key, epsilon), - output, - request.payload, - )?); + let presignature_id = presignature.id; + let protocol = Box::new( + cait_sith::sign( + &participants, + me, + derive_key(public_key, epsilon), + output, + request.payload, + ) + .map_err(|err| (presignature, err))?, + ); Ok(SignatureGenerator::new( protocol, participants, proposer, - presignature.id, + presignature_id, request, epsilon, delta, @@ -338,7 +342,7 @@ impl SignatureManager { presignature: Presignature, participants: &Participants, cfg: &ProtocolConfig, - ) -> Result<(), InitializationError> { + ) -> Result<(), (Presignature, InitializationError)> { tracing::info!(receipt_id = %receipt_id, participants = ?participants.keys_vec(), "restarting failed protocol to generate signature"); let generator = Self::generate_internal( participants, @@ -364,7 +368,7 @@ impl SignatureManager { delta: Scalar, sign_request_timestamp: Instant, cfg: &ProtocolConfig, - ) -> Result<(), InitializationError> { + ) -> Result<(), (Presignature, InitializationError)> { tracing::info!( %receipt_id, me = ?self.me, @@ -433,7 +437,7 @@ impl SignatureManager { Err(err) => return Err(err), }; tracing::info!(me = ?self.me, presignature_id, "found presignature: ready to start signature generation"); - let generator = Self::generate_internal( + let generator = match Self::generate_internal( participants, self.me, self.public_key, @@ -446,7 +450,14 @@ impl SignatureManager { sign_request_timestamp: Instant::now(), }, cfg, - )?; + ) { + Ok(generator) => generator, + Err((presignature, err @ InitializationError::BadParameters(_))) => { + presignature_manager.insert_mine(presignature); + tracing::warn!(%receipt_id, presignature_id, ?err, "failed to start signature generation"); + return Err(GenerationError::CaitSithInitializationError(err)); + } + }; let generator = entry.insert(generator); Ok(&mut generator.protocol) } @@ -465,23 +476,25 @@ impl SignatureManager { let action = match generator.poke() { Ok(action) => action, Err(err) => { - if generator.proposer == self.me && generator.sign_request_timestamp.elapsed() < generator.timeout_total { - tracing::warn!(?err, "signature failed to be produced; pushing request back into failed queue"); - // only retry the signature generation if it was initially proposed by us. We do not - // want any nodes to be proposing the same signature multiple times. - self.failed.push_back(( - *receipt_id, - GenerationRequest { - proposer: generator.proposer, - request: generator.request.clone(), - epsilon: generator.epsilon, - delta: generator.delta, - sign_request_timestamp: generator.sign_request_timestamp - }, - )); - } else { - self.completed.insert(*receipt_id, Instant::now()); - tracing::warn!(?err, "signature failed to be produced; trashing request"); + if generator.proposer == self.me { + if generator.sign_request_timestamp.elapsed() < generator.timeout_total { + tracing::warn!(?err, "signature failed to be produced; pushing request back into failed queue"); + // only retry the signature generation if it was initially proposed by us. We do not + // want any nodes to be proposing the same signature multiple times. + self.failed.push_back(( + *receipt_id, + GenerationRequest { + proposer: generator.proposer, + request: generator.request.clone(), + epsilon: generator.epsilon, + delta: generator.delta, + sign_request_timestamp: generator.sign_request_timestamp + }, + )); + } else { + self.completed.insert(*receipt_id, Instant::now()); + tracing::warn!(?err, "signature failed to be produced; trashing request"); + } } break false; } @@ -594,15 +607,17 @@ impl SignatureManager { // when the request made it into the NEAR network. // issue: https://github.com/near/mpc-recovery/issues/596 if let Some((receipt_id, failed_req)) = self.failed.pop_front() { - if let Err(err) = self.retry_failed_generation( - receipt_id, - failed_req, - presignature, - &sig_participants, - cfg, - ) { + if let Err((presignature, InitializationError::BadParameters(err))) = self + .retry_failed_generation( + receipt_id, + failed_req, + presignature, + &sig_participants, + cfg, + ) + { tracing::warn!(%receipt_id, presig_id, ?err, "failed to retry signature generation: trashing presignature"); - self.completed.insert(receipt_id, Instant::now()); + failed_presigs.push(presignature); continue; } @@ -617,7 +632,7 @@ impl SignatureManager { failed_presigs.push(presignature); continue; }; - if let Err(err) = self.generate( + if let Err((presignature, InitializationError::BadParameters(err))) = self.generate( &sig_participants, receipt_id, presignature, @@ -627,6 +642,7 @@ impl SignatureManager { my_request.time_added, cfg, ) { + failed_presigs.push(presignature); tracing::warn!(%receipt_id, presig_id, ?err, "failed to start signature generation: trashing presignature"); continue; } From e2ca9369da0a44db208ac50ebd1c0a17f8cb4010 Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Tue, 30 Jul 2024 19:33:01 -0700 Subject: [PATCH 5/6] Fix test --- chain-signatures/contract/src/config/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/chain-signatures/contract/src/config/mod.rs b/chain-signatures/contract/src/config/mod.rs index 12449b905..682e2408f 100644 --- a/chain-signatures/contract/src/config/mod.rs +++ b/chain-signatures/contract/src/config/mod.rs @@ -119,6 +119,7 @@ mod tests { }, "signature": { "generation_timeout": 10000, + "generation_timeout_total": 1000000, "garbage_timeout": 10000000 }, "string": "value", From 260cef103bd73e9064c1a7065cf0f1e968d5e29e Mon Sep 17 00:00:00 2001 From: Phuong Nguyen Date: Tue, 30 Jul 2024 20:50:00 -0700 Subject: [PATCH 6/6] Clippy --- chain-signatures/node/src/protocol/signature.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index ef827d10d..f9038a38a 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -288,6 +288,7 @@ impl SignatureManager { } #[allow(clippy::too_many_arguments)] + #[allow(clippy::result_large_err)] fn generate_internal( participants: &Participants, me: Participant, @@ -335,6 +336,7 @@ impl SignatureManager { )) } + #[allow(clippy::result_large_err)] fn retry_failed_generation( &mut self, receipt_id: ReceiptId, @@ -358,6 +360,7 @@ impl SignatureManager { /// Starts a new presignature generation protocol. #[allow(clippy::too_many_arguments)] + #[allow(clippy::result_large_err)] pub fn generate( &mut self, participants: &Participants,