From aee251a889aa1705dd0aa10b708c88e592d90ce1 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Wed, 8 Jul 2020 12:46:49 +0200 Subject: [PATCH 01/32] Add subsystem-util crate. Start by moving the JobCanceler here. --- Cargo.lock | 8 ++++++++ Cargo.toml | 1 + node/subsystem-util/Cargo.toml | 10 ++++++++++ node/subsystem-util/README.md | 3 +++ node/subsystem-util/src/lib.rs | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 55 insertions(+) create mode 100644 node/subsystem-util/Cargo.toml create mode 100644 node/subsystem-util/README.md create mode 100644 node/subsystem-util/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 8583a3b1db51..30f6cc2ed09b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4554,6 +4554,14 @@ dependencies = [ "sc-network", ] +[[package]] +name = "polkadot-node-subsystem-util" +version = "0.1.0" +dependencies = [ + "futures 0.3.5", + "polkadot-primitives", +] + [[package]] name = "polkadot-overseer" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index b6d1aa53eaa2..c02485c0ebc1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ members = [ "node/service", "node/core/backing", "node/subsystem", + "node/subsystem-util", "node/test-helpers/subsystem", "node/test-service", diff --git a/node/subsystem-util/Cargo.toml b/node/subsystem-util/Cargo.toml new file mode 100644 index 000000000000..4d701b4dabe6 --- /dev/null +++ b/node/subsystem-util/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "polkadot-node-subsystem-util" +version = "0.1.0" +authors = ["Parity Technologies "] +edition = "2018" +description = "Basic utilities for subsystems" + +[dependencies] +futures = "0.3.5" +polkadot-primitives = { path = "../../primitives" } diff --git a/node/subsystem-util/README.md b/node/subsystem-util/README.md new file mode 100644 index 000000000000..a09351d16606 --- /dev/null +++ b/node/subsystem-util/README.md @@ -0,0 +1,3 @@ +# `subsystem-util` + +Utilities for subsystems. Lots of subsystems have common interests such as canceling a bunch of spawned jobs, or determining what their validator ID is. Those common interests are factored into this crate. diff --git a/node/subsystem-util/src/lib.rs b/node/subsystem-util/src/lib.rs new file mode 100644 index 000000000000..647f1b4859fe --- /dev/null +++ b/node/subsystem-util/src/lib.rs @@ -0,0 +1,33 @@ +use futures::future::AbortHandle; +use polkadot_primitives::Hash; +use std::{collections::HashMap, ops::{Deref, DerefMut}}; + +/// JobCanceler aborts all contained abort handles on drop +#[derive(Debug, Default)] +pub struct JobCanceler(HashMap); + +impl Drop for JobCanceler { + fn drop(&mut self) { + for abort_handle in self.0.values() { + abort_handle.abort(); + } + } +} + +// JobCanceler is a smart pointer wrapping the contained hashmap; +// it only cares about the wrapped data insofar as it's necessary +// to implement proper Drop behavior. Therefore, it's appropriate +// to impl Deref and DerefMut. +impl Deref for JobCanceler { + type Target = HashMap; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for JobCanceler { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} From 1719606ad0e43821e78ff3f7d6b2806a4b4ad5a5 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Wed, 8 Jul 2020 14:09:51 +0200 Subject: [PATCH 02/32] copy utility functions for requesting runtime data; generalize --- Cargo.lock | 2 + node/subsystem-util/Cargo.toml | 2 + node/subsystem-util/src/lib.rs | 128 +++++++++++++++++++++++++++++---- 3 files changed, 117 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 30f6cc2ed09b..7fda84e2d4e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4558,7 +4558,9 @@ dependencies = [ name = "polkadot-node-subsystem-util" version = "0.1.0" dependencies = [ + "derive_more 0.99.9", "futures 0.3.5", + "polkadot-node-subsystem", "polkadot-primitives", ] diff --git a/node/subsystem-util/Cargo.toml b/node/subsystem-util/Cargo.toml index 4d701b4dabe6..d3a6465999e6 100644 --- a/node/subsystem-util/Cargo.toml +++ b/node/subsystem-util/Cargo.toml @@ -6,5 +6,7 @@ edition = "2018" description = "Basic utilities for subsystems" [dependencies] +derive_more = "0.99.9" futures = "0.3.5" polkadot-primitives = { path = "../../primitives" } +polkadot-node-subsystem = { path = "../subsystem" } diff --git a/node/subsystem-util/src/lib.rs b/node/subsystem-util/src/lib.rs index 647f1b4859fe..485720859e6a 100644 --- a/node/subsystem-util/src/lib.rs +++ b/node/subsystem-util/src/lib.rs @@ -1,17 +1,34 @@ -use futures::future::AbortHandle; -use polkadot_primitives::Hash; -use std::{collections::HashMap, ops::{Deref, DerefMut}}; +use futures::{ + channel::{mpsc, oneshot}, + future::AbortHandle, + prelude::*, + task::SpawnError, +}; +use polkadot_node_subsystem::messages::{ + AllMessages, RuntimeApiMessage, RuntimeApiRequest, SchedulerRoster, +}; +use polkadot_primitives::{ + parachain::{ + GlobalValidationSchedule, HeadData, Id as ParaId, LocalValidationData, SigningContext, + ValidatorId, + }, + Hash, +}; +use std::{ + collections::HashMap, + ops::{Deref, DerefMut}, +}; /// JobCanceler aborts all contained abort handles on drop #[derive(Debug, Default)] pub struct JobCanceler(HashMap); impl Drop for JobCanceler { - fn drop(&mut self) { - for abort_handle in self.0.values() { - abort_handle.abort(); - } - } + fn drop(&mut self) { + for abort_handle in self.0.values() { + abort_handle.abort(); + } + } } // JobCanceler is a smart pointer wrapping the contained hashmap; @@ -19,15 +36,96 @@ impl Drop for JobCanceler { // to implement proper Drop behavior. Therefore, it's appropriate // to impl Deref and DerefMut. impl Deref for JobCanceler { - type Target = HashMap; + type Target = HashMap; - fn deref(&self) -> &Self::Target { - &self.0 - } + fn deref(&self) -> &Self::Target { + &self.0 + } } impl DerefMut for JobCanceler { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +#[derive(Debug, derive_more::From)] +pub enum Error { + #[from] + Oneshot(oneshot::Canceled), + #[from] + Mpsc(mpsc::SendError), + #[from] + Spawn(SpawnError), +} + +/// Request some data from the `RuntimeApi`. +pub async fn request_from_runtime( + parent: Hash, + sender: &mut mpsc::Sender, + request_builder: RequestBuilder, +) -> Result, Error> +where + RequestBuilder: FnOnce(oneshot::Sender) -> RuntimeApiRequest, +{ + let (tx, rx) = oneshot::channel(); + + sender + .send(AllMessages::RuntimeApi(RuntimeApiMessage::Request( + parent, + request_builder(tx), + ))) + .await?; + + Ok(rx) +} + +/// Request a `GlobalValidationSchedule` from `RuntimeApi`. +pub async fn request_global_validation_schedule( + parent: Hash, + s: &mut mpsc::Sender, +) -> Result, Error> { + request_from_runtime(parent, s, |tx| RuntimeApiRequest::GlobalValidationSchedule(tx)).await +} + +/// Request a `LocalValidationData` from `RuntimeApi`. +pub async fn request_local_validation_data( + parent: Hash, + para_id: ParaId, + s: &mut mpsc::Sender, +) -> Result>, Error> { + request_from_runtime(parent, s, |tx| RuntimeApiRequest::LocalValidationData(para_id, tx)).await +} + +/// Request a validator set from the `RuntimeApi`. +pub async fn request_validators( + parent: Hash, + s: &mut mpsc::Sender, +) -> Result>, Error> { + request_from_runtime(parent, s, |tx| RuntimeApiRequest::Validators(tx)).await +} + +/// Request the scheduler roster from `RuntimeApi`. +pub async fn request_validator_groups( + parent: Hash, + s: &mut mpsc::Sender, +) -> Result, Error> { + request_from_runtime(parent, s, |tx| RuntimeApiRequest::ValidatorGroups(tx)).await +} + +/// Request a `SigningContext` from the `RuntimeApi`. +pub async fn request_signing_context( + parent: Hash, + s: &mut mpsc::Sender, +) -> Result, Error> { + request_from_runtime(parent, s, |tx| RuntimeApiRequest::SigningContext(tx)).await +} + +/// Request `HeadData` for some `ParaId` from `RuntimeApi`. +pub async fn request_head_data( + parent: Hash, + s: &mut mpsc::Sender, + id: ParaId, +) -> Result, Error> { + request_from_runtime(parent, s, |tx| RuntimeApiRequest::HeadData(id, tx)).await } From bc5efd0dfaf874e5b66f93e5bbb282763471d69a Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Wed, 8 Jul 2020 14:19:28 +0200 Subject: [PATCH 03/32] convert subsystem-util from crate to module in subsystem The point of making a sub-crate is to ensure that only the necessary parts of a program get compiled; if a dependent package needed only subsystem-util, and not subsystem, then subsystem wouldn't need to be compiled. However, that will never happen: subsystem-util depends on subsystem::messages, so subsystem will always be compiled. Therefore, it makes more sense to add it as a module in the existing crate than as a new and distinct crate. --- Cargo.lock | 11 +----- Cargo.toml | 1 - node/subsystem-util/Cargo.toml | 12 ------- node/subsystem-util/README.md | 3 -- node/subsystem/Cargo.toml | 7 ++-- node/subsystem/src/lib.rs | 1 + .../src/lib.rs => subsystem/src/util.rs} | 36 ++++++++++++++++--- 7 files changed, 37 insertions(+), 34 deletions(-) delete mode 100644 node/subsystem-util/Cargo.toml delete mode 100644 node/subsystem-util/README.md rename node/{subsystem-util/src/lib.rs => subsystem/src/util.rs} (72%) diff --git a/Cargo.lock b/Cargo.lock index 7fda84e2d4e7..b753aae13be9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4547,6 +4547,7 @@ name = "polkadot-node-subsystem" version = "0.1.0" dependencies = [ "async-trait", + "derive_more 0.99.9", "futures 0.3.5", "polkadot-node-primitives", "polkadot-primitives", @@ -4554,16 +4555,6 @@ dependencies = [ "sc-network", ] -[[package]] -name = "polkadot-node-subsystem-util" -version = "0.1.0" -dependencies = [ - "derive_more 0.99.9", - "futures 0.3.5", - "polkadot-node-subsystem", - "polkadot-primitives", -] - [[package]] name = "polkadot-overseer" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index c02485c0ebc1..b6d1aa53eaa2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,7 +52,6 @@ members = [ "node/service", "node/core/backing", "node/subsystem", - "node/subsystem-util", "node/test-helpers/subsystem", "node/test-service", diff --git a/node/subsystem-util/Cargo.toml b/node/subsystem-util/Cargo.toml deleted file mode 100644 index d3a6465999e6..000000000000 --- a/node/subsystem-util/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "polkadot-node-subsystem-util" -version = "0.1.0" -authors = ["Parity Technologies "] -edition = "2018" -description = "Basic utilities for subsystems" - -[dependencies] -derive_more = "0.99.9" -futures = "0.3.5" -polkadot-primitives = { path = "../../primitives" } -polkadot-node-subsystem = { path = "../subsystem" } diff --git a/node/subsystem-util/README.md b/node/subsystem-util/README.md deleted file mode 100644 index a09351d16606..000000000000 --- a/node/subsystem-util/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# `subsystem-util` - -Utilities for subsystems. Lots of subsystems have common interests such as canceling a bunch of spawned jobs, or determining what their validator ID is. Those common interests are factored into this crate. diff --git a/node/subsystem/Cargo.toml b/node/subsystem/Cargo.toml index 43712319cb71..d63ce34ec1a7 100644 --- a/node/subsystem/Cargo.toml +++ b/node/subsystem/Cargo.toml @@ -6,9 +6,10 @@ edition = "2018" description = "Subsystem traits and message definitions" [dependencies] +async-trait = "0.1" +derive_more = "0.99.9" +futures = "0.3.5" +polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } polkadot-statement-table = { path = "../../statement-table" } -polkadot-node-primitives = { path = "../primitives" } sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } -futures = "0.3.5" -async-trait = "0.1" diff --git a/node/subsystem/src/lib.rs b/node/subsystem/src/lib.rs index e374eb9cfcdd..c318d5440992 100644 --- a/node/subsystem/src/lib.rs +++ b/node/subsystem/src/lib.rs @@ -32,6 +32,7 @@ use async_trait::async_trait; use crate::messages::AllMessages; pub mod messages; +pub mod util; /// Signals sent by an overseer to a subsystem. #[derive(PartialEq, Clone, Debug)] diff --git a/node/subsystem-util/src/lib.rs b/node/subsystem/src/util.rs similarity index 72% rename from node/subsystem-util/src/lib.rs rename to node/subsystem/src/util.rs index 485720859e6a..b60e49dabb2d 100644 --- a/node/subsystem-util/src/lib.rs +++ b/node/subsystem/src/util.rs @@ -1,12 +1,32 @@ +// Copyright 2017-2020 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Utility module for subsystems +//! +//! Many subsystems have common interests such as canceling a bunch of spawned jobs, +//! or determining what their validator ID is. These common interests are factored into +//! this module. + +use crate::messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, SchedulerRoster}; use futures::{ channel::{mpsc, oneshot}, future::AbortHandle, prelude::*, task::SpawnError, }; -use polkadot_node_subsystem::messages::{ - AllMessages, RuntimeApiMessage, RuntimeApiRequest, SchedulerRoster, -}; use polkadot_primitives::{ parachain::{ GlobalValidationSchedule, HeadData, Id as ParaId, LocalValidationData, SigningContext, @@ -85,7 +105,10 @@ pub async fn request_global_validation_schedule( parent: Hash, s: &mut mpsc::Sender, ) -> Result, Error> { - request_from_runtime(parent, s, |tx| RuntimeApiRequest::GlobalValidationSchedule(tx)).await + request_from_runtime(parent, s, |tx| { + RuntimeApiRequest::GlobalValidationSchedule(tx) + }) + .await } /// Request a `LocalValidationData` from `RuntimeApi`. @@ -94,7 +117,10 @@ pub async fn request_local_validation_data( para_id: ParaId, s: &mut mpsc::Sender, ) -> Result>, Error> { - request_from_runtime(parent, s, |tx| RuntimeApiRequest::LocalValidationData(para_id, tx)).await + request_from_runtime(parent, s, |tx| { + RuntimeApiRequest::LocalValidationData(para_id, tx) + }) + .await } /// Request a validator set from the `RuntimeApi`. From 145dd589d3e856b0741aa8f2e4e69798dc3018da Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Wed, 8 Jul 2020 15:38:19 +0200 Subject: [PATCH 04/32] make runtime request sender type generic --- node/subsystem/src/util.rs | 77 ++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index b60e49dabb2d..6a3431fd3329 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -36,6 +36,7 @@ use polkadot_primitives::{ }; use std::{ collections::HashMap, + convert::{TryFrom, TryInto}, ops::{Deref, DerefMut}, }; @@ -77,34 +78,42 @@ pub enum Error { Mpsc(mpsc::SendError), #[from] Spawn(SpawnError), + SenderConversion(String), } /// Request some data from the `RuntimeApi`. -pub async fn request_from_runtime( +pub async fn request_from_runtime( parent: Hash, - sender: &mut mpsc::Sender, + sender: &mut mpsc::Sender, request_builder: RequestBuilder, ) -> Result, Error> where RequestBuilder: FnOnce(oneshot::Sender) -> RuntimeApiRequest, + SenderMessage: TryFrom, + >::Error: std::fmt::Debug, { let (tx, rx) = oneshot::channel(); sender - .send(AllMessages::RuntimeApi(RuntimeApiMessage::Request( - parent, - request_builder(tx), - ))) + .send( + AllMessages::RuntimeApi(RuntimeApiMessage::Request(parent, request_builder(tx))) + .try_into() + .map_err(|err| Error::SenderConversion(format!("{:?}", err)))?, + ) .await?; Ok(rx) } /// Request a `GlobalValidationSchedule` from `RuntimeApi`. -pub async fn request_global_validation_schedule( +pub async fn request_global_validation_schedule( parent: Hash, - s: &mut mpsc::Sender, -) -> Result, Error> { + s: &mut mpsc::Sender, +) -> Result, Error> +where + SenderMessage: TryFrom, + >::Error: std::fmt::Debug, +{ request_from_runtime(parent, s, |tx| { RuntimeApiRequest::GlobalValidationSchedule(tx) }) @@ -112,11 +121,15 @@ pub async fn request_global_validation_schedule( } /// Request a `LocalValidationData` from `RuntimeApi`. -pub async fn request_local_validation_data( +pub async fn request_local_validation_data( parent: Hash, para_id: ParaId, - s: &mut mpsc::Sender, -) -> Result>, Error> { + s: &mut mpsc::Sender, +) -> Result>, Error> +where + SenderMessage: TryFrom, + >::Error: std::fmt::Debug, +{ request_from_runtime(parent, s, |tx| { RuntimeApiRequest::LocalValidationData(para_id, tx) }) @@ -124,34 +137,50 @@ pub async fn request_local_validation_data( } /// Request a validator set from the `RuntimeApi`. -pub async fn request_validators( +pub async fn request_validators( parent: Hash, - s: &mut mpsc::Sender, -) -> Result>, Error> { + s: &mut mpsc::Sender, +) -> Result>, Error> +where + SenderMessage: TryFrom, + >::Error: std::fmt::Debug, +{ request_from_runtime(parent, s, |tx| RuntimeApiRequest::Validators(tx)).await } /// Request the scheduler roster from `RuntimeApi`. -pub async fn request_validator_groups( +pub async fn request_validator_groups( parent: Hash, - s: &mut mpsc::Sender, -) -> Result, Error> { + s: &mut mpsc::Sender, +) -> Result, Error> +where + SenderMessage: TryFrom, + >::Error: std::fmt::Debug, +{ request_from_runtime(parent, s, |tx| RuntimeApiRequest::ValidatorGroups(tx)).await } /// Request a `SigningContext` from the `RuntimeApi`. -pub async fn request_signing_context( +pub async fn request_signing_context( parent: Hash, - s: &mut mpsc::Sender, -) -> Result, Error> { + s: &mut mpsc::Sender, +) -> Result, Error> +where + SenderMessage: TryFrom, + >::Error: std::fmt::Debug, +{ request_from_runtime(parent, s, |tx| RuntimeApiRequest::SigningContext(tx)).await } /// Request `HeadData` for some `ParaId` from `RuntimeApi`. -pub async fn request_head_data( +pub async fn request_head_data( parent: Hash, - s: &mut mpsc::Sender, + s: &mut mpsc::Sender, id: ParaId, -) -> Result, Error> { +) -> Result, Error> +where + SenderMessage: TryFrom, + >::Error: std::fmt::Debug, +{ request_from_runtime(parent, s, |tx| RuntimeApiRequest::HeadData(id, tx)).await } From e76239d76d3b757935f55914bd28202e8e5bbb40 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Wed, 8 Jul 2020 15:56:43 +0200 Subject: [PATCH 05/32] candidate backing subsystem uses util for api requests --- node/core/backing/src/lib.rs | 78 +++++++++++++----------------------- 1 file changed, 28 insertions(+), 50 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index 281147847e19..b739a7b9ef37 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -49,6 +49,15 @@ use polkadot_node_primitives::{ }; use polkadot_subsystem::{ FromOverseer, OverseerSignal, Subsystem, SubsystemContext, SpawnedSubsystem, + util::{ + self, + request_global_validation_schedule, + request_head_data, + request_local_validation_data, + request_signing_context, + request_validator_groups, + request_validators, + }, }; use polkadot_subsystem::messages::{ AllMessages, CandidateBackingMessage, CandidateSelectionMessage, SchedulerRoster, @@ -82,6 +91,8 @@ enum Error { Mpsc(mpsc::SendError), #[from] Spawn(SpawnError), + #[from] + UtilError(util::Error), } /// Holds all data needed for candidate backing job operation. @@ -92,7 +103,6 @@ struct CandidateBackingJob { rx_to: mpsc::Receiver, /// Outbound message channel sending part. tx_from: mpsc::Sender, - /// The `ParaId`s assigned to this validator. assignment: ParaId, /// We issued `Valid` or `Invalid` statements on about these candidates. @@ -193,6 +203,23 @@ impl From for AllMessages { } } +impl TryFrom for FromJob { + type Error = &'static str; + + fn try_from(f: AllMessages) -> Result { + match f { + AllMessages::AvailabilityStore(msg) => Ok(FromJob::AvailabilityStore(msg)), + AllMessages::RuntimeApi(msg) => Ok(FromJob::RuntimeApiMessage(msg)), + AllMessages::CandidateValidation(msg) => Ok(FromJob::CandidateValidation(msg)), + AllMessages::CandidateSelection(msg) => Ok(FromJob::CandidateSelection(msg)), + AllMessages::StatementDistribution(msg) => Ok(FromJob::StatementDistribution(msg)), + AllMessages::PoVDistribution(msg) => Ok(FromJob::PoVDistribution(msg)), + AllMessages::Provisioner(msg) => Ok(FromJob::Provisioner(msg)), + _ => Err("can't convert this AllMessages variant to FromJob"), + } + } +} + // It looks like it's not possible to do an `impl From` given the current state of // the code. So this does the necessary conversion. fn primitive_statement_to_table(s: &SignedFullStatement) -> TableSignedStatement { @@ -748,54 +775,6 @@ async fn run_job( job.run().await } -/// Request a validator set from the `RuntimeApi`. -async fn request_validators( - parent: Hash, - s: &mut mpsc::Sender, -) -> Result>, Error> { - let (tx, rx) = oneshot::channel(); - - s.send(FromJob::RuntimeApiMessage(RuntimeApiMessage::Request( - parent, - RuntimeApiRequest::Validators(tx), - ) - )).await?; - - Ok(rx) -} - -/// Request the scheduler roster from `RuntimeApi`. -async fn request_validator_groups( - parent: Hash, - s: &mut mpsc::Sender, -) -> Result, Error> { - let (tx, rx) = oneshot::channel(); - - s.send(FromJob::RuntimeApiMessage(RuntimeApiMessage::Request( - parent, - RuntimeApiRequest::ValidatorGroups(tx), - ) - )).await?; - - Ok(rx) -} - -/// Request a `SigningContext` from the `RuntimeApi`. -async fn request_signing_context( - parent: Hash, - s: &mut mpsc::Sender, -) -> Result, Error> { - let (tx, rx) = oneshot::channel(); - - s.send(FromJob::RuntimeApiMessage(RuntimeApiMessage::Request( - parent, - RuntimeApiRequest::SigningContext(tx), - ) - )).await?; - - Ok(rx) -} - impl Jobs { fn new(spawner: S) -> Self { Self { @@ -973,7 +952,6 @@ mod tests { use super::*; use futures::{Future, executor::{self, ThreadPool}}; use std::collections::HashMap; - use std::sync::Arc; use sp_keyring::Sr25519Keyring; use polkadot_primitives::v1::{ AssignmentKind, CollatorId, CoreAssignment, BlockData, CoreIndex, GroupIndex, ValidityAttestation, From be2be35a45542424e8867ee757960a65d9b14f0d Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Wed, 8 Jul 2020 17:43:12 +0200 Subject: [PATCH 06/32] add struct Validator representing the local validator This struct can be constructed when the local node is a validator; the constructor fails otherwise. It stores a bit of local data, and provides some utility methods. --- Cargo.lock | 3 ++ node/subsystem/Cargo.toml | 3 ++ node/subsystem/src/util.rs | 93 +++++++++++++++++++++++++++++++++++++- 3 files changed, 97 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b753aae13be9..a98fbc68b3aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4549,10 +4549,13 @@ dependencies = [ "async-trait", "derive_more 0.99.9", "futures 0.3.5", + "parity-scale-codec", "polkadot-node-primitives", "polkadot-primitives", "polkadot-statement-table", + "sc-keystore", "sc-network", + "sp-core", ] [[package]] diff --git a/node/subsystem/Cargo.toml b/node/subsystem/Cargo.toml index d63ce34ec1a7..604d36fdc9b9 100644 --- a/node/subsystem/Cargo.toml +++ b/node/subsystem/Cargo.toml @@ -9,7 +9,10 @@ description = "Subsystem traits and message definitions" async-trait = "0.1" derive_more = "0.99.9" futures = "0.3.5" +keystore = { package = "sc-keystore", git = "https://github.com/paritytech/substrate", branch = "master" } +parity-scale-codec = "1.3.0" polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } polkadot-statement-table = { path = "../../statement-table" } sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 6a3431fd3329..a2996085fc94 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -27,13 +27,16 @@ use futures::{ prelude::*, task::SpawnError, }; +use keystore::KeyStorePtr; +use parity_scale_codec::Encode; use polkadot_primitives::{ parachain::{ - GlobalValidationSchedule, HeadData, Id as ParaId, LocalValidationData, SigningContext, - ValidatorId, + EncodeAs, GlobalValidationSchedule, HeadData, Id as ParaId, LocalValidationData, Signed, + SigningContext, ValidatorId, ValidatorIndex, ValidatorPair, }, Hash, }; +use sp_core::Pair; use std::{ collections::HashMap, convert::{TryFrom, TryInto}, @@ -79,6 +82,8 @@ pub enum Error { #[from] Spawn(SpawnError), SenderConversion(String), + /// The local node is not a validator. + NotAValidator, } /// Request some data from the `RuntimeApi`. @@ -184,3 +189,87 @@ where { request_from_runtime(parent, s, |tx| RuntimeApiRequest::HeadData(id, tx)).await } + +/// From the given set of validators, find the first key we can sign with, if any. +pub fn signing_key(validators: &[ValidatorId], keystore: &KeyStorePtr) -> Option { + let keystore = keystore.read(); + validators + .iter() + .find_map(|v| keystore.key_pair::(&v).ok()) +} + +/// Local validator information +/// +/// It can be created if the local node is a validator in the context of a particular +/// relay chain block. +pub struct Validator { + signing_context: SigningContext, + key: ValidatorPair, + index: ValidatorIndex, +} + +impl Validator { + /// Get a struct representing this node's validator if this node is in fact a validator in the context of the given block. + pub async fn new( + parent: Hash, + keystore: KeyStorePtr, + mut sender: mpsc::Sender, + ) -> Result + where + SenderMessage: TryFrom, + >::Error: std::fmt::Debug, + { + // Note: request_validators and request_signing_context do not and cannot run concurrently: they both + // have a mutable handle to the same sender. + // However, each of them returns a oneshot::Receiver, and those are resolved concurrently. + let (validators, signing_context) = futures::try_join!( + request_validators(parent, &mut sender).await?, + request_signing_context(parent, &mut sender).await?, + )?; + + let key = signing_key(&validators[..], &keystore).ok_or(Error::NotAValidator)?; + let index = validators + .iter() + .enumerate() + .find(|(_, k)| k == &&key.public()) + .map(|(idx, _)| idx as ValidatorIndex) + .expect("signing_key would have already returned NotAValidator if the item we're searching for isn't in this list; qed"); + + Ok(Validator { + signing_context, + key, + index, + }) + } + + /// Get this validator's id. + pub fn id(&self) -> ValidatorId { + self.key.public() + } + + /// Get this validator's local index. + pub fn index(&self) -> ValidatorIndex { + self.index + } + + /// Get the current signing context. + pub fn signing_context(&self) -> &SigningContext { + &self.signing_context + } + + /// Sign a payload with this validator + pub fn sign, RealPayload: Encode>( + &self, + payload: Payload, + ) -> Signed { + Signed::sign(payload, &self.signing_context, self.index, &self.key) + } + + /// Validate the payload with this validator + pub fn check_payload, RealPayload: Encode>( + &self, + signed: Signed, + ) -> Result<(), ()> { + signed.check_signature(&self.signing_context, &self.id()) + } +} From 32e5c184530c4b9782f0fe11651067fad0efcda2 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Thu, 9 Jul 2020 10:34:49 +0200 Subject: [PATCH 07/32] add alternate constructor for better efficiency --- node/subsystem/src/util.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index a2996085fc94..527c6e7639ab 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -227,7 +227,18 @@ impl Validator { request_signing_context(parent, &mut sender).await?, )?; - let key = signing_key(&validators[..], &keystore).ok_or(Error::NotAValidator)?; + Self::construct(&validators, signing_context, keystore) + } + + /// Construct a validator instance without performing runtime fetches. + /// + /// This can be useful if external code also needs the same data. + pub fn construct( + validators: &[ValidatorId], + signing_context: SigningContext, + keystore: KeyStorePtr, + ) -> Result { + let key = signing_key(validators, &keystore).ok_or(Error::NotAValidator)?; let index = validators .iter() .enumerate() From 5d8a8af59c2f3db05ceb79c0c8ce8185410c10d8 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Thu, 9 Jul 2020 10:54:51 +0200 Subject: [PATCH 08/32] refactor candidate backing to use utility methods --- node/core/backing/src/lib.rs | 65 +++++++----------------------------- 1 file changed, 12 insertions(+), 53 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index b739a7b9ef37..c1b16518e443 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -35,7 +35,6 @@ use futures::{ use futures_timer::Delay; use streamunordered::{StreamUnordered, StreamYield}; -use primitives::Pair; use keystore::KeyStorePtr; use polkadot_primitives::v1::{ CommittedCandidateReceipt, BackedCandidate, Id as ParaId, ValidatorPair, ValidatorId, @@ -45,7 +44,6 @@ use polkadot_primitives::v1::{ }; use polkadot_node_primitives::{ FromTableMisbehavior, Statement, SignedFullStatement, MisbehaviorReport, ValidationResult, - ValidationOutputs, }; use polkadot_subsystem::{ FromOverseer, OverseerSignal, Subsystem, SubsystemContext, SpawnedSubsystem, @@ -57,6 +55,7 @@ use polkadot_subsystem::{ request_signing_context, request_validator_groups, request_validators, + Validator, }, }; use polkadot_subsystem::messages::{ @@ -77,7 +76,6 @@ use statement_table::{ #[derive(Debug, derive_more::From)] enum Error { - NotInValidatorSet, CandidateNotFound, JobNotFound(Hash), InvalidSignature, @@ -123,7 +121,7 @@ const fn group_quorum(n_validators: usize) -> usize { #[derive(Default)] struct TableContext { signing_context: SigningContext, - key: Option, + validator: Option, groups: HashMap>, validators: Vec, } @@ -152,22 +150,6 @@ impl TableContextTrait for TableContext { } } -impl TableContext { - fn local_id(&self) -> Option { - self.key.as_ref().map(|k| k.public()) - } - - fn local_index(&self) -> Option { - self.local_id().and_then(|id| - self.validators - .iter() - .enumerate() - .find(|(_, k)| k == &&id) - .map(|(i, _)| i as ValidatorIndex) - ) - } -} - const CHANNEL_CAPACITY: usize = 64; /// A message type that is sent from `CandidateBackingSubsystem` to `CandidateBackingJob`. @@ -236,16 +218,6 @@ fn primitive_statement_to_table(s: &SignedFullStatement) -> TableSignedStatement } } -// finds the first key we are capable of signing with out of the given set of validators, -// if any. -fn signing_key(validators: &[ValidatorId], keystore: &KeyStorePtr) -> Option { - let keystore = keystore.read(); - validators.iter() - .find_map(|v| { - keystore.key_pair::(&v).ok() - }) -} - impl CandidateBackingJob { /// Run asynchronously. async fn run(mut self) -> Result<(), Error> { @@ -540,18 +512,7 @@ impl CandidateBackingJob { } fn sign_statement(&self, statement: Statement) -> Option { - let local_index = self.table_context.local_index()?; - - let signing_key = self.table_context.key.as_ref()?; - - let signed_statement = SignedFullStatement::sign( - statement, - &self.table_context.signing_context, - local_index, - signing_key, - ); - - Some(signed_statement) + Some(self.table_context.validator.as_ref()?.sign(statement)) } fn check_statement_signature(&self, statement: &SignedFullStatement) -> Result<(), Error> { @@ -697,11 +658,10 @@ impl JobHandle { let stop_timer = Delay::new(Duration::from_secs(1)); match future::select(stop_timer, self.finished).await { - Either::Left((_, _)) => { - }, + Either::Left((_, _)) => {} Either::Right((_, _)) => { self.abort_handle.abort(); - }, + } } } @@ -722,12 +682,14 @@ async fn run_job( rx_to: mpsc::Receiver, mut tx_from: mpsc::Sender, ) -> Result<(), Error> { - let (validators, roster) = futures::try_join!( + let (validators, roster, signing_context) = futures::try_join!( request_validators(parent, &mut tx_from).await?, request_validator_groups(parent, &mut tx_from).await?, + request_signing_context(parent, &mut tx_from).await?, )?; - let key = signing_key(&validators[..], &keystore).ok_or(Error::NotInValidatorSet)?; + let validator = Validator::construct(&validators, signing_context, keystore.clone())?; + let mut groups = HashMap::new(); for assignment in roster.scheduled { @@ -741,7 +703,7 @@ async fn run_job( let mut assignment = Default::default(); - if let Some(idx) = validators.iter().position(|k| *k == key.public()) { + if let Some(idx) = validators.iter().position(|k| *k == validator.id()) { let idx = idx as u32; for (para_id, group) in groups.iter() { if group.contains(&idx) { @@ -751,13 +713,11 @@ async fn run_job( } } - let signing_context = request_signing_context(parent, &mut tx_from).await?.await?; - let table_context = TableContext { - signing_context, - key: Some(key), groups, validators, + signing_context: validator.signing_context(), + validator: Some(validator), }; let job = CandidateBackingJob { @@ -1523,7 +1483,6 @@ mod tests { ).unwrap(); } ); - }); } From 447ab4a8c506dead1bd4590bfff413910ef04a2b Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Thu, 9 Jul 2020 12:23:47 +0200 Subject: [PATCH 09/32] fix test breakage caused by reordering tests --- node/core/backing/src/lib.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index c1b16518e443..66458084bb23 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -49,9 +49,7 @@ use polkadot_subsystem::{ FromOverseer, OverseerSignal, Subsystem, SubsystemContext, SpawnedSubsystem, util::{ self, - request_global_validation_schedule, request_head_data, - request_local_validation_data, request_signing_context, request_validator_groups, request_validators, @@ -918,6 +916,17 @@ mod tests { CandidateCommitments, LocalValidationData, GlobalValidationSchedule, HeadData, }; use assert_matches::assert_matches; + use futures::{ + executor::{self, ThreadPool}, + Future, + }; + use polkadot_primitives::parachain::{ + AssignmentKind, BlockData, CollatorId, CoreAssignment, CoreIndex, GroupIndex, + ValidatorPair, ValidityAttestation, + }; + use polkadot_subsystem::messages::{RuntimeApiRequest, SchedulerRoster}; + use sp_keyring::Sr25519Keyring; + use std::collections::HashMap; fn validator_pubkeys(val_ids: &[Sr25519Keyring]) -> Vec { val_ids.iter().map(|v| v.public().into()).collect() @@ -1540,6 +1549,7 @@ mod tests { AllMessages::CandidateValidation( CandidateValidationMessage::ValidateFromChainState( c, + head_data, pov, tx, ) @@ -1570,6 +1580,7 @@ mod tests { AllMessages::CandidateValidation( CandidateValidationMessage::ValidateFromChainState( c, + head_data, pov, tx, ) From 1d59e66df66c623e23fc97cb127d64495cbfaf5c Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Thu, 9 Jul 2020 12:27:13 +0200 Subject: [PATCH 10/32] restore test which accidentally got deleted during merge From fa46a8a338448a6c554655721b40ea4b1ecd3016 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Thu, 9 Jul 2020 17:41:37 +0200 Subject: [PATCH 11/32] start extracting jobs management into helper traits + structs --- Cargo.lock | 4 +- node/subsystem/Cargo.toml | 3 + node/subsystem/src/util.rs | 214 +++++++++++++++++++++++++++++++------ 3 files changed, 189 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a98fbc68b3aa..a8320d3bd30d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4488,7 +4488,6 @@ dependencies = [ "bitvec", "derive_more 0.99.9", "futures 0.3.5", - "futures-timer 3.0.2", "log 0.4.8", "polkadot-erasure-coding", "polkadot-node-primitives", @@ -4549,6 +4548,8 @@ dependencies = [ "async-trait", "derive_more 0.99.9", "futures 0.3.5", + "futures-timer 3.0.2", + "log 0.4.8", "parity-scale-codec", "polkadot-node-primitives", "polkadot-primitives", @@ -4556,6 +4557,7 @@ dependencies = [ "sc-keystore", "sc-network", "sp-core", + "streamunordered", ] [[package]] diff --git a/node/subsystem/Cargo.toml b/node/subsystem/Cargo.toml index 604d36fdc9b9..fe743a9add41 100644 --- a/node/subsystem/Cargo.toml +++ b/node/subsystem/Cargo.toml @@ -9,10 +9,13 @@ description = "Subsystem traits and message definitions" async-trait = "0.1" derive_more = "0.99.9" futures = "0.3.5" +futures-timer = "3.0.2" keystore = { package = "sc-keystore", git = "https://github.com/paritytech/substrate", branch = "master" } +log = "0.4.8" parity-scale-codec = "1.3.0" polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } polkadot-statement-table = { path = "../../statement-table" } sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } +streamunordered = "0.5.1" diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 527c6e7639ab..1c92bab96d3f 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -23,10 +23,11 @@ use crate::messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, SchedulerRoster}; use futures::{ channel::{mpsc, oneshot}, - future::AbortHandle, + future::{AbortHandle, Either}, prelude::*, - task::SpawnError, + task::{Spawn, SpawnError, SpawnExt}, }; +use futures_timer::Delay; use keystore::KeyStorePtr; use parity_scale_codec::Encode; use polkadot_primitives::{ @@ -41,37 +42,15 @@ use std::{ collections::HashMap, convert::{TryFrom, TryInto}, ops::{Deref, DerefMut}, + pin::Pin, + time::Duration, }; +use streamunordered::{StreamUnordered, StreamYield}; -/// JobCanceler aborts all contained abort handles on drop -#[derive(Debug, Default)] -pub struct JobCanceler(HashMap); - -impl Drop for JobCanceler { - fn drop(&mut self) { - for abort_handle in self.0.values() { - abort_handle.abort(); - } - } -} - -// JobCanceler is a smart pointer wrapping the contained hashmap; -// it only cares about the wrapped data insofar as it's necessary -// to implement proper Drop behavior. Therefore, it's appropriate -// to impl Deref and DerefMut. -impl Deref for JobCanceler { - type Target = HashMap; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for JobCanceler { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} +/// Duration a job will wait after sending a stop signal before hard-aborting. +pub const JOB_GRACEFUL_STOP_DURATION: Duration = Duration::from_secs(1); +/// Capacity of channels to and from individual jobs +pub const JOB_CHANNEL_CAPACITY: usize = 64; #[derive(Debug, derive_more::From)] pub enum Error { @@ -84,6 +63,8 @@ pub enum Error { SenderConversion(String), /// The local node is not a validator. NotAValidator, + /// The desired job is not present in the jobs list. + JobNotFound(Hash), } /// Request some data from the `RuntimeApi`. @@ -284,3 +265,174 @@ impl Validator { signed.check_signature(&self.signing_context, &self.id()) } } + +/// ToJob is expected to be an enum declaring messages which can be sent to a particular job. +/// +/// Normally, this will be some subset of `Allmessages`, and a `Stop` variant. +pub trait ToJobTrait: TryFrom { + /// Produce the `Stop` variant of the ToJob enum. + fn stop() -> Self; +} + +/// A JobHandle manages a particular job for a subsystem. +pub struct JobHandle { + abort_handle: future::AbortHandle, + to_job: mpsc::Sender, + finished: oneshot::Receiver<()>, + outgoing_msgs_handle: usize, +} + +impl JobHandle { + /// Send a message to the job. + pub async fn send_msg(&mut self, msg: ToJob) -> Result<(), Error> { + self.to_job.send(msg).await.map_err(Into::into) + } + + /// Abort the job without waiting for a graceful shutdown + pub fn abort(self) { + self.abort_handle.abort(); + } +} + +impl JobHandle { + /// Stop this job gracefully. + /// + /// If it hasn't shut itself down after `JOB_GRACEFUL_STOP_DURATION`, abort it. + pub async fn stop(mut self) { + // we don't actually care if the message couldn't be sent + let _ = self.to_job.send(ToJob::stop()).await; + let stop_timer = Delay::new(JOB_GRACEFUL_STOP_DURATION); + + match future::select(stop_timer, self.finished).await { + Either::Left((_, _)) => {} + Either::Right((_, _)) => { + self.abort_handle.abort(); + } + } + } +} + +/// This trait governs jobs. +/// +/// Jobs are instantiated and killed automatically on appropriate overseer messages. +/// Other messages are passed along to and from the job via the overseer to other +/// subsystems. +pub trait JobTrait { + /// Message type to the job. Typically a subset of AllMessages. + type ToJob: 'static + ToJobTrait + Send; + /// Message type from the job. Typically a subset of AllMessages. + type FromJob: 'static + Into + Send; + /// Job runtime error. + type Error: std::error::Error; + /// Extra arguments this job needs to run properly. + /// + /// If no extra information is needed, it is perfectly acceptable to set it to `()`. + type RunArgs: 'static + Send; + + /// Run a job for the parent block indicated + fn run( + parent: Hash, + run_args: Self::RunArgs, + rx_to: mpsc::Receiver, + tx_from: mpsc::Sender, + ) -> Pin> + Send>>; + + /// Name of the job, i.e. `CandidateBackingJob` + fn name() -> &'static str; +} + +pub struct Jobs { + spawner: Spawner, + running: HashMap>, + outgoing_msgs: StreamUnordered>, + job: std::marker::PhantomData, +} + +impl Jobs { + /// Create a new Jobs manager which handles spawning appropriate jobs. + pub fn new(spawner: Spawner) -> Self { + Self { + spawner, + running: HashMap::new(), + outgoing_msgs: StreamUnordered::new(), + job: std::marker::PhantomData, + } + } + + /// Spawn a new job for this `parent_hash`, with whatever args are appropriate. + fn spawn_job(&mut self, parent_hash: Hash, run_args: Job::RunArgs) -> Result<(), Error> { + let (to_job_tx, to_job_rx) = mpsc::channel(JOB_CHANNEL_CAPACITY); + let (from_job_tx, from_job_rx) = mpsc::channel(JOB_CHANNEL_CAPACITY); + let (finished_tx, finished) = oneshot::channel(); + + let (future, abort_handle) = future::abortable(async move { + if let Err(e) = Job::run(parent_hash, run_args, to_job_rx, from_job_tx).await { + log::error!( + "{}({}) finished with an error {:?}", + Job::name(), + parent_hash, + e, + ); + } + }); + + // discard output + let future = async move { + let _ = future.await; + let _ = finished_tx.send(()); + }; + self.spawner.spawn(future)?; + + // this handle lets us remove the appropriate receiver from self.outgoing_msgs + // when it's time to stop the job. + let outgoing_msgs_handle = self.outgoing_msgs.push(from_job_rx); + + let handle = JobHandle { + abort_handle, + to_job: to_job_tx, + finished, + outgoing_msgs_handle, + }; + + self.running.insert(parent_hash, handle); + + Ok(()) + } + + /// Stop the job associated with this `parent_hash`. + pub async fn stop_job(&mut self, parent_hash: Hash) -> Result<(), Error> { + match self.running.remove(&parent_hash) { + Some(handle) => { + Pin::new(&mut self.outgoing_msgs).remove(handle.outgoing_msgs_handle); + handle.stop().await; + Ok(()) + } + None => Err(Error::JobNotFound(parent_hash)) + } + } + + /// Send a message to the appropriate job for this `parent_hash`. + async fn send_msg(&mut self, parent_hash: Hash, msg: Job::ToJob) -> Result<(), Error> { + match self.running.get_mut(&parent_hash) { + Some(job) => job.send_msg(msg).await?, + None => return Err(Error::JobNotFound(parent_hash)), + } + Ok(()) + } + + /// Get the next message from any of the underlying jobs. + async fn next(&mut self) -> Option { + self.outgoing_msgs.next().await.and_then(|(e, _)| match e { + StreamYield::Item(e) => Some(e), + _ => None, + }) + } +} + +impl Drop for Jobs { + fn drop(&mut self) { + for job_handle in self.running.values() { + job_handle.abort_handle.abort(); + } + } +} From 36b336731c438ca4547208167ad0a67b7e8e9056 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 09:02:23 +0200 Subject: [PATCH 12/32] use util::{JobHandle, Jobs} in CandidateBackingSubsystem --- node/core/backing/Cargo.toml | 2 - node/core/backing/src/lib.rs | 232 +++++++++++++---------------------- node/subsystem/src/util.rs | 18 +-- 3 files changed, 94 insertions(+), 158 deletions(-) diff --git a/node/core/backing/Cargo.toml b/node/core/backing/Cargo.toml index b25055293add..58f58cfe1ee8 100644 --- a/node/core/backing/Cargo.toml +++ b/node/core/backing/Cargo.toml @@ -12,13 +12,11 @@ sc-client-api = { git = "https://github.com/paritytech/substrate", branch = "mas sp-blockchain = { git = "https://github.com/paritytech/substrate", branch = "master" } keystore = { package = "sc-keystore", git = "https://github.com/paritytech/substrate", branch = "master" } primitives = { package = "sp-core", git = "https://github.com/paritytech/substrate", branch = "master" } - polkadot-primitives = { path = "../../../primitives" } polkadot-node-primitives = { path = "../../primitives" } polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsystem" } erasure-coding = { package = "polkadot-erasure-coding", path = "../../../erasure-coding" } statement-table = { package = "polkadot-statement-table", path = "../../../statement-table" } -futures-timer = "3.0.2" streamunordered = "0.5.1" derive_more = "0.99.9" bitvec = { version = "0.17.4", default-features = false, features = ["alloc"] } diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index 66458084bb23..d4f213f76b9e 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -22,14 +22,13 @@ use std::collections::{HashMap, HashSet}; use std::convert::TryFrom; use std::pin::Pin; use std::sync::Arc; -use std::time::Duration; use bitvec::vec::BitVec; use log; use futures::{ - select, FutureExt, SinkExt, StreamExt, + select, Future, FutureExt, SinkExt, StreamExt, channel::{oneshot, mpsc}, - future::{self, Either}, + future, task::{Spawn, SpawnError, SpawnExt}, }; use futures_timer::Delay; @@ -99,6 +98,9 @@ struct CandidateBackingJob { rx_to: mpsc::Receiver, /// Outbound message channel sending part. tx_from: mpsc::Sender, + + /// `HeadData`s of the parachains that this validator is assigned to. + head_data: HeadData, /// The `ParaId`s assigned to this validator. assignment: ParaId, /// We issued `Valid` or `Invalid` statements on about these candidates. @@ -158,6 +160,21 @@ enum ToJob { Stop, } +impl TryFrom for ToJob { + type Error = (); + + fn try_from(msg: AllMessages) -> Result { + match msg { + AllMessages::CandidateBacking(msg) => Ok(ToJob::CandidateBacking(msg)), + _ => Err(()), + } + } +} + +impl util::ToJobTrait for ToJob { + const STOP: Self = ToJob::Stop; +} + /// A message type that is sent from `CandidateBackingJob` to `CandidateBackingSubsystem`. enum FromJob { AvailabilityStore(AvailabilityStoreMessage), @@ -643,167 +660,86 @@ impl CandidateBackingJob { } } -struct JobHandle { - abort_handle: future::AbortHandle, - to_job: mpsc::Sender, - finished: oneshot::Receiver<()>, - su_handle: usize, -} +type JobHandle = util::JobHandle; -impl JobHandle { - async fn stop(mut self) { - let _ = self.to_job.send(ToJob::Stop).await; - let stop_timer = Delay::new(Duration::from_secs(1)); +struct Job; - match future::select(stop_timer, self.finished).await { - Either::Left((_, _)) => {} - Either::Right((_, _)) => { - self.abort_handle.abort(); - } - } - } +impl util::JobTrait for Job { + type ToJob = ToJob; + type FromJob = FromJob; + type Error = Error; + type RunArgs = KeyStorePtr; - async fn send_msg(&mut self, msg: ToJob) -> Result<(), Error> { - Ok(self.to_job.send(msg).await?) - } -} + const NAME: &'static str = "CandidateBackingJob"; -struct Jobs { - spawner: S, - running: HashMap, - outgoing_msgs: StreamUnordered>, -} - -async fn run_job( - parent: Hash, - keystore: KeyStorePtr, - rx_to: mpsc::Receiver, - mut tx_from: mpsc::Sender, -) -> Result<(), Error> { - let (validators, roster, signing_context) = futures::try_join!( - request_validators(parent, &mut tx_from).await?, - request_validator_groups(parent, &mut tx_from).await?, - request_signing_context(parent, &mut tx_from).await?, - )?; - - let validator = Validator::construct(&validators, signing_context, keystore.clone())?; - - let mut groups = HashMap::new(); - - for assignment in roster.scheduled { - if let Some(g) = roster.validator_groups.get(assignment.group_idx.0 as usize) { - groups.insert( - assignment.para_id, - g.clone(), - ); - } - } - - let mut assignment = Default::default(); - - if let Some(idx) = validators.iter().position(|k| *k == validator.id()) { - let idx = idx as u32; - for (para_id, group) in groups.iter() { - if group.contains(&idx) { - assignment = *para_id; - break; + fn run( + parent: Hash, + keystore: KeyStorePtr, + rx_to: mpsc::Receiver, + tx_from: mpsc::Sender, + ) -> Pin> + Send>> { + async move { + let (validators, roster, signing_context) = futures::try_join!( + request_validators(parent, &mut tx_from).await?, + request_validator_groups(parent, &mut tx_from).await?, + request_signing_context(parent, &mut tx_from).await?, + )?; + + let validator = Validator::construct(&validators, signing_context, keystore.clone())?; + + let mut groups = HashMap::new(); + + for assignment in roster.scheduled { + if let Some(g) = roster.validator_groups.get(assignment.group_idx.0 as usize) { + groups.insert( + assignment.para_id, + g.clone(), + ); + } } - } - } - let table_context = TableContext { - groups, - validators, - signing_context: validator.signing_context(), - validator: Some(validator), - }; + let mut assignment = Default::default(); - let job = CandidateBackingJob { - parent, - rx_to, - tx_from, - assignment, - issued_statements: HashSet::new(), - seconded: None, - reported_misbehavior_for: HashSet::new(), - table: Table::default(), - table_context, - }; - - job.run().await -} - -impl Jobs { - fn new(spawner: S) -> Self { - Self { - spawner, - running: HashMap::default(), - outgoing_msgs: StreamUnordered::new(), - } - } - - fn spawn_job(&mut self, parent_hash: Hash, keystore: KeyStorePtr) -> Result<(), Error> { - let (to_job_tx, to_job_rx) = mpsc::channel(CHANNEL_CAPACITY); - let (from_job_tx, from_job_rx) = mpsc::channel(CHANNEL_CAPACITY); - - let (future, abort_handle) = future::abortable(async move { - if let Err(e) = run_job(parent_hash, keystore, to_job_rx, from_job_tx).await { - log::error!( - "CandidateBackingJob({}) finished with an error {:?}", - parent_hash, - e, - ); + if let Some(idx) = validators.iter().position(|k| *k == validator.id()) { + let idx = idx as u32; + for (para_id, group) in groups.iter() { + if group.contains(&idx) { + assignment = *para_id; + break; + } + } } }); - let (finished_tx, finished) = oneshot::channel(); - - let future = async move { - let _ = future.await; - let _ = finished_tx.send(()); - }; - self.spawner.spawn(future)?; - - let su_handle = self.outgoing_msgs.push(from_job_rx); - - let handle = JobHandle { - abort_handle, - to_job: to_job_tx, - finished, - su_handle, - }; - - self.running.insert(parent_hash, handle); - - Ok(()) - } + let head_data = request_head_data(parent, &mut tx_from, assignment).await?.await?; - async fn stop_job(&mut self, parent_hash: Hash) -> Result<(), Error> { - match self.running.remove(&parent_hash) { - Some(handle) => { - Pin::new(&mut self.outgoing_msgs).remove(handle.su_handle); - handle.stop().await; - Ok(()) - } - None => Err(Error::JobNotFound(parent_hash)) - } - } + let table_context = TableContext { + groups, + validators, + signing_context: validator.signing_context().clone(), + validator: Some(validator), + }; - async fn send_msg(&mut self, parent_hash: Hash, msg: ToJob) -> Result<(), Error> { - if let Some(job) = self.running.get_mut(&parent_hash) { - job.send_msg(msg).await?; - } - Ok(()) - } + let job = CandidateBackingJob { + parent, + rx_to, + tx_from, + head_data, + assignment, + issued_validity: HashSet::new(), + seconded: None, + reported_misbehavior_for: HashSet::new(), + table: Table::default(), + table_context, + }; - async fn next(&mut self) -> Option { - self.outgoing_msgs.next().await.and_then(|(e, _)| match e { - StreamYield::Item(e) => Some(e), - _ => None, - }) + job.run().await + }.boxed() } } +type Jobs = util::Jobs; + /// An implementation of the Candidate Backing subsystem. pub struct CandidateBackingSubsystem { spawner: S, diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 1c92bab96d3f..8fc25eff7697 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -270,8 +270,8 @@ impl Validator { /// /// Normally, this will be some subset of `Allmessages`, and a `Stop` variant. pub trait ToJobTrait: TryFrom { - /// Produce the `Stop` variant of the ToJob enum. - fn stop() -> Self; + /// The `Stop` variant of the ToJob enum. + const STOP: Self; } /// A JobHandle manages a particular job for a subsystem. @@ -300,7 +300,7 @@ impl JobHandle { /// If it hasn't shut itself down after `JOB_GRACEFUL_STOP_DURATION`, abort it. pub async fn stop(mut self) { // we don't actually care if the message couldn't be sent - let _ = self.to_job.send(ToJob::stop()).await; + let _ = self.to_job.send(ToJob::STOP).await; let stop_timer = Delay::new(JOB_GRACEFUL_STOP_DURATION); match future::select(stop_timer, self.finished).await { @@ -323,12 +323,15 @@ pub trait JobTrait { /// Message type from the job. Typically a subset of AllMessages. type FromJob: 'static + Into + Send; /// Job runtime error. - type Error: std::error::Error; + type Error: std::fmt::Debug; /// Extra arguments this job needs to run properly. /// /// If no extra information is needed, it is perfectly acceptable to set it to `()`. type RunArgs: 'static + Send; + /// Name of the job, i.e. `CandidateBackingJob` + const NAME: &'static str; + /// Run a job for the parent block indicated fn run( parent: Hash, @@ -336,9 +339,6 @@ pub trait JobTrait { rx_to: mpsc::Receiver, tx_from: mpsc::Sender, ) -> Pin> + Send>>; - - /// Name of the job, i.e. `CandidateBackingJob` - fn name() -> &'static str; } pub struct Jobs { @@ -369,7 +369,7 @@ impl Jobs { if let Err(e) = Job::run(parent_hash, run_args, to_job_rx, from_job_tx).await { log::error!( "{}({}) finished with an error {:?}", - Job::name(), + Job::NAME, parent_hash, e, ); @@ -429,6 +429,8 @@ impl Jobs { } } +// Note that on drop, we don't have the chance to gracefully spin down each of the remaining handles; +// we just abort them all. Still better than letting them dangle. impl Drop for Jobs { fn drop(&mut self) { for job_handle in self.running.values() { From 91e301b82f12bb6990106daa9b81930e531dbb53 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 11:53:06 +0200 Subject: [PATCH 13/32] implement generic job-manager subsystem impl This means that the work of implementing a subsystem boils down to implementing the job, and then writing an appropriate type definition, i.e. pub type CandidateBackingSubsystem = util::JobManager; --- Cargo.lock | 2 - node/core/backing/Cargo.toml | 2 - node/core/backing/src/lib.rs | 198 +++++++++-------------------------- node/subsystem/src/util.rs | 149 +++++++++++++++++++++++++- 4 files changed, 193 insertions(+), 158 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a8320d3bd30d..1a2c08f13459 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4488,7 +4488,6 @@ dependencies = [ "bitvec", "derive_more 0.99.9", "futures 0.3.5", - "log 0.4.8", "polkadot-erasure-coding", "polkadot-node-primitives", "polkadot-node-subsystem", @@ -4501,7 +4500,6 @@ dependencies = [ "sp-blockchain", "sp-core", "sp-keyring", - "streamunordered", ] [[package]] diff --git a/node/core/backing/Cargo.toml b/node/core/backing/Cargo.toml index 58f58cfe1ee8..720b8af418d2 100644 --- a/node/core/backing/Cargo.toml +++ b/node/core/backing/Cargo.toml @@ -6,7 +6,6 @@ edition = "2018" [dependencies] futures = "0.3.5" -log = "0.4.8" sp-api = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-client-api = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-blockchain = { git = "https://github.com/paritytech/substrate", branch = "master" } @@ -17,7 +16,6 @@ polkadot-node-primitives = { path = "../../primitives" } polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsystem" } erasure-coding = { package = "polkadot-erasure-coding", path = "../../../erasure-coding" } statement-table = { package = "polkadot-statement-table", path = "../../../statement-table" } -streamunordered = "0.5.1" derive_more = "0.99.9" bitvec = { version = "0.17.4", default-features = false, features = ["alloc"] } diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index d4f213f76b9e..bece0de03bc9 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -16,23 +16,17 @@ //! Implements a `CandidateBackingSubsystem`. -#![recursion_limit="256"] - use std::collections::{HashMap, HashSet}; use std::convert::TryFrom; use std::pin::Pin; use std::sync::Arc; use bitvec::vec::BitVec; -use log; use futures::{ - select, Future, FutureExt, SinkExt, StreamExt, - channel::{oneshot, mpsc}, - future, - task::{Spawn, SpawnError, SpawnExt}, + channel::{mpsc, oneshot}, + task::SpawnError, + Future, FutureExt, SinkExt, StreamExt, }; -use futures_timer::Delay; -use streamunordered::{StreamUnordered, StreamYield}; use keystore::KeyStorePtr; use polkadot_primitives::v1::{ @@ -45,7 +39,11 @@ use polkadot_node_primitives::{ FromTableMisbehavior, Statement, SignedFullStatement, MisbehaviorReport, ValidationResult, }; use polkadot_subsystem::{ - FromOverseer, OverseerSignal, Subsystem, SubsystemContext, SpawnedSubsystem, + messages::{ + AllMessages, AvailabilityStoreMessage, CandidateBackingMessage, CandidateSelectionMessage, + CandidateValidationMessage, NewBackedCandidate, PoVDistributionMessage, ProvisionableData, + ProvisionerMessage, RuntimeApiMessage, StatementDistributionMessage, ValidationFailed, + }, util::{ self, request_head_data, @@ -56,8 +54,8 @@ use polkadot_subsystem::{ }, }; use polkadot_subsystem::messages::{ - AllMessages, CandidateBackingMessage, CandidateSelectionMessage, SchedulerRoster, - RuntimeApiMessage, RuntimeApiRequest, CandidateValidationMessage, ValidationFailed, + AllMessages, CandidateBackingMessage, CandidateSelectionMessage, + RuntimeApiMessage, CandidateValidationMessage, ValidationFailed, StatementDistributionMessage, NewBackedCandidate, ProvisionerMessage, ProvisionableData, PoVDistributionMessage, AvailabilityStoreMessage, }; @@ -72,9 +70,8 @@ use statement_table::{ }; #[derive(Debug, derive_more::From)] -enum Error { +pub enum Error { CandidateNotFound, - JobNotFound(Hash), InvalidSignature, #[from] Erasure(erasure_coding::Error), @@ -91,7 +88,7 @@ enum Error { } /// Holds all data needed for candidate backing job operation. -struct CandidateBackingJob { +pub struct CandidateBackingJob { /// The hash of the relay parent on top of which this job is doing it's work. parent: Hash, /// Inbound message channel receiving part. @@ -150,8 +147,6 @@ impl TableContextTrait for TableContext { } } -const CHANNEL_CAPACITY: usize = 64; - /// A message type that is sent from `CandidateBackingSubsystem` to `CandidateBackingJob`. enum ToJob { /// A `CandidateBackingMessage`. @@ -171,12 +166,28 @@ impl TryFrom for ToJob { } } +impl From for ToJob { + fn from(msg: CandidateBackingMessage) -> Self { + Self::CandidateBacking(msg) + } +} + impl util::ToJobTrait for ToJob { const STOP: Self = ToJob::Stop; + + fn hash(&self) -> Option { + use CandidateBackingMessage::{RegisterBackingWatcher, Second, Statement}; + match self { + Self::CandidateBacking(RegisterBackingWatcher(hash, _)) => Some(*hash), + Self::CandidateBacking(Second(hash, _, _)) => Some(*hash), + Self::CandidateBacking(Statement(hash, _)) => Some(*hash), + _ => None, + } + } } /// A message type that is sent from `CandidateBackingJob` to `CandidateBackingSubsystem`. -enum FromJob { +pub enum FromJob { AvailabilityStore(AvailabilityStoreMessage), RuntimeApiMessage(RuntimeApiMessage), CandidateValidation(CandidateValidationMessage), @@ -235,7 +246,7 @@ fn primitive_statement_to_table(s: &SignedFullStatement) -> TableSignedStatement impl CandidateBackingJob { /// Run asynchronously. - async fn run(mut self) -> Result<(), Error> { + async fn run_loop(mut self) -> Result<(), Error> { while let Some(msg) = self.rx_to.next().await { match msg { ToJob::CandidateBacking(msg) => { @@ -342,9 +353,7 @@ impl CandidateBackingJob { None => continue, }; - let mut validator_indices = BitVec::with_capacity( - group.len() - ); + let mut validator_indices = BitVec::with_capacity(group.len()); validator_indices.resize(group.len(), false); @@ -385,7 +394,7 @@ impl CandidateBackingJob { if let Ok(report) = MisbehaviorReport::try_from(f) { let message = ProvisionerMessage::ProvisionableData( - ProvisionableData::MisbehaviorReport(self.parent, report) + ProvisionableData::MisbehaviorReport(self.parent, report), ); reports.push(message); @@ -660,11 +669,7 @@ impl CandidateBackingJob { } } -type JobHandle = util::JobHandle; - -struct Job; - -impl util::JobTrait for Job { +impl util::JobTrait for CandidateBackingJob { type ToJob = ToJob; type FromJob = FromJob; type Error = Error; @@ -676,8 +681,8 @@ impl util::JobTrait for Job { parent: Hash, keystore: KeyStorePtr, rx_to: mpsc::Receiver, - tx_from: mpsc::Sender, - ) -> Pin> + Send>> { + mut tx_from: mpsc::Sender, + ) -> Pin> + Send>> { async move { let (validators, roster, signing_context) = futures::try_join!( request_validators(parent, &mut tx_from).await?, @@ -691,10 +696,7 @@ impl util::JobTrait for Job { for assignment in roster.scheduled { if let Some(g) = roster.validator_groups.get(assignment.group_idx.0 as usize) { - groups.insert( - assignment.para_id, - g.clone(), - ); + groups.insert(assignment.para_id, g.clone()); } } @@ -709,7 +711,6 @@ impl util::JobTrait for Job { } } } - }); let head_data = request_head_data(parent, &mut tx_from, assignment).await?.await?; @@ -733,134 +734,33 @@ impl util::JobTrait for Job { table_context, }; - job.run().await - }.boxed() - } -} - -type Jobs = util::Jobs; - -/// An implementation of the Candidate Backing subsystem. -pub struct CandidateBackingSubsystem { - spawner: S, - keystore: KeyStorePtr, - _context: std::marker::PhantomData, -} - -impl CandidateBackingSubsystem - where - S: Spawn + Clone, - Context: SubsystemContext, -{ - /// Creates a new `CandidateBackingSubsystem`. - pub fn new(keystore: KeyStorePtr, spawner: S) -> Self { - Self { - spawner, - keystore, - _context: std::marker::PhantomData, - } - } - - async fn run( - mut ctx: Context, - keystore: KeyStorePtr, - spawner: S, - ) { - let mut jobs = Jobs::new(spawner.clone()); - - loop { - select! { - incoming = ctx.recv().fuse() => { - match incoming { - Ok(msg) => match msg { - FromOverseer::Signal(OverseerSignal::StartWork(hash)) => { - if let Err(e) = jobs.spawn_job(hash, keystore.clone()) { - log::error!("Failed to spawn a job: {:?}", e); - break; - } - } - FromOverseer::Signal(OverseerSignal::StopWork(hash)) => { - if let Err(e) = jobs.stop_job(hash).await { - log::error!("Failed to spawn a job: {:?}", e); - break; - } - } - FromOverseer::Communication { msg } => { - match msg { - CandidateBackingMessage::Second(hash, _, _) | - CandidateBackingMessage::Statement(hash, _) | - CandidateBackingMessage::GetBackedCandidates(hash, _) => { - let res = jobs.send_msg( - hash.clone(), - ToJob::CandidateBacking(msg), - ).await; - - if let Err(e) = res { - log::error!( - "Failed to send a message to a job: {:?}", - e, - ); - - break; - } - } - _ => (), - } - } - _ => (), - }, - Err(_) => break, - } - } - outgoing = jobs.next().fuse() => { - match outgoing { - Some(msg) => { - let _ = ctx.send_message(msg.into()).await; - } - None => break, - } - } - complete => break, - } + job.run_loop().await } + .boxed() } } -impl Subsystem for CandidateBackingSubsystem - where - S: Spawn + Send + Clone + 'static, - Context: SubsystemContext, -{ - fn start(self, ctx: Context) -> SpawnedSubsystem { - let keystore = self.keystore.clone(); - let spawner = self.spawner.clone(); - - SpawnedSubsystem(Box::pin(async move { - Self::run(ctx, keystore, spawner).await; - })) - } -} +/// An implementation of the Candidate Backing subsystem. +pub type CandidateBackingSubsystem = + util::JobManager; #[cfg(test)] mod tests { use super::*; - use futures::{Future, executor::{self, ThreadPool}}; - use std::collections::HashMap; - use sp_keyring::Sr25519Keyring; - use polkadot_primitives::v1::{ - AssignmentKind, CollatorId, CoreAssignment, BlockData, CoreIndex, GroupIndex, ValidityAttestation, - CandidateCommitments, LocalValidationData, GlobalValidationSchedule, HeadData, - }; use assert_matches::assert_matches; use futures::{ executor::{self, ThreadPool}, - Future, + future, Future, }; - use polkadot_primitives::parachain::{ - AssignmentKind, BlockData, CollatorId, CoreAssignment, CoreIndex, GroupIndex, + use polkadot_primitives::v1::{ + AssignmentKind, BlockData, CandidateCommitments, CollatorId, CoreAssignment, CoreIndex, + LocalValidationData, GlobalValidationSchedule, GroupIndex, HeadData, ValidatorPair, ValidityAttestation, }; - use polkadot_subsystem::messages::{RuntimeApiRequest, SchedulerRoster}; + use polkadot_subsystem::{ + messages::{RuntimeApiRequest, SchedulerRoster}, + FromOverseer, OverseerSignal, + }; use sp_keyring::Sr25519Keyring; use std::collections::HashMap; diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 8fc25eff7697..a644dccc1cf0 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -20,11 +20,15 @@ //! or determining what their validator ID is. These common interests are factored into //! this module. -use crate::messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, SchedulerRoster}; +use crate::{ + messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, SchedulerRoster}, + FromOverseer, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemResult, +}; use futures::{ channel::{mpsc, oneshot}, - future::{AbortHandle, Either}, + future::Either, prelude::*, + select, task::{Spawn, SpawnError, SpawnExt}, }; use futures_timer::Delay; @@ -41,7 +45,6 @@ use sp_core::Pair; use std::{ collections::HashMap, convert::{TryFrom, TryInto}, - ops::{Deref, DerefMut}, pin::Pin, time::Duration, }; @@ -272,6 +275,9 @@ impl Validator { pub trait ToJobTrait: TryFrom { /// The `Stop` variant of the ToJob enum. const STOP: Self; + + /// If the message variant contains a hash, return it here + fn hash(&self) -> Option; } /// A JobHandle manages a particular job for a subsystem. @@ -338,7 +344,7 @@ pub trait JobTrait { run_args: Self::RunArgs, rx_to: mpsc::Receiver, tx_from: mpsc::Sender, - ) -> Pin> + Send>>; + ) -> Pin> + Send>>; } pub struct Jobs { @@ -407,7 +413,7 @@ impl Jobs { handle.stop().await; Ok(()) } - None => Err(Error::JobNotFound(parent_hash)) + None => Err(Error::JobNotFound(parent_hash)), } } @@ -438,3 +444,136 @@ impl Drop for Jobs { } } } + +/// A basic implementation of a subsystem. +/// +/// This struct is responsible for handling message traffic between +/// this subsystem and the overseer. It spawns and kills jobs on the +/// appropriate Overseer messages, and dispatches standard traffic to +/// the appropriate job the rest of the time. +pub struct JobManager { + spawner: Spawner, + run_args: Job::RunArgs, + context: std::marker::PhantomData, + job: std::marker::PhantomData, +} + +impl JobManager +where + Spawner: Spawn + Clone + Send, + Context: SubsystemContext, + ::Message: Into, + Job: JobTrait, + Job::RunArgs: Clone, + Job::ToJob: TryFrom + Sync + Debug, +{ + /// Creates a new `Subsystem`. + pub fn new(spawner: Spawner, run_args: Job::RunArgs) -> Self { + Self { + spawner, + run_args, + context: std::marker::PhantomData, + job: std::marker::PhantomData, + } + } + + /// Run this subsystem + /// + /// Conceptually, this is very simple: it just loops forever. + /// + /// - On incoming overseer messages, it starts or stops jobs as appropriate. + /// - On other incoming messages, if they can be converted into Job::ToJob and + /// include a hash, then they're forwarded to the appropriate individual job. + /// - On outgoing messages from the jobs, it forwards them to the overseer. + pub async fn run(mut ctx: Context, run_args: Job::RunArgs, spawner: Spawner) { + let mut jobs = Jobs::new(spawner.clone()); + + loop { + select! { + incoming = ctx.recv().fuse() => if Self::handle_incoming(incoming, &mut jobs, &run_args).await { break }, + outgoing = jobs.next().fuse() => if Self::handle_outgoing(outgoing, &mut ctx).await { break }, + complete => break, + } + } + } + + // handle an incoming message. return true if we should break afterwards. + async fn handle_incoming( + incoming: SubsystemResult>, + jobs: &mut Jobs, + run_args: &Job::RunArgs, + ) -> bool { + use crate::FromOverseer::{Communication, Signal}; + use crate::OverseerSignal::{Conclude, StartWork, StopWork}; + + match incoming { + Ok(Signal(StartWork(hash))) => { + if let Err(e) = jobs.spawn_job(hash, run_args.clone()) { + log::error!("Failed to spawn a job: {:?}", e); + return true; + } + } + Ok(Signal(StopWork(hash))) => { + if let Err(e) = jobs.stop_job(hash).await { + log::error!("Failed to stop a job: {:?}", e); + return true; + } + } + Ok(Signal(Conclude)) => { + // breaking the loop ends fn run, which drops `jobs`, which drops all ongoing work + return true; + } + Ok(Communication { msg }) => { + // I don't know the syntax to add this type hint within an if let statement + let maybe_to_job: Result = msg.try_into(); + if let Ok(to_job) = maybe_to_job { + match to_job.hash() { + Some(hash) => { + if let Err(err) = jobs.send_msg(hash, to_job).await { + log::error!("Failed to send a message to a job: {:?}", err); + return true; + } + } + None => log::warn!("message appropriate to send to job, but no parent hash available to dispatch it: {:?}", to_job), + } + } + } + Err(err) => { + log::error!("error receiving message from subsystem context: {:?}", err); + return true; + } + } + false + } + + // handle an outgoing message. return true if we should break afterwards. + async fn handle_outgoing(outgoing: Option, ctx: &mut Context) -> bool { + match outgoing { + Some(msg) => { + // discard errors when sending the message upstream + let _ = ctx.send_message(msg.into()).await; + } + None => return true, + } + false + } +} + +impl Subsystem for JobManager +where + Spawner: Spawn + Send + Clone + 'static, + Context: SubsystemContext, + ::Message: Into, + Job: JobTrait + Send, + Job::RunArgs: Clone + Sync, + Job::ToJob: TryFrom + Sync, +{ + fn start(self, ctx: Context) -> SpawnedSubsystem { + let spawner = self.spawner.clone(); + let run_args = self.run_args.clone(); + + SpawnedSubsystem(Box::pin(async move { + Self::run(ctx, run_args, spawner).await; + })) + } +} From 3a472363f784c118cfc78de2a562142fde11cbbb Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 12:33:53 +0200 Subject: [PATCH 14/32] add hash-extraction helper to messages --- node/core/backing/src/lib.rs | 7 +-- node/subsystem/src/messages.rs | 103 +++++++++++++++++++++++++++++++++ node/subsystem/src/util.rs | 27 ++++++++- 3 files changed, 129 insertions(+), 8 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index bece0de03bc9..fd02db0952ce 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -176,12 +176,9 @@ impl util::ToJobTrait for ToJob { const STOP: Self = ToJob::Stop; fn hash(&self) -> Option { - use CandidateBackingMessage::{RegisterBackingWatcher, Second, Statement}; match self { - Self::CandidateBacking(RegisterBackingWatcher(hash, _)) => Some(*hash), - Self::CandidateBacking(Second(hash, _, _)) => Some(*hash), - Self::CandidateBacking(Statement(hash, _)) => Some(*hash), - _ => None, + Self::CandidateBacking(cb) => cb.hash(), + Self::Stop => None, } } } diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index 10c861f1410c..bc1991c3fea8 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -51,6 +51,14 @@ pub enum CandidateSelectionMessage { Invalid(Hash, CandidateReceipt), } +impl CandidateSelectionMessage { + pub fn hash(&self) -> Option { + match self { + Self::Invalid(hash, _) => Some(*hash), + } + } +} + /// Messages received by the Candidate Backing subsystem. #[derive(Debug)] pub enum CandidateBackingMessage { @@ -65,6 +73,17 @@ pub enum CandidateBackingMessage { Statement(Hash, SignedFullStatement), } + +impl CandidateBackingMessage { + pub fn hash(&self) -> Option { + match self { + Self::RegisterBackingWatcher(hash, _) => Some(*hash), + Self::Second(hash, _, _) => Some(*hash), + Self::Statement(hash, _) => Some(*hash), + } + } +} + /// Blanket error for validation failing. #[derive(Debug)] pub struct ValidationFailed; @@ -102,6 +121,14 @@ pub enum CandidateValidationMessage { ), } +impl CandidateValidationMessage { + pub fn hash(&self) -> Option { + match self { + Self::Validate(hash, _, _, _, _) => Some(*hash), + } + } +} + /// Events from network. #[derive(Debug, Clone)] pub enum NetworkBridgeEvent { @@ -134,6 +161,16 @@ pub enum NetworkBridgeMessage { SendMessage(Vec, ProtocolId, Vec), } +impl NetworkBridgeMessage { + pub fn hash(&self) -> Option { + match self { + Self::RegisterEventProducer(_, _) => None, + Self::ReportPeer(_, _) => None, + Self::SendMessage(_, _, _) => None, + } + } +} + /// Availability Distribution Message. #[derive(Debug)] pub enum AvailabilityDistributionMessage { @@ -147,6 +184,16 @@ pub enum AvailabilityDistributionMessage { NetworkBridgeUpdate(NetworkBridgeEvent), } +impl AvailabilityDistributionMessage { + pub fn hash(&self) -> Option { + match self { + Self::DistributeChunk(hash, _) => Some(*hash), + Self::FetchChunk(hash, _) => Some(*hash), + Self::NetworkBridgeUpdate(_) => None, + } + } +} + /// Bitfield distribution message. #[derive(Debug)] pub enum BitfieldDistributionMessage { @@ -157,6 +204,15 @@ pub enum BitfieldDistributionMessage { NetworkBridgeUpdate(NetworkBridgeEvent), } +impl BitfieldDistributionMessage { + pub fn hash(&self) -> Option { + match self { + Self::DistributeBitfield(hash, _) => Some(*hash), + Self::NetworkBridgeUpdate(_) => None, + } + } +} + /// Availability store subsystem message. #[derive(Debug)] pub enum AvailabilityStoreMessage { @@ -170,6 +226,16 @@ pub enum AvailabilityStoreMessage { StoreChunk(Hash, ValidatorIndex, ErasureChunk), } +impl AvailabilityStoreMessage { + pub fn hash(&self) -> Option { + match self { + Self::QueryPoV(hash, _) => Some(*hash), + Self::QueryChunk(hash, _, _) => Some(*hash), + Self::StoreChunk(hash, _, _) => Some(*hash), + } + } +} + /// The information on scheduler assignments that some somesystems may be querying. #[derive(Debug, Clone)] pub struct SchedulerRoster { @@ -207,6 +273,14 @@ pub enum RuntimeApiMessage { Request(Hash, RuntimeApiRequest), } +impl RuntimeApiMessage { + pub fn hash(&self) -> Option { + match self { + Self::Request(hash, _) => Some(*hash), + } + } +} + /// Statement distribution message. #[derive(Debug)] pub enum StatementDistributionMessage { @@ -217,6 +291,15 @@ pub enum StatementDistributionMessage { NetworkBridgeUpdate(NetworkBridgeEvent), } +impl StatementDistributionMessage { + pub fn hash(&self) -> Option { + match self { + Self::Share(hash, _) => Some(*hash), + Self::NetworkBridgeUpdate(_) => None, + } + } +} + /// This data becomes intrinsics or extrinsics which should be included in a future relay chain block. #[derive(Debug)] pub enum ProvisionableData { @@ -253,6 +336,16 @@ pub enum ProvisionerMessage { ProvisionableData(ProvisionableData), } +impl ProvisionerMessage { + pub fn hash(&self) -> Option { + match self { + Self::RequestBlockAuthorshipData(hash, _) => Some(*hash), + Self::RequestInherentData(hash, _) => Some(*hash), + Self::ProvisionableData(_) => None, + } + } +} + /// Message to the PoV Distribution Subsystem. #[derive(Debug)] pub enum PoVDistributionMessage { @@ -268,6 +361,16 @@ pub enum PoVDistributionMessage { NetworkBridgeUpdate(NetworkBridgeEvent), } +impl PoVDistributionMessage { + pub fn hash(&self) -> Option { + match self { + Self::FetchPoV(hash, _, _) => Some(*hash), + Self::DistributePoV(hash, _, _) => Some(*hash), + Self::NetworkBridgeUpdate(_) => None, + } + } +} + /// A message type tying together all message types that are used across Subsystems. #[derive(Debug)] pub enum AllMessages { diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index a644dccc1cf0..a6e43ff0aec5 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -345,6 +345,20 @@ pub trait JobTrait { rx_to: mpsc::Receiver, tx_from: mpsc::Sender, ) -> Pin> + Send>>; + + /// Handle a message which can't be dispatched to a particular job + /// + /// By default, this is implemented with a NOP function. However, if + /// ToJob occasionally has messages which do not correspond to a particular + /// parent relay hash, then this function will be spawned as a one-off + /// task to handle those messages. + // TODO: the API here is likely not precisely what we want; figure it out more + // once we're implementing a subsystem which actually needs this feature. + // In particular, we're quite likely to want this to return a future instead of + // interrupting the active thread for the duration of the handler. + fn handle_unhashed_msg(_msg: Self::ToJob) -> Result<(), Self::Error> { + Ok(()) + } } pub struct Jobs { @@ -465,7 +479,7 @@ where ::Message: Into, Job: JobTrait, Job::RunArgs: Clone, - Job::ToJob: TryFrom + Sync + Debug, + Job::ToJob: TryFrom + Sync, { /// Creates a new `Subsystem`. pub fn new(spawner: Spawner, run_args: Job::RunArgs) -> Self { @@ -520,7 +534,9 @@ where } } Ok(Signal(Conclude)) => { - // breaking the loop ends fn run, which drops `jobs`, which drops all ongoing work + // breaking the loop ends fn run, which drops `jobs`, which immediately drops all ongoing work + // + // REVIEW: is this the behavior we want? return true; } Ok(Communication { msg }) => { @@ -534,7 +550,12 @@ where return true; } } - None => log::warn!("message appropriate to send to job, but no parent hash available to dispatch it: {:?}", to_job), + None => { + if let Err(err) = Job::handle_unhashed_msg(to_job) { + log::error!("Failed to handle unhashed message: {:?}", err); + return true; + } + } } } } From 38110d55784f02319348c0d03eeb6a0c222b2c9c Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 16:17:32 +0200 Subject: [PATCH 15/32] fix errors caused by improper rebase --- node/core/backing/src/lib.rs | 29 +++++------------------- node/subsystem/src/messages.rs | 5 +++-- node/subsystem/src/util.rs | 40 +++------------------------------- 3 files changed, 12 insertions(+), 62 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index fd02db0952ce..835e1d1d1520 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -30,13 +30,14 @@ use futures::{ use keystore::KeyStorePtr; use polkadot_primitives::v1::{ - CommittedCandidateReceipt, BackedCandidate, Id as ParaId, ValidatorPair, ValidatorId, + CommittedCandidateReceipt, BackedCandidate, Id as ParaId, ValidatorId, ValidatorIndex, SigningContext, PoV, OmittedValidationData, CandidateDescriptor, AvailableData, ErasureChunk, ValidatorSignature, Hash, CandidateReceipt, CandidateCommitments, }; use polkadot_node_primitives::{ - FromTableMisbehavior, Statement, SignedFullStatement, MisbehaviorReport, ValidationResult, + FromTableMisbehavior, Statement, SignedFullStatement, MisbehaviorReport, + ValidationOutputs, ValidationResult, }; use polkadot_subsystem::{ messages::{ @@ -46,19 +47,12 @@ use polkadot_subsystem::{ }, util::{ self, - request_head_data, request_signing_context, request_validator_groups, request_validators, Validator, }, }; -use polkadot_subsystem::messages::{ - AllMessages, CandidateBackingMessage, CandidateSelectionMessage, - RuntimeApiMessage, CandidateValidationMessage, ValidationFailed, - StatementDistributionMessage, NewBackedCandidate, ProvisionerMessage, ProvisionableData, - PoVDistributionMessage, AvailabilityStoreMessage, -}; use statement_table::{ generic::AttestedCandidate as TableAttestedCandidate, Context as TableContextTrait, @@ -95,9 +89,6 @@ pub struct CandidateBackingJob { rx_to: mpsc::Receiver, /// Outbound message channel sending part. tx_from: mpsc::Sender, - - /// `HeadData`s of the parachains that this validator is assigned to. - head_data: HeadData, /// The `ParaId`s assigned to this validator. assignment: ParaId, /// We issued `Valid` or `Invalid` statements on about these candidates. @@ -106,7 +97,6 @@ pub struct CandidateBackingJob { seconded: Option, /// We have already reported misbehaviors for these validators. reported_misbehavior_for: HashSet, - table: Table, table_context: TableContext, } @@ -148,7 +138,7 @@ impl TableContextTrait for TableContext { } /// A message type that is sent from `CandidateBackingSubsystem` to `CandidateBackingJob`. -enum ToJob { +pub enum ToJob { /// A `CandidateBackingMessage`. CandidateBacking(CandidateBackingMessage), /// Stop working. @@ -709,8 +699,6 @@ impl util::JobTrait for CandidateBackingJob { } } - let head_data = request_head_data(parent, &mut tx_from, assignment).await?.await?; - let table_context = TableContext { groups, validators, @@ -722,9 +710,8 @@ impl util::JobTrait for CandidateBackingJob { parent, rx_to, tx_from, - head_data, assignment, - issued_validity: HashSet::new(), + issued_statements: HashSet::new(), seconded: None, reported_misbehavior_for: HashSet::new(), table: Table::default(), @@ -750,7 +737,7 @@ mod tests { future, Future, }; use polkadot_primitives::v1::{ - AssignmentKind, BlockData, CandidateCommitments, CollatorId, CoreAssignment, CoreIndex, + AssignmentKind, BlockData, CandidateCommitments, CollatorId, CoreAssignment, CoreIndex, LocalValidationData, GlobalValidationSchedule, GroupIndex, HeadData, ValidatorPair, ValidityAttestation, }; @@ -1382,7 +1369,6 @@ mod tests { AllMessages::CandidateValidation( CandidateValidationMessage::ValidateFromChainState( c, - head_data, pov, tx, ) @@ -1406,14 +1392,11 @@ mod tests { virtual_overseer.send(FromOverseer::Communication{ msg: second }).await; - let expected_head_data = test_state.head_data.get(&test_state.chain_ids[0]).unwrap(); - assert_matches!( virtual_overseer.recv().await, AllMessages::CandidateValidation( CandidateValidationMessage::ValidateFromChainState( c, - head_data, pov, tx, ) diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index bc1991c3fea8..9513db11b01d 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -77,7 +77,7 @@ pub enum CandidateBackingMessage { impl CandidateBackingMessage { pub fn hash(&self) -> Option { match self { - Self::RegisterBackingWatcher(hash, _) => Some(*hash), + Self::GetBackedCandidates(hash, _) => Some(*hash), Self::Second(hash, _, _) => Some(*hash), Self::Statement(hash, _) => Some(*hash), } @@ -124,7 +124,8 @@ pub enum CandidateValidationMessage { impl CandidateValidationMessage { pub fn hash(&self) -> Option { match self { - Self::Validate(hash, _, _, _, _) => Some(*hash), + Self::ValidateFromChainState(CandidateDescriptor{ relay_parent, ..}, _, _) => Some(*relay_parent), + Self::ValidateFromExhaustive(_, _, CandidateDescriptor{ relay_parent, ..}, _, _) => Some(*relay_parent), } } } diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index a6e43ff0aec5..861decd6b599 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -34,12 +34,9 @@ use futures::{ use futures_timer::Delay; use keystore::KeyStorePtr; use parity_scale_codec::Encode; -use polkadot_primitives::{ - parachain::{ - EncodeAs, GlobalValidationSchedule, HeadData, Id as ParaId, LocalValidationData, Signed, - SigningContext, ValidatorId, ValidatorIndex, ValidatorPair, - }, - Hash, +use polkadot_primitives::v1::{ + EncodeAs, Hash, HeadData, Id as ParaId, Signed, SigningContext, + ValidatorId, ValidatorIndex, ValidatorPair, }; use sp_core::Pair; use std::{ @@ -94,37 +91,6 @@ where Ok(rx) } -/// Request a `GlobalValidationSchedule` from `RuntimeApi`. -pub async fn request_global_validation_schedule( - parent: Hash, - s: &mut mpsc::Sender, -) -> Result, Error> -where - SenderMessage: TryFrom, - >::Error: std::fmt::Debug, -{ - request_from_runtime(parent, s, |tx| { - RuntimeApiRequest::GlobalValidationSchedule(tx) - }) - .await -} - -/// Request a `LocalValidationData` from `RuntimeApi`. -pub async fn request_local_validation_data( - parent: Hash, - para_id: ParaId, - s: &mut mpsc::Sender, -) -> Result>, Error> -where - SenderMessage: TryFrom, - >::Error: std::fmt::Debug, -{ - request_from_runtime(parent, s, |tx| { - RuntimeApiRequest::LocalValidationData(para_id, tx) - }) - .await -} - /// Request a validator set from the `RuntimeApi`. pub async fn request_validators( parent: Hash, From b5d28ce70d9dd8eb93f248d1e58a78d8bcd7294f Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 16:28:28 +0200 Subject: [PATCH 16/32] doc improvement --- node/subsystem/src/util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 861decd6b599..935c5e7ca08d 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -235,7 +235,7 @@ impl Validator { } } -/// ToJob is expected to be an enum declaring messages which can be sent to a particular job. +/// ToJob is expected to be an enum declaring the set of messages of interest to a particular job. /// /// Normally, this will be some subset of `Allmessages`, and a `Stop` variant. pub trait ToJobTrait: TryFrom { From 93fe70061bc74d50babdab424829f72fadf26346 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 16:35:08 +0200 Subject: [PATCH 17/32] simplify conversion from overseer communication to job message --- node/subsystem/src/util.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 935c5e7ca08d..bb917a170685 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -442,10 +442,9 @@ impl JobManager where Spawner: Spawn + Clone + Send, Context: SubsystemContext, - ::Message: Into, Job: JobTrait, Job::RunArgs: Clone, - Job::ToJob: TryFrom + Sync, + Job::ToJob: TryFrom + TryFrom<::Message> + Sync, { /// Creates a new `Subsystem`. pub fn new(spawner: Spawner, run_args: Job::RunArgs) -> Self { @@ -506,9 +505,7 @@ where return true; } Ok(Communication { msg }) => { - // I don't know the syntax to add this type hint within an if let statement - let maybe_to_job: Result = msg.try_into(); - if let Ok(to_job) = maybe_to_job { + if let Ok(to_job) = ::try_from(msg) { match to_job.hash() { Some(hash) => { if let Err(err) = jobs.send_msg(hash, to_job).await { From cca7d4b429d00768995b36ebdcbc466b50581cd8 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 16:43:18 +0200 Subject: [PATCH 18/32] document fn hash for all messages --- node/subsystem/src/messages.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index 9513db11b01d..58c4da76b038 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -52,6 +52,7 @@ pub enum CandidateSelectionMessage { } impl CandidateSelectionMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::Invalid(hash, _) => Some(*hash), @@ -75,6 +76,7 @@ pub enum CandidateBackingMessage { impl CandidateBackingMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::GetBackedCandidates(hash, _) => Some(*hash), @@ -122,6 +124,7 @@ pub enum CandidateValidationMessage { } impl CandidateValidationMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::ValidateFromChainState(CandidateDescriptor{ relay_parent, ..}, _, _) => Some(*relay_parent), @@ -163,6 +166,7 @@ pub enum NetworkBridgeMessage { } impl NetworkBridgeMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::RegisterEventProducer(_, _) => None, @@ -186,6 +190,7 @@ pub enum AvailabilityDistributionMessage { } impl AvailabilityDistributionMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::DistributeChunk(hash, _) => Some(*hash), @@ -206,6 +211,7 @@ pub enum BitfieldDistributionMessage { } impl BitfieldDistributionMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::DistributeBitfield(hash, _) => Some(*hash), @@ -228,6 +234,7 @@ pub enum AvailabilityStoreMessage { } impl AvailabilityStoreMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::QueryPoV(hash, _) => Some(*hash), @@ -275,6 +282,7 @@ pub enum RuntimeApiMessage { } impl RuntimeApiMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::Request(hash, _) => Some(*hash), @@ -293,6 +301,7 @@ pub enum StatementDistributionMessage { } impl StatementDistributionMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::Share(hash, _) => Some(*hash), @@ -338,6 +347,7 @@ pub enum ProvisionerMessage { } impl ProvisionerMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::RequestBlockAuthorshipData(hash, _) => Some(*hash), @@ -363,6 +373,7 @@ pub enum PoVDistributionMessage { } impl PoVDistributionMessage { + /// If the current variant contains the relay parent hash, return it. pub fn hash(&self) -> Option { match self { Self::FetchPoV(hash, _, _) => Some(*hash), From 841579d1e7d8de6fad4a20b4bec8659564bdae8d Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 16:47:03 +0200 Subject: [PATCH 19/32] rename fn hash() -> fn relay_parent --- node/core/backing/src/lib.rs | 4 ++-- node/subsystem/src/messages.rs | 22 +++++++++++----------- node/subsystem/src/util.rs | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index 835e1d1d1520..d17eda17cec7 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -165,9 +165,9 @@ impl From for ToJob { impl util::ToJobTrait for ToJob { const STOP: Self = ToJob::Stop; - fn hash(&self) -> Option { + fn relay_parent(&self) -> Option { match self { - Self::CandidateBacking(cb) => cb.hash(), + Self::CandidateBacking(cb) => cb.relay_parent(), Self::Stop => None, } } diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index 58c4da76b038..bab18527f56d 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -53,7 +53,7 @@ pub enum CandidateSelectionMessage { impl CandidateSelectionMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::Invalid(hash, _) => Some(*hash), } @@ -77,7 +77,7 @@ pub enum CandidateBackingMessage { impl CandidateBackingMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::GetBackedCandidates(hash, _) => Some(*hash), Self::Second(hash, _, _) => Some(*hash), @@ -125,7 +125,7 @@ pub enum CandidateValidationMessage { impl CandidateValidationMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::ValidateFromChainState(CandidateDescriptor{ relay_parent, ..}, _, _) => Some(*relay_parent), Self::ValidateFromExhaustive(_, _, CandidateDescriptor{ relay_parent, ..}, _, _) => Some(*relay_parent), @@ -167,7 +167,7 @@ pub enum NetworkBridgeMessage { impl NetworkBridgeMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::RegisterEventProducer(_, _) => None, Self::ReportPeer(_, _) => None, @@ -191,7 +191,7 @@ pub enum AvailabilityDistributionMessage { impl AvailabilityDistributionMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::DistributeChunk(hash, _) => Some(*hash), Self::FetchChunk(hash, _) => Some(*hash), @@ -212,7 +212,7 @@ pub enum BitfieldDistributionMessage { impl BitfieldDistributionMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::DistributeBitfield(hash, _) => Some(*hash), Self::NetworkBridgeUpdate(_) => None, @@ -235,7 +235,7 @@ pub enum AvailabilityStoreMessage { impl AvailabilityStoreMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::QueryPoV(hash, _) => Some(*hash), Self::QueryChunk(hash, _, _) => Some(*hash), @@ -283,7 +283,7 @@ pub enum RuntimeApiMessage { impl RuntimeApiMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::Request(hash, _) => Some(*hash), } @@ -302,7 +302,7 @@ pub enum StatementDistributionMessage { impl StatementDistributionMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::Share(hash, _) => Some(*hash), Self::NetworkBridgeUpdate(_) => None, @@ -348,7 +348,7 @@ pub enum ProvisionerMessage { impl ProvisionerMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::RequestBlockAuthorshipData(hash, _) => Some(*hash), Self::RequestInherentData(hash, _) => Some(*hash), @@ -374,7 +374,7 @@ pub enum PoVDistributionMessage { impl PoVDistributionMessage { /// If the current variant contains the relay parent hash, return it. - pub fn hash(&self) -> Option { + pub fn relay_parent(&self) -> Option { match self { Self::FetchPoV(hash, _, _) => Some(*hash), Self::DistributePoV(hash, _, _) => Some(*hash), diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index bb917a170685..704814c64741 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -242,8 +242,8 @@ pub trait ToJobTrait: TryFrom { /// The `Stop` variant of the ToJob enum. const STOP: Self; - /// If the message variant contains a hash, return it here - fn hash(&self) -> Option; + /// If the message variant contains its relay parent, return it here + fn relay_parent(&self) -> Option; } /// A JobHandle manages a particular job for a subsystem. @@ -506,7 +506,7 @@ where } Ok(Communication { msg }) => { if let Ok(to_job) = ::try_from(msg) { - match to_job.hash() { + match to_job.relay_parent() { Some(hash) => { if let Err(err) = jobs.send_msg(hash, to_job).await { log::error!("Failed to send a message to a job: {:?}", err); From 38ee247f96bbf3b7a70f2eebed827b80d04e7092 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 17:08:20 +0200 Subject: [PATCH 20/32] gracefully shut down running futures on Conclude --- node/subsystem/src/util.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 704814c64741..81bfe466a83c 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -499,9 +499,9 @@ where } } Ok(Signal(Conclude)) => { - // breaking the loop ends fn run, which drops `jobs`, which immediately drops all ongoing work - // - // REVIEW: is this the behavior we want? + // Breaking the loop ends fn run, which drops `jobs`, which immediately drops all ongoing work. + // We can afford to wait a little while to shut them all down properly before doing that. + future::join_all(jobs.running.drain().map(|(_, handle)| handle.stop())).await; return true; } Ok(Communication { msg }) => { From 8bb08b8b234707c8196f23a7e239ddae6d8ecd19 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Fri, 10 Jul 2020 17:15:47 +0200 Subject: [PATCH 21/32] ensure we're validating with the proper validator index --- node/subsystem/src/util.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 81bfe466a83c..45fa73548107 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -227,10 +227,17 @@ impl Validator { } /// Validate the payload with this validator + /// + /// Validation can only succeed if `signed.validator_index() == self.index()`. + /// Normally, this will always be the case for a properly operating program, + /// but it's double-checked here anyway. pub fn check_payload, RealPayload: Encode>( &self, signed: Signed, ) -> Result<(), ()> { + if signed.validator_index() != self.index { + return Err(()); + } signed.check_signature(&self.signing_context, &self.id()) } } From fa595c2cb33808c042dd55a7219a3c5f6de09359 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Mon, 13 Jul 2020 08:59:13 +0200 Subject: [PATCH 22/32] rename: handle_unhashed_msg -> handle_orphan_msg --- node/subsystem/src/util.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 45fa73548107..9f21b4ab60d0 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -319,7 +319,7 @@ pub trait JobTrait { tx_from: mpsc::Sender, ) -> Pin> + Send>>; - /// Handle a message which can't be dispatched to a particular job + /// Handle a message which has no relay parent, and therefore can't be dispatched to a particular job /// /// By default, this is implemented with a NOP function. However, if /// ToJob occasionally has messages which do not correspond to a particular @@ -329,7 +329,7 @@ pub trait JobTrait { // once we're implementing a subsystem which actually needs this feature. // In particular, we're quite likely to want this to return a future instead of // interrupting the active thread for the duration of the handler. - fn handle_unhashed_msg(_msg: Self::ToJob) -> Result<(), Self::Error> { + fn handle_orphan_msg(_msg: Self::ToJob) -> Result<(), Self::Error> { Ok(()) } } @@ -521,7 +521,7 @@ where } } None => { - if let Err(err) = Job::handle_unhashed_msg(to_job) { + if let Err(err) = Job::handle_orphan_msg(to_job) { log::error!("Failed to handle unhashed message: {:?}", err); return true; } From 940c111285dbcb876633d6cac4808b52aa1a7343 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Mon, 13 Jul 2020 11:28:11 +0200 Subject: [PATCH 23/32] impl Stream for Jobs This turns out to be relatively complicated and requires some unsafe code, so we'll want either detailed review, or to choose to revert this commit. --- node/subsystem/src/util.rs | 68 +++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 9f21b4ab60d0..1f23c8994027 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -29,7 +29,8 @@ use futures::{ future::Either, prelude::*, select, - task::{Spawn, SpawnError, SpawnExt}, + stream::Stream, + task::{self, Spawn, SpawnError, SpawnExt}, }; use futures_timer::Delay; use keystore::KeyStorePtr; @@ -42,6 +43,7 @@ use sp_core::Pair; use std::{ collections::HashMap, convert::{TryFrom, TryInto}, + marker::Unpin, pin::Pin, time::Duration, }; @@ -296,7 +298,7 @@ impl JobHandle { /// Jobs are instantiated and killed automatically on appropriate overseer messages. /// Other messages are passed along to and from the job via the overseer to other /// subsystems. -pub trait JobTrait { +pub trait JobTrait: Unpin { /// Message type to the job. Typically a subset of AllMessages. type ToJob: 'static + ToJobTrait + Send; /// Message type from the job. Typically a subset of AllMessages. @@ -413,12 +415,33 @@ impl Jobs { Ok(()) } - /// Get the next message from any of the underlying jobs. - async fn next(&mut self) -> Option { - self.outgoing_msgs.next().await.and_then(|(e, _)| match e { - StreamYield::Item(e) => Some(e), - _ => None, - }) + /// Get the pin projection for the `outgoing_msgs` field + /// + /// From the [pin docs]: + /// + /// > It is actually up to the author of the data structure to decide whether the pinned + /// > projection for a particular field turns `Pin<&mut Struct>` into `Pin<&mut Field>` + /// > or `&mut Field`. There are some constraints, though, and the most important constraint + /// > is _consistency_: every field can be _either_ projected to a pinned reference, _or_ + /// > have pinning removed as part of the projection. If both are done for the same field, + /// > that will likely be unsound! + /// + /// In this case, pinning is structural. + /// + /// ## Considerations + /// + /// 1. The struct must only be `Unpin` if all the structural fields are `Unpin`: ✔ + /// 2. The destructor of the struct must not move structural fields out of its argument: ✔ + /// 3. Uphold the `Drop` guarantee: once the struct is pinned, the memory which contains + /// the content is not overwritten or deallocated without calling the content's destructors. + /// I.e. you may not free or reuse the storage without calling `drop`. ✔ + /// 4. You must not offer any other operations (i.e. `take`) which could lead to data being + /// moved out of the structural fields when your type is pinned: ✔ + /// + /// [pin docs]: https://doc.rust-lang.org/std/pin/index.html#projections-and-structural-pinning + fn pin_get_outgoing_msgs(self: Pin<&mut Self>) -> Pin<&mut StreamUnordered>> { + // This is ok because `self.outgoing_msgs` is pinned when `self` is. + unsafe { self.map_unchecked_mut(|s| &mut s.outgoing_msgs) } } } @@ -426,12 +449,33 @@ impl Jobs { // we just abort them all. Still better than letting them dangle. impl Drop for Jobs { fn drop(&mut self) { - for job_handle in self.running.values() { - job_handle.abort_handle.abort(); + // `new_unchecked` is ok because we know this value is never used again after being dropped + inner_drop(unsafe { Pin::new_unchecked(self) }); + fn inner_drop(slf: Pin<&mut Jobs>) { + for job_handle in slf.running.values() { + job_handle.abort_handle.abort(); + } } } } +impl Stream for Jobs +where + Spawner: Spawn, + Job: JobTrait, +{ + type Item = Job::FromJob; + + fn poll_next(self: Pin<&mut Self>, cx: &mut task::Context) -> task::Poll> { + self.pin_get_outgoing_msgs() + .poll_next(cx) + .map(|opt| opt.and_then(|(stream_yield, _)| match stream_yield { + StreamYield::Item(msg) => Some(msg), + StreamYield::Finished(_) => None, + })) + } +} + /// A basic implementation of a subsystem. /// /// This struct is responsible for handling message traffic between @@ -447,7 +491,7 @@ pub struct JobManager { impl JobManager where - Spawner: Spawn + Clone + Send, + Spawner: Spawn + Clone + Send + Unpin, Context: SubsystemContext, Job: JobTrait, Job::RunArgs: Clone, @@ -552,7 +596,7 @@ where impl Subsystem for JobManager where - Spawner: Spawn + Send + Clone + 'static, + Spawner: Spawn + Send + Clone + Unpin + 'static, Context: SubsystemContext, ::Message: Into, Job: JobTrait + Send, From 11772f58afeddd9df6c072a04e9f49db13e2b146 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Mon, 13 Jul 2020 13:28:47 +0200 Subject: [PATCH 24/32] add missing documentation for public items --- node/subsystem/src/lib.rs | 3 +++ node/subsystem/src/util.rs | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/node/subsystem/src/lib.rs b/node/subsystem/src/lib.rs index c318d5440992..b6c3a79ef3d9 100644 --- a/node/subsystem/src/lib.rs +++ b/node/subsystem/src/lib.rs @@ -20,6 +20,8 @@ //! that communicate via message-passing. They are coordinated by an overseer, provided by a //! separate crate. +#![warn(missing_docs)] + use std::pin::Pin; use futures::prelude::*; @@ -57,6 +59,7 @@ pub enum FromOverseer { /// Some other `Subsystem`'s message. Communication { + /// Contained message msg: M, }, } diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 1f23c8994027..9ffe79f39f2c 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -54,14 +54,19 @@ pub const JOB_GRACEFUL_STOP_DURATION: Duration = Duration::from_secs(1); /// Capacity of channels to and from individual jobs pub const JOB_CHANNEL_CAPACITY: usize = 64; +/// Utility errors #[derive(Debug, derive_more::From)] pub enum Error { + /// Attempted to send or receive on a oneshot channel which had been canceled #[from] Oneshot(oneshot::Canceled), + /// Attempted to send on a MPSC channel which has been canceled #[from] Mpsc(mpsc::SendError), + /// Attempted to spawn a new task, and failed #[from] Spawn(SpawnError), + /// Attempted to convert from an AllMessages to a FromJob, and failed. SenderConversion(String), /// The local node is not a validator. NotAValidator, @@ -336,6 +341,13 @@ pub trait JobTrait: Unpin { } } +/// Jobs manager for a subsystem +/// +/// - Spawns new jobs for a given relay-parent on demand. +/// - Closes old jobs for a given relay-parent on demand. +/// - Dispatches messages to the appropriate job for a given relay-parent. +/// - When dropped, aborts all remaining jobs. +/// - implements `Stream`, collecting all messages from subordinate jobs. pub struct Jobs { spawner: Spawner, running: HashMap>, From dc9e12c764eddb62e7c03a55b67e2d773f62868a Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Mon, 13 Jul 2020 13:42:29 +0200 Subject: [PATCH 25/32] use pin-project to eliminate unsafe code from this codebase --- Cargo.lock | 1 + node/subsystem/Cargo.toml | 1 + node/subsystem/src/util.rs | 49 +++++++++----------------------------- 3 files changed, 13 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a2c08f13459..90fd96800ba9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4549,6 +4549,7 @@ dependencies = [ "futures-timer 3.0.2", "log 0.4.8", "parity-scale-codec", + "pin-project", "polkadot-node-primitives", "polkadot-primitives", "polkadot-statement-table", diff --git a/node/subsystem/Cargo.toml b/node/subsystem/Cargo.toml index fe743a9add41..188e7cbfa764 100644 --- a/node/subsystem/Cargo.toml +++ b/node/subsystem/Cargo.toml @@ -13,6 +13,7 @@ futures-timer = "3.0.2" keystore = { package = "sc-keystore", git = "https://github.com/paritytech/substrate", branch = "master" } log = "0.4.8" parity-scale-codec = "1.3.0" +pin-project = "0.4.22" polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } polkadot-statement-table = { path = "../../statement-table" } diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 9ffe79f39f2c..e872884dc6fb 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -35,6 +35,7 @@ use futures::{ use futures_timer::Delay; use keystore::KeyStorePtr; use parity_scale_codec::Encode; +use pin_project::{pin_project, pinned_drop}; use polkadot_primitives::v1::{ EncodeAs, Hash, HeadData, Id as ParaId, Signed, SigningContext, ValidatorId, ValidatorIndex, ValidatorPair, @@ -348,9 +349,11 @@ pub trait JobTrait: Unpin { /// - Dispatches messages to the appropriate job for a given relay-parent. /// - When dropped, aborts all remaining jobs. /// - implements `Stream`, collecting all messages from subordinate jobs. +#[pin_project(PinnedDrop)] pub struct Jobs { spawner: Spawner, running: HashMap>, + #[pin] outgoing_msgs: StreamUnordered>, job: std::marker::PhantomData, } @@ -426,47 +429,15 @@ impl Jobs { } Ok(()) } - - /// Get the pin projection for the `outgoing_msgs` field - /// - /// From the [pin docs]: - /// - /// > It is actually up to the author of the data structure to decide whether the pinned - /// > projection for a particular field turns `Pin<&mut Struct>` into `Pin<&mut Field>` - /// > or `&mut Field`. There are some constraints, though, and the most important constraint - /// > is _consistency_: every field can be _either_ projected to a pinned reference, _or_ - /// > have pinning removed as part of the projection. If both are done for the same field, - /// > that will likely be unsound! - /// - /// In this case, pinning is structural. - /// - /// ## Considerations - /// - /// 1. The struct must only be `Unpin` if all the structural fields are `Unpin`: ✔ - /// 2. The destructor of the struct must not move structural fields out of its argument: ✔ - /// 3. Uphold the `Drop` guarantee: once the struct is pinned, the memory which contains - /// the content is not overwritten or deallocated without calling the content's destructors. - /// I.e. you may not free or reuse the storage without calling `drop`. ✔ - /// 4. You must not offer any other operations (i.e. `take`) which could lead to data being - /// moved out of the structural fields when your type is pinned: ✔ - /// - /// [pin docs]: https://doc.rust-lang.org/std/pin/index.html#projections-and-structural-pinning - fn pin_get_outgoing_msgs(self: Pin<&mut Self>) -> Pin<&mut StreamUnordered>> { - // This is ok because `self.outgoing_msgs` is pinned when `self` is. - unsafe { self.map_unchecked_mut(|s| &mut s.outgoing_msgs) } - } } // Note that on drop, we don't have the chance to gracefully spin down each of the remaining handles; // we just abort them all. Still better than letting them dangle. -impl Drop for Jobs { - fn drop(&mut self) { - // `new_unchecked` is ok because we know this value is never used again after being dropped - inner_drop(unsafe { Pin::new_unchecked(self) }); - fn inner_drop(slf: Pin<&mut Jobs>) { - for job_handle in slf.running.values() { - job_handle.abort_handle.abort(); - } +#[pinned_drop] +impl PinnedDrop for Jobs { + fn drop(self: Pin<&mut Self>) { + for job_handle in self.running.values() { + job_handle.abort_handle.abort(); } } } @@ -479,7 +450,9 @@ where type Item = Job::FromJob; fn poll_next(self: Pin<&mut Self>, cx: &mut task::Context) -> task::Poll> { - self.pin_get_outgoing_msgs() + // pin-project the outgoing messages + self.project() + .outgoing_msgs .poll_next(cx) .map(|opt| opt.and_then(|(stream_yield, _)| match stream_yield { StreamYield::Item(msg) => Some(msg), From 802981a9eb227f3b37f327e6146958c65abe94bd Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Mon, 13 Jul 2020 14:16:37 +0200 Subject: [PATCH 26/32] rename SenderMessage -> FromJob --- node/subsystem/src/util.rs | 48 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index e872884dc6fb..26d6ee3f6adc 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -76,15 +76,15 @@ pub enum Error { } /// Request some data from the `RuntimeApi`. -pub async fn request_from_runtime( +pub async fn request_from_runtime( parent: Hash, - sender: &mut mpsc::Sender, + sender: &mut mpsc::Sender, request_builder: RequestBuilder, ) -> Result, Error> where RequestBuilder: FnOnce(oneshot::Sender) -> RuntimeApiRequest, - SenderMessage: TryFrom, - >::Error: std::fmt::Debug, + FromJob: TryFrom, + >::Error: std::fmt::Debug, { let (tx, rx) = oneshot::channel(); @@ -100,50 +100,50 @@ where } /// Request a validator set from the `RuntimeApi`. -pub async fn request_validators( +pub async fn request_validators( parent: Hash, - s: &mut mpsc::Sender, + s: &mut mpsc::Sender, ) -> Result>, Error> where - SenderMessage: TryFrom, - >::Error: std::fmt::Debug, + FromJob: TryFrom, + >::Error: std::fmt::Debug, { request_from_runtime(parent, s, |tx| RuntimeApiRequest::Validators(tx)).await } /// Request the scheduler roster from `RuntimeApi`. -pub async fn request_validator_groups( +pub async fn request_validator_groups( parent: Hash, - s: &mut mpsc::Sender, + s: &mut mpsc::Sender, ) -> Result, Error> where - SenderMessage: TryFrom, - >::Error: std::fmt::Debug, + FromJob: TryFrom, + >::Error: std::fmt::Debug, { request_from_runtime(parent, s, |tx| RuntimeApiRequest::ValidatorGroups(tx)).await } /// Request a `SigningContext` from the `RuntimeApi`. -pub async fn request_signing_context( +pub async fn request_signing_context( parent: Hash, - s: &mut mpsc::Sender, + s: &mut mpsc::Sender, ) -> Result, Error> where - SenderMessage: TryFrom, - >::Error: std::fmt::Debug, + FromJob: TryFrom, + >::Error: std::fmt::Debug, { request_from_runtime(parent, s, |tx| RuntimeApiRequest::SigningContext(tx)).await } /// Request `HeadData` for some `ParaId` from `RuntimeApi`. -pub async fn request_head_data( +pub async fn request_head_data( parent: Hash, - s: &mut mpsc::Sender, + s: &mut mpsc::Sender, id: ParaId, ) -> Result, Error> where - SenderMessage: TryFrom, - >::Error: std::fmt::Debug, + FromJob: TryFrom, + >::Error: std::fmt::Debug, { request_from_runtime(parent, s, |tx| RuntimeApiRequest::HeadData(id, tx)).await } @@ -168,14 +168,14 @@ pub struct Validator { impl Validator { /// Get a struct representing this node's validator if this node is in fact a validator in the context of the given block. - pub async fn new( + pub async fn new( parent: Hash, keystore: KeyStorePtr, - mut sender: mpsc::Sender, + mut sender: mpsc::Sender, ) -> Result where - SenderMessage: TryFrom, - >::Error: std::fmt::Debug, + FromJob: TryFrom, + >::Error: std::fmt::Debug, { // Note: request_validators and request_signing_context do not and cannot run concurrently: they both // have a mutable handle to the same sender. From a5639e36017a72656b478caddcaa30e2d4e6112a Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Mon, 13 Jul 2020 17:27:29 +0200 Subject: [PATCH 27/32] reenvision the subsystem requests as an extension trait This works within `util.rs`, but fails in `core/backing/src/lib.rs`, because we don't actually create the struct soon enough. Continuing down this path would imply substantial rewriting. --- node/core/backing/src/lib.rs | 74 ++------- node/subsystem/src/util.rs | 303 ++++++++++++++++++++++++----------- 2 files changed, 223 insertions(+), 154 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index d17eda17cec7..62974ced9d3c 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -43,13 +43,12 @@ use polkadot_subsystem::{ messages::{ AllMessages, AvailabilityStoreMessage, CandidateBackingMessage, CandidateSelectionMessage, CandidateValidationMessage, NewBackedCandidate, PoVDistributionMessage, ProvisionableData, - ProvisionerMessage, RuntimeApiMessage, StatementDistributionMessage, ValidationFailed, + ProvisionerMessage, RuntimeApiMessage, RuntimeApiRequest, StatementDistributionMessage, ValidationFailed, }, util::{ self, - request_signing_context, - request_validator_groups, - request_validators, + JobTrait, + JobTraitExt, Validator, }, }; @@ -198,23 +197,6 @@ impl From for AllMessages { } } -impl TryFrom for FromJob { - type Error = &'static str; - - fn try_from(f: AllMessages) -> Result { - match f { - AllMessages::AvailabilityStore(msg) => Ok(FromJob::AvailabilityStore(msg)), - AllMessages::RuntimeApi(msg) => Ok(FromJob::RuntimeApiMessage(msg)), - AllMessages::CandidateValidation(msg) => Ok(FromJob::CandidateValidation(msg)), - AllMessages::CandidateSelection(msg) => Ok(FromJob::CandidateSelection(msg)), - AllMessages::StatementDistribution(msg) => Ok(FromJob::StatementDistribution(msg)), - AllMessages::PoVDistribution(msg) => Ok(FromJob::PoVDistribution(msg)), - AllMessages::Provisioner(msg) => Ok(FromJob::Provisioner(msg)), - _ => Err("can't convert this AllMessages variant to FromJob"), - } - } -} - // It looks like it's not possible to do an `impl From` given the current state of // the code. So this does the necessary conversion. fn primitive_statement_to_table(s: &SignedFullStatement) -> TableSignedStatement { @@ -547,38 +529,6 @@ impl CandidateBackingJob { Ok(()) } - async fn request_pov_from_distribution( - &mut self, - descriptor: CandidateDescriptor, - ) -> Result, Error> { - let (tx, rx) = oneshot::channel(); - - self.tx_from.send(FromJob::PoVDistribution( - PoVDistributionMessage::FetchPoV(self.parent, descriptor, tx) - )).await?; - - Ok(rx.await?) - } - - async fn request_candidate_validation( - &mut self, - candidate: CandidateDescriptor, - pov: Arc, - ) -> Result { - let (tx, rx) = oneshot::channel(); - - self.tx_from.send(FromJob::CandidateValidation( - CandidateValidationMessage::ValidateFromChainState( - candidate, - pov, - tx, - ) - ) - ).await?; - - Ok(rx.await??) - } - async fn store_chunk( &mut self, id: ValidatorIndex, @@ -656,7 +606,7 @@ impl CandidateBackingJob { } } -impl util::JobTrait for CandidateBackingJob { +impl JobTrait for CandidateBackingJob { type ToJob = ToJob; type FromJob = FromJob; type Error = Error; @@ -672,9 +622,9 @@ impl util::JobTrait for CandidateBackingJob { ) -> Pin> + Send>> { async move { let (validators, roster, signing_context) = futures::try_join!( - request_validators(parent, &mut tx_from).await?, - request_validator_groups(parent, &mut tx_from).await?, - request_signing_context(parent, &mut tx_from).await?, + self.request_validators().await?, + self.request_validator_groups().await?, + self.request_signing_context().await?, )?; let validator = Validator::construct(&validators, signing_context, keystore.clone())?; @@ -724,6 +674,16 @@ impl util::JobTrait for CandidateBackingJob { } } +impl JobTraitExt for CandidateBackingJob { + fn sender(&self) -> mpsc::Sender { + self.tx_from.clone() + } + + fn relay_parent(&self) -> Hash { + self.parent + } +} + /// An implementation of the Candidate Backing subsystem. pub type CandidateBackingSubsystem = util::JobManager; diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 26d6ee3f6adc..e7e0f234857c 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -21,7 +21,7 @@ //! this module. use crate::{ - messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, SchedulerRoster}, + messages::{self, AllMessages, SchedulerRoster}, FromOverseer, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemResult, }; use futures::{ @@ -36,9 +36,10 @@ use futures_timer::Delay; use keystore::KeyStorePtr; use parity_scale_codec::Encode; use pin_project::{pin_project, pinned_drop}; +use polkadot_node_primitives::ValidationResult; use polkadot_primitives::v1::{ - EncodeAs, Hash, HeadData, Id as ParaId, Signed, SigningContext, - ValidatorId, ValidatorIndex, ValidatorPair, + CandidateDescriptor, EncodeAs, Hash, PoV, Signed, SigningContext, ValidatorId, ValidatorIndex, + ValidatorPair, }; use sp_core::Pair; use std::{ @@ -46,6 +47,7 @@ use std::{ convert::{TryFrom, TryInto}, marker::Unpin, pin::Pin, + sync::Arc, time::Duration, }; use streamunordered::{StreamUnordered, StreamYield}; @@ -67,6 +69,12 @@ pub enum Error { /// Attempted to spawn a new task, and failed #[from] Spawn(SpawnError), + /// Failed to determine whether a candidate was valid or invalid + #[from] + ValidationFailed(messages::ValidationFailed), + /// Failed to determine whether a candidate was valid or invalid, in a different way + #[from] + ValidationResult(ValidationResult), /// Attempted to convert from an AllMessages to a FromJob, and failed. SenderConversion(String), /// The local node is not a validator. @@ -75,79 +83,6 @@ pub enum Error { JobNotFound(Hash), } -/// Request some data from the `RuntimeApi`. -pub async fn request_from_runtime( - parent: Hash, - sender: &mut mpsc::Sender, - request_builder: RequestBuilder, -) -> Result, Error> -where - RequestBuilder: FnOnce(oneshot::Sender) -> RuntimeApiRequest, - FromJob: TryFrom, - >::Error: std::fmt::Debug, -{ - let (tx, rx) = oneshot::channel(); - - sender - .send( - AllMessages::RuntimeApi(RuntimeApiMessage::Request(parent, request_builder(tx))) - .try_into() - .map_err(|err| Error::SenderConversion(format!("{:?}", err)))?, - ) - .await?; - - Ok(rx) -} - -/// Request a validator set from the `RuntimeApi`. -pub async fn request_validators( - parent: Hash, - s: &mut mpsc::Sender, -) -> Result>, Error> -where - FromJob: TryFrom, - >::Error: std::fmt::Debug, -{ - request_from_runtime(parent, s, |tx| RuntimeApiRequest::Validators(tx)).await -} - -/// Request the scheduler roster from `RuntimeApi`. -pub async fn request_validator_groups( - parent: Hash, - s: &mut mpsc::Sender, -) -> Result, Error> -where - FromJob: TryFrom, - >::Error: std::fmt::Debug, -{ - request_from_runtime(parent, s, |tx| RuntimeApiRequest::ValidatorGroups(tx)).await -} - -/// Request a `SigningContext` from the `RuntimeApi`. -pub async fn request_signing_context( - parent: Hash, - s: &mut mpsc::Sender, -) -> Result, Error> -where - FromJob: TryFrom, - >::Error: std::fmt::Debug, -{ - request_from_runtime(parent, s, |tx| RuntimeApiRequest::SigningContext(tx)).await -} - -/// Request `HeadData` for some `ParaId` from `RuntimeApi`. -pub async fn request_head_data( - parent: Hash, - s: &mut mpsc::Sender, - id: ParaId, -) -> Result, Error> -where - FromJob: TryFrom, - >::Error: std::fmt::Debug, -{ - request_from_runtime(parent, s, |tx| RuntimeApiRequest::HeadData(id, tx)).await -} - /// From the given set of validators, find the first key we can sign with, if any. pub fn signing_key(validators: &[ValidatorId], keystore: &KeyStorePtr) -> Option { let keystore = keystore.read(); @@ -168,29 +103,19 @@ pub struct Validator { impl Validator { /// Get a struct representing this node's validator if this node is in fact a validator in the context of the given block. - pub async fn new( - parent: Hash, + pub async fn new( + job: Job, keystore: KeyStorePtr, - mut sender: mpsc::Sender, - ) -> Result - where - FromJob: TryFrom, - >::Error: std::fmt::Debug, - { - // Note: request_validators and request_signing_context do not and cannot run concurrently: they both - // have a mutable handle to the same sender. - // However, each of them returns a oneshot::Receiver, and those are resolved concurrently. + ) -> Result { let (validators, signing_context) = futures::try_join!( - request_validators(parent, &mut sender).await?, - request_signing_context(parent, &mut sender).await?, + job.request_validators().await?, + job.request_signing_context().await?, )?; Self::construct(&validators, signing_context, keystore) } - /// Construct a validator instance without performing runtime fetches. - /// - /// This can be useful if external code also needs the same data. + /// Construct a validator instance pub fn construct( validators: &[ValidatorId], signing_context: SigningContext, @@ -342,6 +267,191 @@ pub trait JobTrait: Unpin { } } +/// This trait provides a helper abstraction for sending a message to another subsystem +/// and collecting their response. +pub trait JobTraitExt: JobTrait { + /// Get a clone of the sender to the overseer. + fn sender(&self) -> mpsc::Sender; + + /// Get the relay parent for this job. + fn relay_parent(&self) -> Hash; + + /// Request some data from another subsystem via the Overseer. + /// + /// The arguments to `request_builder` are `parent_hash, response_sender`. + fn request( + &self, + request_builder: RequestBuilder, + ) -> Pin, Error>> + Send>> + where + RequestBuilder: 'static + Send + FnOnce(Hash, oneshot::Sender) -> Self::FromJob, + Response: Send, + { + let mut sender = self.sender(); + let parent = self.relay_parent(); + + async move { + let (tx, rx) = oneshot::channel(); + + sender.send(request_builder(parent, tx)).await?; + + Ok(rx) + } + .boxed() + } +} + +/// This trait enables a blanket impl of several useful getters. +/// +/// The blanket impl takes effect for any job for which `Job: JobTraitExt` and +/// `Job::FromJob: TryFrom`. +/// +/// It's distinct from JobTraitExt because it may be useful to `impl JobTraitExt` +/// for some `Job` for which `Job::FromJob: !TryFrom`, so we don't want +/// to tie this too tightly to that trait. +pub trait SubsystemRequests: JobTraitExt { + /// Request some data from another subsystem via the Overseer. + /// + /// The arguments to `request_builder` are `parent_hash, response_sender`. + /// + /// The difference between this methods and `self.request` is the return type of + /// `request_builder`: it returns an `AllMessages` instance instead of `Self::FromJob`. + fn request_allmessages( + &self, + request_builder: RequestBuilder, + ) -> Pin, Error>> + Send>> + where + RequestBuilder: 'static + Send + FnOnce(Hash, oneshot::Sender) -> AllMessages, + Response: Send; + + /// Request that a particular candidate is validated by `CandidateValidation`. + fn request_candidate_validation( + &self, + candidate: CandidateDescriptor, + pov: Arc, + ) -> Pin, Error>>>>; + + /// Request a PoV from `PoVDistribution` + fn request_pov( + &self, + candidate: CandidateDescriptor, + ) -> Pin>, Error>>>>; + + /// Request the current scheduler roster from the `RuntimeApi`. + fn request_scheduler_roster( + &self, + ) -> Pin, Error>>>>; + + /// Request the current signing context from the `RuntimeApi`. + fn request_signing_context( + &self, + ) -> Pin, Error>>>>; + + /// Request the current validator set from the `RuntimeApi`. + fn request_validators( + &self, + ) -> Pin>, Error>>>>; +} + +// We do not and cannot know the exact type of Job::FromJob, and it would be tedious for extension in the future +// to write a `FromJobTrait` which produced all necessary variants. Instead, we bound `Job::FromJob: TryFrom` +// so that we have a path to send the appropriate (limited) message through the job's outbound channel. +// +// This isn't an ideal API, as invalid requests become runtime errors instead of compile-time errors, +// but there isn't a better API apparent as of right now. +impl SubsystemRequests for Job +where + Job: JobTraitExt, + Job::FromJob: TryFrom, + >::Error: std::fmt::Debug, +{ + fn request_allmessages( + &self, + request_builder: RequestBuilder, + ) -> Pin, Error>> + Send>> + where + RequestBuilder: 'static + Send + FnOnce(Hash, oneshot::Sender) -> AllMessages, + Response: Send, + { + // although it would be cleaner to delegate to self.request(), doing so doesn't give us any good way + // to return an error in the event of conversion failure, so we just have to copy the implementation + // instead. + let mut sender = self.sender(); + let parent = self.relay_parent(); + + async move { + let (tx, rx) = oneshot::channel(); + + let msg = request_builder(parent, tx).try_into().map_err(|err| { + Error::SenderConversion(format!("could not construct AllMessages: {:?}", err)) + })?; + + sender.send(msg).await?; + + Ok(rx) + } + .boxed() + } + + fn request_candidate_validation( + &self, + candidate: CandidateDescriptor, + pov: Arc, + ) -> Pin, Error>>>> { + use messages::CandidateValidationMessage::ValidateFromChainState; + use AllMessages::CandidateValidation; + + let (tx, rx) = oneshot::channel(); + + let ram = self.request_allmessages(move |_parent, tx| { + CandidateValidation(ValidateFromChainState(candidate, pov, tx)) + }); + + async move { + let result = ram.await?.await??; + + tx.send(result)?; + + Ok(rx) + }.boxed() + } + + fn request_pov( + &self, + descriptor: CandidateDescriptor, + ) -> Pin>, Error>>>> { + use messages::PoVDistributionMessage::FetchPoV; + use AllMessages::PoVDistribution; + + self.request_allmessages(move |parent, tx| { + PoVDistribution(FetchPoV(parent, descriptor, tx)) + }) + } + + fn request_scheduler_roster( + &self, + ) -> Pin, Error>>>> { + unimplemented!() + } + + fn request_signing_context( + &self, + ) -> Pin, Error>>>> { + use messages::RuntimeApiMessage::Request; + use messages::RuntimeApiRequest::SigningContext; + use AllMessages::RuntimeApi; + + self + .request_allmessages(move |parent, tx| RuntimeApi(Request(parent, SigningContext(tx)))) + } + + fn request_validators( + &self, + ) -> Pin>, Error>>>> { + unimplemented!() + } +} + /// Jobs manager for a subsystem /// /// - Spawns new jobs for a given relay-parent on demand. @@ -451,13 +561,12 @@ where fn poll_next(self: Pin<&mut Self>, cx: &mut task::Context) -> task::Poll> { // pin-project the outgoing messages - self.project() - .outgoing_msgs - .poll_next(cx) - .map(|opt| opt.and_then(|(stream_yield, _)| match stream_yield { + self.project().outgoing_msgs.poll_next(cx).map(|opt| { + opt.and_then(|(stream_yield, _)| match stream_yield { StreamYield::Item(msg) => Some(msg), StreamYield::Finished(_) => None, - })) + }) + }) } } From e7f5276e27a6dfc2f6398061d62c1390e245bbfd Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Mon, 13 Jul 2020 17:28:46 +0200 Subject: [PATCH 28/32] Revert "reenvision the subsystem requests as an extension trait" This reverts commit a5639e36017a72656b478caddcaa30e2d4e6112a. The fact is, the new API is more complicated to no real benefit. --- node/core/backing/src/lib.rs | 74 +++++++-- node/subsystem/src/util.rs | 303 +++++++++++------------------------ 2 files changed, 154 insertions(+), 223 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index 62974ced9d3c..d17eda17cec7 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -43,12 +43,13 @@ use polkadot_subsystem::{ messages::{ AllMessages, AvailabilityStoreMessage, CandidateBackingMessage, CandidateSelectionMessage, CandidateValidationMessage, NewBackedCandidate, PoVDistributionMessage, ProvisionableData, - ProvisionerMessage, RuntimeApiMessage, RuntimeApiRequest, StatementDistributionMessage, ValidationFailed, + ProvisionerMessage, RuntimeApiMessage, StatementDistributionMessage, ValidationFailed, }, util::{ self, - JobTrait, - JobTraitExt, + request_signing_context, + request_validator_groups, + request_validators, Validator, }, }; @@ -197,6 +198,23 @@ impl From for AllMessages { } } +impl TryFrom for FromJob { + type Error = &'static str; + + fn try_from(f: AllMessages) -> Result { + match f { + AllMessages::AvailabilityStore(msg) => Ok(FromJob::AvailabilityStore(msg)), + AllMessages::RuntimeApi(msg) => Ok(FromJob::RuntimeApiMessage(msg)), + AllMessages::CandidateValidation(msg) => Ok(FromJob::CandidateValidation(msg)), + AllMessages::CandidateSelection(msg) => Ok(FromJob::CandidateSelection(msg)), + AllMessages::StatementDistribution(msg) => Ok(FromJob::StatementDistribution(msg)), + AllMessages::PoVDistribution(msg) => Ok(FromJob::PoVDistribution(msg)), + AllMessages::Provisioner(msg) => Ok(FromJob::Provisioner(msg)), + _ => Err("can't convert this AllMessages variant to FromJob"), + } + } +} + // It looks like it's not possible to do an `impl From` given the current state of // the code. So this does the necessary conversion. fn primitive_statement_to_table(s: &SignedFullStatement) -> TableSignedStatement { @@ -529,6 +547,38 @@ impl CandidateBackingJob { Ok(()) } + async fn request_pov_from_distribution( + &mut self, + descriptor: CandidateDescriptor, + ) -> Result, Error> { + let (tx, rx) = oneshot::channel(); + + self.tx_from.send(FromJob::PoVDistribution( + PoVDistributionMessage::FetchPoV(self.parent, descriptor, tx) + )).await?; + + Ok(rx.await?) + } + + async fn request_candidate_validation( + &mut self, + candidate: CandidateDescriptor, + pov: Arc, + ) -> Result { + let (tx, rx) = oneshot::channel(); + + self.tx_from.send(FromJob::CandidateValidation( + CandidateValidationMessage::ValidateFromChainState( + candidate, + pov, + tx, + ) + ) + ).await?; + + Ok(rx.await??) + } + async fn store_chunk( &mut self, id: ValidatorIndex, @@ -606,7 +656,7 @@ impl CandidateBackingJob { } } -impl JobTrait for CandidateBackingJob { +impl util::JobTrait for CandidateBackingJob { type ToJob = ToJob; type FromJob = FromJob; type Error = Error; @@ -622,9 +672,9 @@ impl JobTrait for CandidateBackingJob { ) -> Pin> + Send>> { async move { let (validators, roster, signing_context) = futures::try_join!( - self.request_validators().await?, - self.request_validator_groups().await?, - self.request_signing_context().await?, + request_validators(parent, &mut tx_from).await?, + request_validator_groups(parent, &mut tx_from).await?, + request_signing_context(parent, &mut tx_from).await?, )?; let validator = Validator::construct(&validators, signing_context, keystore.clone())?; @@ -674,16 +724,6 @@ impl JobTrait for CandidateBackingJob { } } -impl JobTraitExt for CandidateBackingJob { - fn sender(&self) -> mpsc::Sender { - self.tx_from.clone() - } - - fn relay_parent(&self) -> Hash { - self.parent - } -} - /// An implementation of the Candidate Backing subsystem. pub type CandidateBackingSubsystem = util::JobManager; diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index e7e0f234857c..26d6ee3f6adc 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -21,7 +21,7 @@ //! this module. use crate::{ - messages::{self, AllMessages, SchedulerRoster}, + messages::{AllMessages, RuntimeApiMessage, RuntimeApiRequest, SchedulerRoster}, FromOverseer, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemResult, }; use futures::{ @@ -36,10 +36,9 @@ use futures_timer::Delay; use keystore::KeyStorePtr; use parity_scale_codec::Encode; use pin_project::{pin_project, pinned_drop}; -use polkadot_node_primitives::ValidationResult; use polkadot_primitives::v1::{ - CandidateDescriptor, EncodeAs, Hash, PoV, Signed, SigningContext, ValidatorId, ValidatorIndex, - ValidatorPair, + EncodeAs, Hash, HeadData, Id as ParaId, Signed, SigningContext, + ValidatorId, ValidatorIndex, ValidatorPair, }; use sp_core::Pair; use std::{ @@ -47,7 +46,6 @@ use std::{ convert::{TryFrom, TryInto}, marker::Unpin, pin::Pin, - sync::Arc, time::Duration, }; use streamunordered::{StreamUnordered, StreamYield}; @@ -69,12 +67,6 @@ pub enum Error { /// Attempted to spawn a new task, and failed #[from] Spawn(SpawnError), - /// Failed to determine whether a candidate was valid or invalid - #[from] - ValidationFailed(messages::ValidationFailed), - /// Failed to determine whether a candidate was valid or invalid, in a different way - #[from] - ValidationResult(ValidationResult), /// Attempted to convert from an AllMessages to a FromJob, and failed. SenderConversion(String), /// The local node is not a validator. @@ -83,6 +75,79 @@ pub enum Error { JobNotFound(Hash), } +/// Request some data from the `RuntimeApi`. +pub async fn request_from_runtime( + parent: Hash, + sender: &mut mpsc::Sender, + request_builder: RequestBuilder, +) -> Result, Error> +where + RequestBuilder: FnOnce(oneshot::Sender) -> RuntimeApiRequest, + FromJob: TryFrom, + >::Error: std::fmt::Debug, +{ + let (tx, rx) = oneshot::channel(); + + sender + .send( + AllMessages::RuntimeApi(RuntimeApiMessage::Request(parent, request_builder(tx))) + .try_into() + .map_err(|err| Error::SenderConversion(format!("{:?}", err)))?, + ) + .await?; + + Ok(rx) +} + +/// Request a validator set from the `RuntimeApi`. +pub async fn request_validators( + parent: Hash, + s: &mut mpsc::Sender, +) -> Result>, Error> +where + FromJob: TryFrom, + >::Error: std::fmt::Debug, +{ + request_from_runtime(parent, s, |tx| RuntimeApiRequest::Validators(tx)).await +} + +/// Request the scheduler roster from `RuntimeApi`. +pub async fn request_validator_groups( + parent: Hash, + s: &mut mpsc::Sender, +) -> Result, Error> +where + FromJob: TryFrom, + >::Error: std::fmt::Debug, +{ + request_from_runtime(parent, s, |tx| RuntimeApiRequest::ValidatorGroups(tx)).await +} + +/// Request a `SigningContext` from the `RuntimeApi`. +pub async fn request_signing_context( + parent: Hash, + s: &mut mpsc::Sender, +) -> Result, Error> +where + FromJob: TryFrom, + >::Error: std::fmt::Debug, +{ + request_from_runtime(parent, s, |tx| RuntimeApiRequest::SigningContext(tx)).await +} + +/// Request `HeadData` for some `ParaId` from `RuntimeApi`. +pub async fn request_head_data( + parent: Hash, + s: &mut mpsc::Sender, + id: ParaId, +) -> Result, Error> +where + FromJob: TryFrom, + >::Error: std::fmt::Debug, +{ + request_from_runtime(parent, s, |tx| RuntimeApiRequest::HeadData(id, tx)).await +} + /// From the given set of validators, find the first key we can sign with, if any. pub fn signing_key(validators: &[ValidatorId], keystore: &KeyStorePtr) -> Option { let keystore = keystore.read(); @@ -103,19 +168,29 @@ pub struct Validator { impl Validator { /// Get a struct representing this node's validator if this node is in fact a validator in the context of the given block. - pub async fn new( - job: Job, + pub async fn new( + parent: Hash, keystore: KeyStorePtr, - ) -> Result { + mut sender: mpsc::Sender, + ) -> Result + where + FromJob: TryFrom, + >::Error: std::fmt::Debug, + { + // Note: request_validators and request_signing_context do not and cannot run concurrently: they both + // have a mutable handle to the same sender. + // However, each of them returns a oneshot::Receiver, and those are resolved concurrently. let (validators, signing_context) = futures::try_join!( - job.request_validators().await?, - job.request_signing_context().await?, + request_validators(parent, &mut sender).await?, + request_signing_context(parent, &mut sender).await?, )?; Self::construct(&validators, signing_context, keystore) } - /// Construct a validator instance + /// Construct a validator instance without performing runtime fetches. + /// + /// This can be useful if external code also needs the same data. pub fn construct( validators: &[ValidatorId], signing_context: SigningContext, @@ -267,191 +342,6 @@ pub trait JobTrait: Unpin { } } -/// This trait provides a helper abstraction for sending a message to another subsystem -/// and collecting their response. -pub trait JobTraitExt: JobTrait { - /// Get a clone of the sender to the overseer. - fn sender(&self) -> mpsc::Sender; - - /// Get the relay parent for this job. - fn relay_parent(&self) -> Hash; - - /// Request some data from another subsystem via the Overseer. - /// - /// The arguments to `request_builder` are `parent_hash, response_sender`. - fn request( - &self, - request_builder: RequestBuilder, - ) -> Pin, Error>> + Send>> - where - RequestBuilder: 'static + Send + FnOnce(Hash, oneshot::Sender) -> Self::FromJob, - Response: Send, - { - let mut sender = self.sender(); - let parent = self.relay_parent(); - - async move { - let (tx, rx) = oneshot::channel(); - - sender.send(request_builder(parent, tx)).await?; - - Ok(rx) - } - .boxed() - } -} - -/// This trait enables a blanket impl of several useful getters. -/// -/// The blanket impl takes effect for any job for which `Job: JobTraitExt` and -/// `Job::FromJob: TryFrom`. -/// -/// It's distinct from JobTraitExt because it may be useful to `impl JobTraitExt` -/// for some `Job` for which `Job::FromJob: !TryFrom`, so we don't want -/// to tie this too tightly to that trait. -pub trait SubsystemRequests: JobTraitExt { - /// Request some data from another subsystem via the Overseer. - /// - /// The arguments to `request_builder` are `parent_hash, response_sender`. - /// - /// The difference between this methods and `self.request` is the return type of - /// `request_builder`: it returns an `AllMessages` instance instead of `Self::FromJob`. - fn request_allmessages( - &self, - request_builder: RequestBuilder, - ) -> Pin, Error>> + Send>> - where - RequestBuilder: 'static + Send + FnOnce(Hash, oneshot::Sender) -> AllMessages, - Response: Send; - - /// Request that a particular candidate is validated by `CandidateValidation`. - fn request_candidate_validation( - &self, - candidate: CandidateDescriptor, - pov: Arc, - ) -> Pin, Error>>>>; - - /// Request a PoV from `PoVDistribution` - fn request_pov( - &self, - candidate: CandidateDescriptor, - ) -> Pin>, Error>>>>; - - /// Request the current scheduler roster from the `RuntimeApi`. - fn request_scheduler_roster( - &self, - ) -> Pin, Error>>>>; - - /// Request the current signing context from the `RuntimeApi`. - fn request_signing_context( - &self, - ) -> Pin, Error>>>>; - - /// Request the current validator set from the `RuntimeApi`. - fn request_validators( - &self, - ) -> Pin>, Error>>>>; -} - -// We do not and cannot know the exact type of Job::FromJob, and it would be tedious for extension in the future -// to write a `FromJobTrait` which produced all necessary variants. Instead, we bound `Job::FromJob: TryFrom` -// so that we have a path to send the appropriate (limited) message through the job's outbound channel. -// -// This isn't an ideal API, as invalid requests become runtime errors instead of compile-time errors, -// but there isn't a better API apparent as of right now. -impl SubsystemRequests for Job -where - Job: JobTraitExt, - Job::FromJob: TryFrom, - >::Error: std::fmt::Debug, -{ - fn request_allmessages( - &self, - request_builder: RequestBuilder, - ) -> Pin, Error>> + Send>> - where - RequestBuilder: 'static + Send + FnOnce(Hash, oneshot::Sender) -> AllMessages, - Response: Send, - { - // although it would be cleaner to delegate to self.request(), doing so doesn't give us any good way - // to return an error in the event of conversion failure, so we just have to copy the implementation - // instead. - let mut sender = self.sender(); - let parent = self.relay_parent(); - - async move { - let (tx, rx) = oneshot::channel(); - - let msg = request_builder(parent, tx).try_into().map_err(|err| { - Error::SenderConversion(format!("could not construct AllMessages: {:?}", err)) - })?; - - sender.send(msg).await?; - - Ok(rx) - } - .boxed() - } - - fn request_candidate_validation( - &self, - candidate: CandidateDescriptor, - pov: Arc, - ) -> Pin, Error>>>> { - use messages::CandidateValidationMessage::ValidateFromChainState; - use AllMessages::CandidateValidation; - - let (tx, rx) = oneshot::channel(); - - let ram = self.request_allmessages(move |_parent, tx| { - CandidateValidation(ValidateFromChainState(candidate, pov, tx)) - }); - - async move { - let result = ram.await?.await??; - - tx.send(result)?; - - Ok(rx) - }.boxed() - } - - fn request_pov( - &self, - descriptor: CandidateDescriptor, - ) -> Pin>, Error>>>> { - use messages::PoVDistributionMessage::FetchPoV; - use AllMessages::PoVDistribution; - - self.request_allmessages(move |parent, tx| { - PoVDistribution(FetchPoV(parent, descriptor, tx)) - }) - } - - fn request_scheduler_roster( - &self, - ) -> Pin, Error>>>> { - unimplemented!() - } - - fn request_signing_context( - &self, - ) -> Pin, Error>>>> { - use messages::RuntimeApiMessage::Request; - use messages::RuntimeApiRequest::SigningContext; - use AllMessages::RuntimeApi; - - self - .request_allmessages(move |parent, tx| RuntimeApi(Request(parent, SigningContext(tx)))) - } - - fn request_validators( - &self, - ) -> Pin>, Error>>>> { - unimplemented!() - } -} - /// Jobs manager for a subsystem /// /// - Spawns new jobs for a given relay-parent on demand. @@ -561,12 +451,13 @@ where fn poll_next(self: Pin<&mut Self>, cx: &mut task::Context) -> task::Poll> { // pin-project the outgoing messages - self.project().outgoing_msgs.poll_next(cx).map(|opt| { - opt.and_then(|(stream_yield, _)| match stream_yield { + self.project() + .outgoing_msgs + .poll_next(cx) + .map(|opt| opt.and_then(|(stream_yield, _)| match stream_yield { StreamYield::Item(msg) => Some(msg), StreamYield::Finished(_) => None, - }) - }) + })) } } From 0eaca1ba70ac5d73b3db4bce401cb766148b4ef4 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Tue, 14 Jul 2020 09:46:45 +0200 Subject: [PATCH 29/32] apply suggested futuresunordered join_all impl --- node/subsystem/src/util.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index 26d6ee3f6adc..c6a2b9332cfc 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -537,7 +537,21 @@ where Ok(Signal(Conclude)) => { // Breaking the loop ends fn run, which drops `jobs`, which immediately drops all ongoing work. // We can afford to wait a little while to shut them all down properly before doing that. - future::join_all(jobs.running.drain().map(|(_, handle)| handle.stop())).await; + // + // Forwarding the stream to a drain means we wait until all of the items in the stream + // have completed. Contrast with `into_future`, which turns it into a future of `(head, rest_stream)`. + use futures::stream::StreamExt; + use futures::stream::FuturesUnordered; + + let unordered = jobs.running + .drain() + .map(|(_, handle)| handle.stop()) + .collect::>(); + // now wait for all the futures to complete; collect a vector of their results + // this is strictly less efficient than draining them into oblivion, but this compiles, and that doesn't + // https://github.com/paritytech/polkadot/pull/1376#pullrequestreview-446488645 + let _ = async move { unordered.collect::>() }.await; + return true; } Ok(Communication { msg }) => { From 863d7334ff1e580de5c5a48507825cc1766aac94 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Tue, 14 Jul 2020 09:50:58 +0200 Subject: [PATCH 30/32] CandidateValidationMessage variants have no top-level relay parents --- node/subsystem/src/messages.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/subsystem/src/messages.rs b/node/subsystem/src/messages.rs index bab18527f56d..d3c630cb56f0 100644 --- a/node/subsystem/src/messages.rs +++ b/node/subsystem/src/messages.rs @@ -127,8 +127,8 @@ impl CandidateValidationMessage { /// If the current variant contains the relay parent hash, return it. pub fn relay_parent(&self) -> Option { match self { - Self::ValidateFromChainState(CandidateDescriptor{ relay_parent, ..}, _, _) => Some(*relay_parent), - Self::ValidateFromExhaustive(_, _, CandidateDescriptor{ relay_parent, ..}, _, _) => Some(*relay_parent), + Self::ValidateFromChainState(_, _, _) => None, + Self::ValidateFromExhaustive(_, _, _, _, _) => None, } } } From 1e14746c16d698a6d5bdac0af4f28f0ed937243a Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Tue, 14 Jul 2020 09:54:16 +0200 Subject: [PATCH 31/32] rename handle_orphan_msg -> handle_unanchored_msg --- node/subsystem/src/util.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/subsystem/src/util.rs b/node/subsystem/src/util.rs index c6a2b9332cfc..1b89bfb053a6 100644 --- a/node/subsystem/src/util.rs +++ b/node/subsystem/src/util.rs @@ -337,7 +337,7 @@ pub trait JobTrait: Unpin { // once we're implementing a subsystem which actually needs this feature. // In particular, we're quite likely to want this to return a future instead of // interrupting the active thread for the duration of the handler. - fn handle_orphan_msg(_msg: Self::ToJob) -> Result<(), Self::Error> { + fn handle_unanchored_msg(_msg: Self::ToJob) -> Result<(), Self::Error> { Ok(()) } } @@ -564,7 +564,7 @@ where } } None => { - if let Err(err) = Job::handle_orphan_msg(to_job) { + if let Err(err) = Job::handle_unanchored_msg(to_job) { log::error!("Failed to handle unhashed message: {:?}", err); return true; } From e3f76a44f98116e4bfbb47fcbecc038ab4d30463 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Tue, 14 Jul 2020 10:24:48 +0200 Subject: [PATCH 32/32] make most node-core-backing types private Now the only public types exposed in that module are CandidateBackingSubsystem and ToJob. While ideally we could reduce the public interface to only the former type, that doesn't work because ToJob appears in the public interface of CandidateBackingSubsystem. This also involves changing the definition of CandidateBackingSubsystem; it is no longer a typedef, but a struct wrapping the job manager. --- node/core/backing/src/lib.rs | 49 +++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/node/core/backing/src/lib.rs b/node/core/backing/src/lib.rs index d17eda17cec7..e0309ec8428e 100644 --- a/node/core/backing/src/lib.rs +++ b/node/core/backing/src/lib.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use bitvec::vec::BitVec; use futures::{ channel::{mpsc, oneshot}, - task::SpawnError, + task::{Spawn, SpawnError}, Future, FutureExt, SinkExt, StreamExt, }; @@ -40,6 +40,7 @@ use polkadot_node_primitives::{ ValidationOutputs, ValidationResult, }; use polkadot_subsystem::{ + Subsystem, SubsystemContext, SpawnedSubsystem, messages::{ AllMessages, AvailabilityStoreMessage, CandidateBackingMessage, CandidateSelectionMessage, CandidateValidationMessage, NewBackedCandidate, PoVDistributionMessage, ProvisionableData, @@ -64,7 +65,7 @@ use statement_table::{ }; #[derive(Debug, derive_more::From)] -pub enum Error { +enum Error { CandidateNotFound, InvalidSignature, #[from] @@ -82,7 +83,7 @@ pub enum Error { } /// Holds all data needed for candidate backing job operation. -pub struct CandidateBackingJob { +struct CandidateBackingJob { /// The hash of the relay parent on top of which this job is doing it's work. parent: Hash, /// Inbound message channel receiving part. @@ -174,7 +175,7 @@ impl util::ToJobTrait for ToJob { } /// A message type that is sent from `CandidateBackingJob` to `CandidateBackingSubsystem`. -pub enum FromJob { +enum FromJob { AvailabilityStore(AvailabilityStoreMessage), RuntimeApiMessage(RuntimeApiMessage), CandidateValidation(CandidateValidationMessage), @@ -724,9 +725,45 @@ impl util::JobTrait for CandidateBackingJob { } } +/// Manager type for the CandidateBackingSubsystem +type Manager = util::JobManager; + /// An implementation of the Candidate Backing subsystem. -pub type CandidateBackingSubsystem = - util::JobManager; +pub struct CandidateBackingSubsystem { + manager: Manager, +} + +impl CandidateBackingSubsystem +where + Spawner: Clone + Spawn + Send + Unpin, + Context: SubsystemContext, + ToJob: From<::Message>, +{ + /// Creates a new `CandidateBackingSubsystem`. + pub fn new(spawner: Spawner, keystore: KeyStorePtr) -> Self { + CandidateBackingSubsystem { + manager: util::JobManager::new(spawner, keystore) + } + } + + /// Run this subsystem + pub async fn run(ctx: Context, keystore: KeyStorePtr, spawner: Spawner) { + >::run(ctx, keystore, spawner).await + } +} + +impl Subsystem for CandidateBackingSubsystem +where + Spawner: Spawn + Send + Clone + Unpin + 'static, + Context: SubsystemContext, + ::Message: Into, +{ + fn start(self, ctx: Context) -> SpawnedSubsystem { + self.manager.start(ctx) + } +} + + #[cfg(test)] mod tests {