Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] feat(consensus): committees data extractor (BFT-443) #2518

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

This file was deleted.

1 change: 1 addition & 0 deletions core/lib/dal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ zksync_contracts.workspace = true
zksync_types.workspace = true
zksync_consensus_roles.workspace = true
zksync_consensus_storage.workspace = true
zksync_consensus_crypto.workspace = true
zksync_protobuf.workspace = true
zksync_db_connection.workspace = true

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
ALTER TABLE l1_batches_consensus
DROP COLUMN committee;

ALTER TABLE l1_batches_consensus
ALTER COLUMN certificate SET NOT NULL;
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
ALTER TABLE l1_batches_consensus
ADD committee JSONB NOT NULL;

ALTER TABLE l1_batches_consensus
ALTER COLUMN certificate DROP NOT NULL;

--ALTER TABLE l1_batches_consensus DROP CONSTRAINT l1_batches_consensus_certificate_check;
--
--ALTER TABLE l1_batches_consensus ADD CONSTRAINT l1_batches_consensus_certificate_check
--CHECK (certificate IS NULL OR ((certificate->'message'->>'number')::numeric = l1_batch_number));
--
117 changes: 116 additions & 1 deletion core/lib/dal/src/consensus/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ pub mod proto;
mod tests;

use anyhow::{anyhow, Context as _};
use zksync_consensus_roles::validator;
use zksync_consensus_crypto::ByteFmt;
use zksync_consensus_roles::{attester, validator};
use zksync_protobuf::{required, ProtoFmt, ProtoRepr};
use zksync_types::{
abi, ethabi,
Expand Down Expand Up @@ -439,3 +440,117 @@ impl ProtoRepr for proto::Transaction {
}
}
}

#[derive(Debug, PartialEq)]
pub struct Validator {
/// Validator public key.
pub pub_key: validator::PublicKey,
/// Validator weight inside a Committee.
pub weight: validator::Weight,
/// Proof-of-possession (PoP) of the validator's public key (a signature over the public key)
pub proof_of_possession: validator::Signature,
}

impl ProtoFmt for Validator {
type Proto = proto::Validator;

fn read(r: &Self::Proto) -> anyhow::Result<Self> {
Ok(Self {
pub_key: required(&r.pub_key)
.map(|x| validator::PublicKey::decode(x))
.context("pub_key")??,
weight: *required(&r.weight).context("weight")?,
proof_of_possession: required(&r.proof_of_possession)
.map(|x| validator::Signature::decode(x))
.context("proof_of_possession")??,
})
}

fn build(&self) -> Self::Proto {
Self::Proto {
pub_key: Some(self.pub_key.encode()),
weight: Some(self.weight),
proof_of_possession: Some(self.proof_of_possession.encode()),
}
}
}

#[derive(Debug, PartialEq)]
pub struct ValidatorCommittee {
pub members: Vec<Validator>,
}

impl ProtoFmt for ValidatorCommittee {
type Proto = proto::ValidatorCommittee;

fn read(r: &Self::Proto) -> anyhow::Result<Self> {
Ok(Self {
members: r
.members
.iter()
.map(ProtoFmt::read)
.collect::<Result<_, _>>()
.context("members")?,
})
}

fn build(&self) -> Self::Proto {
Self::Proto {
members: self.members.iter().map(|x| x.build()).collect(),
}
}
}

#[derive(Debug, PartialEq, Clone)]
pub struct Attester {
/// Attester public key.
pub pub_key: attester::PublicKey,
/// Attester weight inside a Committee.
pub weight: attester::Weight,
}

impl ProtoFmt for Attester {
type Proto = proto::Attester;

fn read(r: &Self::Proto) -> anyhow::Result<Self> {
Ok(Self {
pub_key: required(&r.pub_key)
.map(|x| attester::PublicKey::decode(x))
.context("pub_key")??,
weight: *required(&r.weight).context("weight")?,
})
}

fn build(&self) -> Self::Proto {
Self::Proto {
pub_key: Some(self.pub_key.encode()),
weight: Some(self.weight),
}
}
}

#[derive(Debug, PartialEq, Default, Clone)]
pub struct AttesterCommittee {
pub members: Vec<Attester>,
}

impl ProtoFmt for AttesterCommittee {
type Proto = proto::AttesterCommittee;

fn read(r: &Self::Proto) -> anyhow::Result<Self> {
Ok(Self {
members: r
.members
.iter()
.map(ProtoFmt::read)
.collect::<Result<_, _>>()
.context("members")?,
})
}

fn build(&self) -> Self::Proto {
Self::Proto {
members: self.members.iter().map(|x| x.build()).collect(),
}
}
}
19 changes: 19 additions & 0 deletions core/lib/dal/src/consensus/proto/mod.proto
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,22 @@ message PaymasterParams {
optional bytes paymaster_address = 1; // required; H160
optional bytes paymaster_input = 2; // required
}

message Validator {
optional bytes pub_key = 1; // required
optional uint64 weight = 2; // required
optional bytes proof_of_possession = 3; // required
}

message Attester {
optional bytes pub_key = 1; // required
optional uint64 weight = 2; // required
}

message ValidatorCommittee {
repeated Validator members = 1; // required
}

message AttesterCommittee {
repeated Attester members = 1; // required
}
118 changes: 109 additions & 9 deletions core/lib/dal/src/consensus_dal.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
use anyhow::Context as _;
use bigdecimal::Zero as _;
use zksync_consensus_roles::{attester, validator};
use zksync_consensus_roles::{
attester, attester::BatchNumber, validator, validator::WeightedValidator,
};
use zksync_consensus_storage::{BlockStoreState, ReplicaState};
use zksync_db_connection::{
connection::Connection,
error::{DalError, DalResult, SqlxContext},
instrument::{InstrumentExt, Instrumented},
};
use zksync_protobuf::ProtoFmt;
use zksync_types::L2BlockNumber;

pub use crate::consensus::Payload;
use crate::{Core, CoreDal};
pub use crate::consensus::{Attester, AttesterCommittee, Payload, Validator, ValidatorCommittee};
use crate::{consensus, Core, CoreDal};

/// Storage access methods for `zksync_core::consensus` module.
#[derive(Debug)]
Expand Down Expand Up @@ -317,7 +320,36 @@ impl ConsensusDal<'_, '_> {
else {
return Ok(None);
};
Ok(Some(zksync_protobuf::serde::deserialize(row.certificate)?))
let Some(certificate) = row.certificate else {
return Ok(None);
};
Ok(Some(zksync_protobuf::serde::deserialize(certificate)?))
}

pub async fn batch_committee(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be great to state it in docstrings that this returns the committee which should attest to the batch_number, and not the one which is the result of executing the batch, ie. the ones attesting for batch_number+1.

&mut self,
batch_number: attester::BatchNumber,
) -> anyhow::Result<Option<AttesterCommittee>> {
let Some(row) = sqlx::query!(
r#"
SELECT
committee
FROM
l1_batches_consensus
WHERE
l1_batch_number = $1
"#,
i64::try_from(batch_number.0)?
)
.instrument("batch_committee")
.report_latency()
.fetch_optional(self.storage)
.await?
else {
return Ok(None);
};

Ok(Some(zksync_protobuf::serde::deserialize(row.committee)?))
}

/// Fetches a range of L2 blocks from storage and converts them to `Payload`s.
Expand Down Expand Up @@ -402,6 +434,32 @@ impl ConsensusDal<'_, '_> {
Ok(())
}

pub async fn insert_batch_committee(
&mut self,
number: BatchNumber,
committee: AttesterCommittee,
) -> anyhow::Result<()> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we are going to query the last batch with committees/certificate, then keeping them in the same table makes little sense, because we will need extra indices for efficient lookups. Do we really want to go this way?

Copy link
Contributor Author

@moshababo moshababo Jul 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Querying the last batch with committees is a one-time read on server start, so I didn’t thought it’s a big deal. And I don’t see such an index for the query of the last batch with certificate (unless it’s applied internally via the toolchain).

In the overall, I’m not against separating the tables, and originally suggested that. IIRC @RomanBrodetski thought it’s an overkill, and that having all info in respect to a given batch in one place is useful.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is no index, because the the table serves as index for itself when it has just 1 non null column. With 2 columns, it won't be the case.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because we will need extra indices for efficient lookups.

why exactly? select ... order by ID desc where certificate is not null will only scan the rows for which there is committee but not certificate. Given that it happens on startup, it shouldn't be a problem - am I missing smt?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is no index, because the the table serves as index for itself when it has just 1 non null column. With 2 columns, it won't be the case.

Right, I overlooked that in that case there's no need for WHERE clause.

why exactly? select ... order by ID desc where certificate is not null will only scan the rows for which there is committee but not certificate. Given that it happens on startup, it shouldn't be a problem - am I missing smt?

If column X is NOT NULL, there's no need for WHERE X IS NOT NULL, and SELECT MAX(PK) alone is sufficient.

Since the NULL column is the certificate, and not the committee, this won't happen just on startup. However, I'm not sure about the frequency of this query and whether it justifies an index.

Copy link
Contributor

@aakoshh aakoshh Aug 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW I would keep the committee in a separate table. As I understand we are doing this without concern for performance, but there are many ways to store the committee, and some don't involve repeating the entire committee in every batch, only when it changes. To keep our options open for future changes, I would keep them separate from the beginning, rather than start messing around with existing constraints to make room for this in the same table, having to revisit the (luckily few) existing queries where we now have to add filtering, turning inserts into updates.

let res = sqlx::query!(
r#"
INSERT INTO
l1_batches_consensus (l1_batch_number, certificate, committee, created_at, updated_at)
VALUES
($1, NULL, $2, NOW(), NOW())
ON CONFLICT (l1_batch_number) DO NOTHING
"#,
i64::try_from(number.0).context("overflow")?.into(),
zksync_protobuf::serde::serialize(&committee, serde_json::value::Serializer).unwrap(),
)
.instrument("insert_batch_committee")
.report_latency()
.execute(self.storage)
.await?;
if res.rows_affected().is_zero() {
tracing::debug!(l1_batch_number = ?number, "duplicate batch certificate");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
tracing::debug!(l1_batch_number = ?number, "duplicate batch certificate");
tracing::debug!(l1_batch_number = ?number, "duplicate batch committee");

}
Ok(())
}

/// Inserts a certificate for the L1 batch.
/// Noop if a certificate for the same L1 batch is already present.
/// No verification is performed - it cannot be performed due to circular dependency on
Expand All @@ -412,11 +470,12 @@ impl ConsensusDal<'_, '_> {
) -> anyhow::Result<()> {
let res = sqlx::query!(
r#"
INSERT INTO
l1_batches_consensus (l1_batch_number, certificate, created_at, updated_at)
VALUES
($1, $2, NOW(), NOW())
ON CONFLICT (l1_batch_number) DO NOTHING
UPDATE l1_batches_consensus
SET
certificate = $2,
updated_at = NOW()
WHERE
l1_batch_number = $1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The insertion of committees happens asynchronously. In theory the certificate could arrive before the polling loop tries to extract the committee from the VM, in which case this update would not do anything, and the certificate would be lost.

Also the logging that happens as "duplicate certificate" when 0 rows are affected would be untrue, it would apply for a case described above.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought this shouldn't happen: before the certificate for batch N can be verified the node needs the committee from the execution of batch N-1, so I assume there is plenty of time between when the committee is extracted from N-1 and inserted for N, especially because we need the commitment for batch N too.

Still it feels like having two tables would avoid any confusion.

"#,
i64::try_from(cert.message.number.0).context("overflow")?,
// Unwrap is ok, because serialization should always succeed.
Expand All @@ -443,6 +502,8 @@ impl ConsensusDal<'_, '_> {
MAX(l1_batch_number) AS "number"
FROM
l1_batches_consensus
WHERE
certificate IS NOT NULL
"#
)
.instrument("get_last_batch_certificate_number")
Expand Down Expand Up @@ -491,6 +552,45 @@ impl ConsensusDal<'_, '_> {
// and doesn't have prunning enabled.
Ok(attester::BatchNumber(0))
}

pub async fn get_last_batch_committee_number(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some docstrings to explain what this method is supposed to do? It sounds like "the committee number of the last batch" but there is no such thing. Is it the batch number of the last executed batch, for which a committee is known? Isn't that available somewhere else?

&mut self,
) -> anyhow::Result<Option<attester::BatchNumber>> {
let row = sqlx::query!(
r#"
SELECT
MAX(l1_batch_number) AS "number"
FROM
l1_batches_consensus
"#
)
.instrument("get_last_batch_committee_number")
.report_latency()
.fetch_one(self.storage)
.await?;

let Some(n) = row.number else {
return Ok(None);
};
Ok(Some(attester::BatchNumber(
n.try_into().context("overflow")?,
)))
}

pub async fn next_batch_to_extract_committee(
&mut self,
) -> anyhow::Result<attester::BatchNumber> {
// First batch that we don't have a committee for.
if let Some(last) = self
.get_last_batch_committee_number()
.await
.context("get_last_batch_certificate_number()")?
{
return Ok(last + 1);
}

Ok(attester::BatchNumber(0))
}
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion core/node/consensus/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use zksync_config::{
};
use zksync_consensus_crypto::{Text, TextFmt};
use zksync_consensus_executor as executor;
use zksync_consensus_roles::{attester, node, validator};
use zksync_consensus_roles::{attester, node, validator, validator::Signature};

fn read_secret_text<T: TextFmt>(text: Option<&Secret<String>>) -> anyhow::Result<Option<T>> {
text.map(|text| Text::new(text.expose_secret()).decode())
Expand Down
Loading
Loading