From afbea8da8a618f428cb6bd2450428e5819819f2d Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Fri, 21 Jun 2024 11:46:55 -0600 Subject: [PATCH 01/17] chore: copy existing protocol into new --- common/types/proposal.go | 2 + go.mod | 1 + go.sum | 2 + hare4/compat/weakcoin.go | 34 ++ hare4/eligibility/interface.go | 26 + hare4/eligibility/mocks.go | 320 ++++++++++ hare4/eligibility/oracle.go | 577 ++++++++++++++++++ hare4/eligibility/oracle_scale.go | 76 +++ hare4/eligibility/oracle_test.go | 965 +++++++++++++++++++++++++++++ hare4/hare.go | 632 +++++++++++++++++++ hare4/hare_test.go | 972 ++++++++++++++++++++++++++++++ hare4/legacy_oracle.go | 70 +++ hare4/malfeasance.go | 96 +++ hare4/malfeasance_test.go | 310 ++++++++++ hare4/metrics.go | 66 ++ hare4/protocol.go | 630 +++++++++++++++++++ hare4/protocol_test.go | 584 ++++++++++++++++++ hare4/tracer.go | 25 + hare4/types.go | 172 ++++++ hare4/types_scale.go | 200 ++++++ hare4/types_test.go | 41 ++ 21 files changed, 5801 insertions(+) create mode 100644 hare4/compat/weakcoin.go create mode 100644 hare4/eligibility/interface.go create mode 100644 hare4/eligibility/mocks.go create mode 100644 hare4/eligibility/oracle.go create mode 100644 hare4/eligibility/oracle_scale.go create mode 100644 hare4/eligibility/oracle_test.go create mode 100644 hare4/hare.go create mode 100644 hare4/hare_test.go create mode 100644 hare4/legacy_oracle.go create mode 100644 hare4/malfeasance.go create mode 100644 hare4/malfeasance_test.go create mode 100644 hare4/metrics.go create mode 100644 hare4/protocol.go create mode 100644 hare4/protocol_test.go create mode 100644 hare4/tracer.go create mode 100644 hare4/types.go create mode 100644 hare4/types_scale.go create mode 100644 hare4/types_test.go diff --git a/common/types/proposal.go b/common/types/proposal.go index 37c363e4ff..dfb9238a91 100644 --- a/common/types/proposal.go +++ b/common/types/proposal.go @@ -28,6 +28,8 @@ type ProposalID Hash20 // EmptyProposalID is a canonical empty ProposalID. var EmptyProposalID = ProposalID{} +type CompactProposalID [2]byte + // EncodeScale implements scale codec interface. 
func (id *ProposalID) EncodeScale(e *scale.Encoder) (int, error) { return scale.EncodeByteArray(e, id[:]) diff --git a/go.mod b/go.mod index ac50366248..ab5853e45e 100644 --- a/go.mod +++ b/go.mod @@ -74,6 +74,7 @@ require ( cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect cloud.google.com/go/iam v1.1.8 // indirect + github.com/aead/siphash v1.0.1 // indirect github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect github.com/anacrolix/chansync v0.3.0 // indirect github.com/anacrolix/missinggo v1.2.1 // indirect diff --git a/go.sum b/go.sum index 2ddced53c9..5a4bd90e5d 100644 --- a/go.sum +++ b/go.sum @@ -25,6 +25,8 @@ github.com/ALTree/bigfloat v0.2.0 h1:AwNzawrpFuw55/YDVlcPw0F0cmmXrmngBHhVrvdXPvM github.com/ALTree/bigfloat v0.2.0/go.mod h1:+NaH2gLeY6RPBPPQf4aRotPPStg+eXc8f9ZaE4vRfD4= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w= +github.com/aead/siphash v1.0.1 h1:FwHfE/T45KPKYuuSAKyyvE+oPWcaQ+CUmFW0bPlM+kg= +github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/anacrolix/chansync v0.3.0 h1:lRu9tbeuw3wl+PhMu/r+JJCRu5ArFXIluOgdF0ao6/U= diff --git a/hare4/compat/weakcoin.go b/hare4/compat/weakcoin.go new file mode 100644 index 0000000000..79404aceb9 --- /dev/null +++ b/hare4/compat/weakcoin.go @@ -0,0 +1,34 @@ +package compat + +import ( + "context" + + "go.uber.org/zap" + + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/hare3" +) + +type weakCoin interface { + Set(types.LayerID, bool) error +} + +func ReportWeakcoin(ctx context.Context, logger *zap.Logger, from <-chan hare3.WeakCoinOutput, to weakCoin) { + for { + select { + case <-ctx.Done(): + logger.Info("weak coin reporter exited") + return + case out, open := <-from: + if !open { + return + } + if err := to.Set(out.Layer, out.Coin); err != nil { + logger.Error("failed to update weakcoin", + zap.Uint32("lid", out.Layer.Uint32()), + zap.Error(err), + ) + } + } + } +} diff --git a/hare4/eligibility/interface.go b/hare4/eligibility/interface.go new file mode 100644 index 0000000000..e87f959dc5 --- /dev/null +++ b/hare4/eligibility/interface.go @@ -0,0 +1,26 @@ +package eligibility + +import ( + "context" + + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/signing" +) + +//go:generate mockgen -typed -package=eligibility -destination=./mocks.go -source=./interface.go + +type activeSetCache interface { + Add(key types.EpochID, value *cachedActiveSet) (evicted bool) + Get(key types.EpochID) (value *cachedActiveSet, ok bool) +} + +type vrfVerifier interface { + Verify(nodeID types.NodeID, msg []byte, sig types.VrfSignature) bool +} + +// Rolacle is the roles oracle provider. 
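+// It exposes the three operations hare needs from the eligibility oracle:
+// Proof produces a VRF proof for a (layer, round) pair, CalcEligibility counts
+// how many eligibilities such a proof grants its signer, and Validate checks a
+// claimed eligibility count against a proof.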
+type Rolacle interface { + Validate(context.Context, types.LayerID, uint32, int, types.NodeID, types.VrfSignature, uint16) (bool, error) + CalcEligibility(context.Context, types.LayerID, uint32, int, types.NodeID, types.VrfSignature) (uint16, error) + Proof(context.Context, *signing.VRFSigner, types.LayerID, uint32) (types.VrfSignature, error) +} diff --git a/hare4/eligibility/mocks.go b/hare4/eligibility/mocks.go new file mode 100644 index 0000000000..e99c29ac08 --- /dev/null +++ b/hare4/eligibility/mocks.go @@ -0,0 +1,320 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: ./interface.go +// +// Generated by this command: +// +// mockgen -typed -package=eligibility -destination=./mocks.go -source=./interface.go +// + +// Package eligibility is a generated GoMock package. +package eligibility + +import ( + context "context" + reflect "reflect" + + types "github.com/spacemeshos/go-spacemesh/common/types" + signing "github.com/spacemeshos/go-spacemesh/signing" + gomock "go.uber.org/mock/gomock" +) + +// MockactiveSetCache is a mock of activeSetCache interface. +type MockactiveSetCache struct { + ctrl *gomock.Controller + recorder *MockactiveSetCacheMockRecorder +} + +// MockactiveSetCacheMockRecorder is the mock recorder for MockactiveSetCache. +type MockactiveSetCacheMockRecorder struct { + mock *MockactiveSetCache +} + +// NewMockactiveSetCache creates a new mock instance. +func NewMockactiveSetCache(ctrl *gomock.Controller) *MockactiveSetCache { + mock := &MockactiveSetCache{ctrl: ctrl} + mock.recorder = &MockactiveSetCacheMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockactiveSetCache) EXPECT() *MockactiveSetCacheMockRecorder { + return m.recorder +} + +// Add mocks base method. +func (m *MockactiveSetCache) Add(key types.EpochID, value *cachedActiveSet) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Add", key, value) + ret0, _ := ret[0].(bool) + return ret0 +} + +// Add indicates an expected call of Add. +func (mr *MockactiveSetCacheMockRecorder) Add(key, value any) *MockactiveSetCacheAddCall { + mr.mock.ctrl.T.Helper() + call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Add", reflect.TypeOf((*MockactiveSetCache)(nil).Add), key, value) + return &MockactiveSetCacheAddCall{Call: call} +} + +// MockactiveSetCacheAddCall wrap *gomock.Call +type MockactiveSetCacheAddCall struct { + *gomock.Call +} + +// Return rewrite *gomock.Call.Return +func (c *MockactiveSetCacheAddCall) Return(evicted bool) *MockactiveSetCacheAddCall { + c.Call = c.Call.Return(evicted) + return c +} + +// Do rewrite *gomock.Call.Do +func (c *MockactiveSetCacheAddCall) Do(f func(types.EpochID, *cachedActiveSet) bool) *MockactiveSetCacheAddCall { + c.Call = c.Call.Do(f) + return c +} + +// DoAndReturn rewrite *gomock.Call.DoAndReturn +func (c *MockactiveSetCacheAddCall) DoAndReturn(f func(types.EpochID, *cachedActiveSet) bool) *MockactiveSetCacheAddCall { + c.Call = c.Call.DoAndReturn(f) + return c +} + +// Get mocks base method. +func (m *MockactiveSetCache) Get(key types.EpochID) (*cachedActiveSet, bool) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Get", key) + ret0, _ := ret[0].(*cachedActiveSet) + ret1, _ := ret[1].(bool) + return ret0, ret1 +} + +// Get indicates an expected call of Get. 
+func (mr *MockactiveSetCacheMockRecorder) Get(key any) *MockactiveSetCacheGetCall { + mr.mock.ctrl.T.Helper() + call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Get", reflect.TypeOf((*MockactiveSetCache)(nil).Get), key) + return &MockactiveSetCacheGetCall{Call: call} +} + +// MockactiveSetCacheGetCall wrap *gomock.Call +type MockactiveSetCacheGetCall struct { + *gomock.Call +} + +// Return rewrite *gomock.Call.Return +func (c *MockactiveSetCacheGetCall) Return(value *cachedActiveSet, ok bool) *MockactiveSetCacheGetCall { + c.Call = c.Call.Return(value, ok) + return c +} + +// Do rewrite *gomock.Call.Do +func (c *MockactiveSetCacheGetCall) Do(f func(types.EpochID) (*cachedActiveSet, bool)) *MockactiveSetCacheGetCall { + c.Call = c.Call.Do(f) + return c +} + +// DoAndReturn rewrite *gomock.Call.DoAndReturn +func (c *MockactiveSetCacheGetCall) DoAndReturn(f func(types.EpochID) (*cachedActiveSet, bool)) *MockactiveSetCacheGetCall { + c.Call = c.Call.DoAndReturn(f) + return c +} + +// MockvrfVerifier is a mock of vrfVerifier interface. +type MockvrfVerifier struct { + ctrl *gomock.Controller + recorder *MockvrfVerifierMockRecorder +} + +// MockvrfVerifierMockRecorder is the mock recorder for MockvrfVerifier. +type MockvrfVerifierMockRecorder struct { + mock *MockvrfVerifier +} + +// NewMockvrfVerifier creates a new mock instance. +func NewMockvrfVerifier(ctrl *gomock.Controller) *MockvrfVerifier { + mock := &MockvrfVerifier{ctrl: ctrl} + mock.recorder = &MockvrfVerifierMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockvrfVerifier) EXPECT() *MockvrfVerifierMockRecorder { + return m.recorder +} + +// Verify mocks base method. +func (m *MockvrfVerifier) Verify(nodeID types.NodeID, msg []byte, sig types.VrfSignature) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Verify", nodeID, msg, sig) + ret0, _ := ret[0].(bool) + return ret0 +} + +// Verify indicates an expected call of Verify. +func (mr *MockvrfVerifierMockRecorder) Verify(nodeID, msg, sig any) *MockvrfVerifierVerifyCall { + mr.mock.ctrl.T.Helper() + call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Verify", reflect.TypeOf((*MockvrfVerifier)(nil).Verify), nodeID, msg, sig) + return &MockvrfVerifierVerifyCall{Call: call} +} + +// MockvrfVerifierVerifyCall wrap *gomock.Call +type MockvrfVerifierVerifyCall struct { + *gomock.Call +} + +// Return rewrite *gomock.Call.Return +func (c *MockvrfVerifierVerifyCall) Return(arg0 bool) *MockvrfVerifierVerifyCall { + c.Call = c.Call.Return(arg0) + return c +} + +// Do rewrite *gomock.Call.Do +func (c *MockvrfVerifierVerifyCall) Do(f func(types.NodeID, []byte, types.VrfSignature) bool) *MockvrfVerifierVerifyCall { + c.Call = c.Call.Do(f) + return c +} + +// DoAndReturn rewrite *gomock.Call.DoAndReturn +func (c *MockvrfVerifierVerifyCall) DoAndReturn(f func(types.NodeID, []byte, types.VrfSignature) bool) *MockvrfVerifierVerifyCall { + c.Call = c.Call.DoAndReturn(f) + return c +} + +// MockRolacle is a mock of Rolacle interface. +type MockRolacle struct { + ctrl *gomock.Controller + recorder *MockRolacleMockRecorder +} + +// MockRolacleMockRecorder is the mock recorder for MockRolacle. +type MockRolacleMockRecorder struct { + mock *MockRolacle +} + +// NewMockRolacle creates a new mock instance. 
+func NewMockRolacle(ctrl *gomock.Controller) *MockRolacle { + mock := &MockRolacle{ctrl: ctrl} + mock.recorder = &MockRolacleMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockRolacle) EXPECT() *MockRolacleMockRecorder { + return m.recorder +} + +// CalcEligibility mocks base method. +func (m *MockRolacle) CalcEligibility(arg0 context.Context, arg1 types.LayerID, arg2 uint32, arg3 int, arg4 types.NodeID, arg5 types.VrfSignature) (uint16, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CalcEligibility", arg0, arg1, arg2, arg3, arg4, arg5) + ret0, _ := ret[0].(uint16) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CalcEligibility indicates an expected call of CalcEligibility. +func (mr *MockRolacleMockRecorder) CalcEligibility(arg0, arg1, arg2, arg3, arg4, arg5 any) *MockRolacleCalcEligibilityCall { + mr.mock.ctrl.T.Helper() + call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CalcEligibility", reflect.TypeOf((*MockRolacle)(nil).CalcEligibility), arg0, arg1, arg2, arg3, arg4, arg5) + return &MockRolacleCalcEligibilityCall{Call: call} +} + +// MockRolacleCalcEligibilityCall wrap *gomock.Call +type MockRolacleCalcEligibilityCall struct { + *gomock.Call +} + +// Return rewrite *gomock.Call.Return +func (c *MockRolacleCalcEligibilityCall) Return(arg0 uint16, arg1 error) *MockRolacleCalcEligibilityCall { + c.Call = c.Call.Return(arg0, arg1) + return c +} + +// Do rewrite *gomock.Call.Do +func (c *MockRolacleCalcEligibilityCall) Do(f func(context.Context, types.LayerID, uint32, int, types.NodeID, types.VrfSignature) (uint16, error)) *MockRolacleCalcEligibilityCall { + c.Call = c.Call.Do(f) + return c +} + +// DoAndReturn rewrite *gomock.Call.DoAndReturn +func (c *MockRolacleCalcEligibilityCall) DoAndReturn(f func(context.Context, types.LayerID, uint32, int, types.NodeID, types.VrfSignature) (uint16, error)) *MockRolacleCalcEligibilityCall { + c.Call = c.Call.DoAndReturn(f) + return c +} + +// Proof mocks base method. +func (m *MockRolacle) Proof(arg0 context.Context, arg1 *signing.VRFSigner, arg2 types.LayerID, arg3 uint32) (types.VrfSignature, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Proof", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(types.VrfSignature) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Proof indicates an expected call of Proof. +func (mr *MockRolacleMockRecorder) Proof(arg0, arg1, arg2, arg3 any) *MockRolacleProofCall { + mr.mock.ctrl.T.Helper() + call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Proof", reflect.TypeOf((*MockRolacle)(nil).Proof), arg0, arg1, arg2, arg3) + return &MockRolacleProofCall{Call: call} +} + +// MockRolacleProofCall wrap *gomock.Call +type MockRolacleProofCall struct { + *gomock.Call +} + +// Return rewrite *gomock.Call.Return +func (c *MockRolacleProofCall) Return(arg0 types.VrfSignature, arg1 error) *MockRolacleProofCall { + c.Call = c.Call.Return(arg0, arg1) + return c +} + +// Do rewrite *gomock.Call.Do +func (c *MockRolacleProofCall) Do(f func(context.Context, *signing.VRFSigner, types.LayerID, uint32) (types.VrfSignature, error)) *MockRolacleProofCall { + c.Call = c.Call.Do(f) + return c +} + +// DoAndReturn rewrite *gomock.Call.DoAndReturn +func (c *MockRolacleProofCall) DoAndReturn(f func(context.Context, *signing.VRFSigner, types.LayerID, uint32) (types.VrfSignature, error)) *MockRolacleProofCall { + c.Call = c.Call.DoAndReturn(f) + return c +} + +// Validate mocks base method. 
+func (m *MockRolacle) Validate(arg0 context.Context, arg1 types.LayerID, arg2 uint32, arg3 int, arg4 types.NodeID, arg5 types.VrfSignature, arg6 uint16) (bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Validate", arg0, arg1, arg2, arg3, arg4, arg5, arg6) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Validate indicates an expected call of Validate. +func (mr *MockRolacleMockRecorder) Validate(arg0, arg1, arg2, arg3, arg4, arg5, arg6 any) *MockRolacleValidateCall { + mr.mock.ctrl.T.Helper() + call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Validate", reflect.TypeOf((*MockRolacle)(nil).Validate), arg0, arg1, arg2, arg3, arg4, arg5, arg6) + return &MockRolacleValidateCall{Call: call} +} + +// MockRolacleValidateCall wrap *gomock.Call +type MockRolacleValidateCall struct { + *gomock.Call +} + +// Return rewrite *gomock.Call.Return +func (c *MockRolacleValidateCall) Return(arg0 bool, arg1 error) *MockRolacleValidateCall { + c.Call = c.Call.Return(arg0, arg1) + return c +} + +// Do rewrite *gomock.Call.Do +func (c *MockRolacleValidateCall) Do(f func(context.Context, types.LayerID, uint32, int, types.NodeID, types.VrfSignature, uint16) (bool, error)) *MockRolacleValidateCall { + c.Call = c.Call.Do(f) + return c +} + +// DoAndReturn rewrite *gomock.Call.DoAndReturn +func (c *MockRolacleValidateCall) DoAndReturn(f func(context.Context, types.LayerID, uint32, int, types.NodeID, types.VrfSignature, uint16) (bool, error)) *MockRolacleValidateCall { + c.Call = c.Call.DoAndReturn(f) + return c +} diff --git a/hare4/eligibility/oracle.go b/hare4/eligibility/oracle.go new file mode 100644 index 0000000000..823aa71910 --- /dev/null +++ b/hare4/eligibility/oracle.go @@ -0,0 +1,577 @@ +package eligibility + +import ( + "context" + "errors" + "fmt" + "math" + "sync" + + lru "github.com/hashicorp/golang-lru/v2" + "github.com/spacemeshos/fixed" + "golang.org/x/exp/maps" + + "github.com/spacemeshos/go-spacemesh/atxsdata" + "github.com/spacemeshos/go-spacemesh/codec" + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/log" + "github.com/spacemeshos/go-spacemesh/miner" + "github.com/spacemeshos/go-spacemesh/signing" + "github.com/spacemeshos/go-spacemesh/sql" + "github.com/spacemeshos/go-spacemesh/sql/activesets" + "github.com/spacemeshos/go-spacemesh/sql/ballots" + "github.com/spacemeshos/go-spacemesh/system" +) + +const ( + // CertifyRound is not part of the hare protocol, but it shares the same oracle for eligibility. + CertifyRound uint32 = math.MaxUint32 >> 1 +) + +const ( + activesCacheSize = 5 // we don't expect to handle more than two layers concurrently + maxSupportedN = (math.MaxInt32 / 2) + 1 // higher values result in an overflow when calculating CDF +) + +var ( + errZeroCommitteeSize = errors.New("zero committee size") + errEmptyActiveSet = errors.New("empty active set") + errZeroTotalWeight = errors.New("zero total weight") + ErrNotActive = errors.New("oracle: miner is not active in epoch") +) + +type identityWeight struct { + atx types.ATXID + weight uint64 +} + +type cachedActiveSet struct { + set map[types.NodeID]identityWeight + total uint64 +} + +func (c *cachedActiveSet) atxs() []types.ATXID { + atxs := make([]types.ATXID, 0, len(c.set)) + for _, id := range c.set { + atxs = append(atxs, id.atx) + } + return atxs +} + +// Config is the configuration of the oracle package. 
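+// A minimal usage sketch (values here are illustrative, not recommendations)
+// showing how a Config is injected via the WithConfig option when constructing
+// the oracle with New (defined below):
+//
+//	oracle := New(beacons, db, atxsdata, verifier, layersPerEpoch,
+//		WithConfig(Config{ConfidenceParam: 10}),
+//		WithLogger(logger),
+//	)
+//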
+type Config struct {
+	// ConfidenceParam specifies for how many layers into an epoch hare keeps using the active set
+	// generated in the previous epoch. For example, if the epoch size is 100 and the confidence is 10,
+	// hare will use the previous active set for layers 0-9 and then generate a new active set.
+	//
+	// This is done so that we have higher `confidence` that hare will succeed at least
+	// once during this interval. If it doesn't, we have to provide a centralized fallback.
+	ConfidenceParam uint32 `mapstructure:"eligibility-confidence-param"`
+}
+
+func (c *Config) MarshalLogObject(encoder log.ObjectEncoder) error {
+	encoder.AddUint32("confidence param", c.ConfidenceParam)
+	return nil
+}
+
+// DefaultConfig returns the default configuration for the oracle package.
+func DefaultConfig() Config {
+	return Config{ConfidenceParam: 1}
+}
+
+// Oracle is the hare eligibility oracle.
+type Oracle struct {
+	mu           sync.Mutex
+	activesCache activeSetCache
+	fallback     map[types.EpochID][]types.ATXID
+	sync         system.SyncStateProvider
+	// NOTE(dshulyak) on switch from synced to not synced reset the cache
+	// to cope with https://github.com/spacemeshos/go-spacemesh/issues/4552
+	// until graded oracle is implemented
+	synced bool
+
+	beacons        system.BeaconGetter
+	atxsdata       *atxsdata.Data
+	db             sql.Executor
+	vrfVerifier    vrfVerifier
+	layersPerEpoch uint32
+	cfg            Config
+	log            log.Log
+}
+
+type Opt func(*Oracle)
+
+func WithConfig(config Config) Opt {
+	return func(o *Oracle) {
+		o.cfg = config
+	}
+}
+
+func WithLogger(logger log.Log) Opt {
+	return func(o *Oracle) {
+		o.log = logger
+	}
+}
+
+// New returns a new eligibility oracle instance.
+func New(
+	beacons system.BeaconGetter,
+	db sql.Executor,
+	atxsdata *atxsdata.Data,
+	vrfVerifier vrfVerifier,
+	layersPerEpoch uint32,
+	opts ...Opt,
+) *Oracle {
+	activesCache, err := lru.New[types.EpochID, *cachedActiveSet](activesCacheSize)
+	if err != nil {
+		panic("failed to create lru cache for active set: " + err.Error())
+	}
+	oracle := &Oracle{
+		beacons:        beacons,
+		db:             db,
+		atxsdata:       atxsdata,
+		vrfVerifier:    vrfVerifier,
+		layersPerEpoch: layersPerEpoch,
+		activesCache:   activesCache,
+		fallback:       map[types.EpochID][]types.ATXID{},
+		cfg:            DefaultConfig(),
+		log:            log.NewNop(),
+	}
+	for _, opt := range opts {
+		opt(oracle)
+	}
+	oracle.log.With().Info("hare oracle initialized", log.Uint32("epoch size", layersPerEpoch), log.Inline(&oracle.cfg))
+	return oracle
+}
+
+//go:generate scalegen -types VrfMessage
+
+// VrfMessage is a verification message. It is also the payload for the signature in `types.HareEligibility`.
+type VrfMessage struct {
+	Type   types.EligibilityType // always types.EligibilityHare
+	Beacon types.Beacon
+	Round  uint32
+	Layer  types.LayerID
+}
+
+func (o *Oracle) SetSync(sync system.SyncStateProvider) {
+	o.mu.Lock()
+	defer o.mu.Unlock()
+	o.sync = sync
+}
+
+func (o *Oracle) resetCacheOnSynced(ctx context.Context) {
+	if o.sync == nil {
+		return
+	}
+	synced := o.synced
+	o.synced = o.sync.IsSynced(ctx)
+	if !synced && o.synced {
+		ac, err := lru.New[types.EpochID, *cachedActiveSet](activesCacheSize)
+		if err != nil {
+			o.log.With().Fatal("failed to create lru cache for active set", log.Err(err))
+		}
+		o.activesCache = ac
+	}
+}
+
+// buildVRFMessage builds the VRF message used as input for hare eligibility validation.
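+// The returned bytes are the scale encoding of
+//
+//	VrfMessage{Type: types.EligibilityHare, Beacon: beacon, Round: round, Layer: layer}
+//
+// where beacon is the beacon of the layer's epoch, so every honest party derives
+// the same message for a given (layer, round) pair.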
+func (o *Oracle) buildVRFMessage(ctx context.Context, layer types.LayerID, round uint32) ([]byte, error) {
+	beacon, err := o.beacons.GetBeacon(layer.GetEpoch())
+	if err != nil {
+		return nil, fmt.Errorf("get beacon: %w", err)
+	}
+	return codec.MustEncode(&VrfMessage{Type: types.EligibilityHare, Beacon: beacon, Round: round, Layer: layer}), nil
+}
+
+func (o *Oracle) totalWeight(ctx context.Context, layer types.LayerID) (uint64, error) {
+	actives, err := o.actives(ctx, layer)
+	if err != nil {
+		return 0, err
+	}
+	return actives.total, nil
+}
+
+func (o *Oracle) minerWeight(ctx context.Context, layer types.LayerID, id types.NodeID) (uint64, error) {
+	actives, err := o.actives(ctx, layer)
+	if err != nil {
+		return 0, err
+	}
+
+	w, ok := actives.set[id]
+	if !ok {
+		return 0, fmt.Errorf("%w: %v", ErrNotActive, id)
+	}
+	return w.weight, nil
+}
+
+func calcVrfFrac(vrfSig types.VrfSignature) fixed.Fixed {
+	return fixed.FracFromBytes(vrfSig[:8])
+}
+
+func (o *Oracle) prepareEligibilityCheck(
+	ctx context.Context,
+	layer types.LayerID,
+	round uint32,
+	committeeSize int,
+	id types.NodeID,
+	vrfSig types.VrfSignature,
+) (int, fixed.Fixed, fixed.Fixed, bool, error) {
+	logger := o.log.WithContext(ctx).WithFields(
+		layer,
+		layer.GetEpoch(),
+		log.Stringer("smesher", id),
+		log.Uint32("round", round),
+		log.Int("committee_size", committeeSize),
+	)
+
+	if committeeSize < 1 {
+		logger.With().Error("committee size must be positive", log.Int("committee_size", committeeSize))
+		return 0, fixed.Fixed{}, fixed.Fixed{}, true, errZeroCommitteeSize
+	}
+
+	// calc hash & check threshold
+	// this is cheap in case the node is not eligible
+	minerWeight, err := o.minerWeight(ctx, layer, id)
+	if err != nil {
+		return 0, fixed.Fixed{}, fixed.Fixed{}, true, err
+	}
+
+	msg, err := o.buildVRFMessage(ctx, layer, round)
+	if err != nil {
+		logger.With().Warning("could not build vrf message", log.Err(err))
+		return 0, fixed.Fixed{}, fixed.Fixed{}, true, err
+	}
+
+	// validate message
+	if !o.vrfVerifier.Verify(id, msg, vrfSig) {
+		logger.Debug("eligibility: a node did not pass vrf signature verification")
+		return 0, fixed.Fixed{}, fixed.Fixed{}, true, nil
+	}
+
+	// get the total weight of the active set
+	totalWeight, err := o.totalWeight(ctx, layer)
+	if err != nil {
+		logger.With().Error("failed to get total weight", log.Err(err))
+		return 0, fixed.Fixed{}, fixed.Fixed{}, true, err
+	}
+
+	// require totalWeight > 0
+	if totalWeight == 0 {
+		logger.Warning("eligibility: total weight is zero")
+		return 0, fixed.Fixed{}, fixed.Fixed{}, true, errZeroTotalWeight
+	}
+
+	logger.With().Debug("preparing eligibility check",
+		log.Uint64("miner_weight", minerWeight),
+		log.Uint64("total_weight", totalWeight),
+	)
+
+	n := minerWeight
+
+	// calc p
+	if uint64(committeeSize) > totalWeight {
+		logger.With().Warning("committee size is greater than total weight",
+			log.Int("committee_size", committeeSize),
+			log.Uint64("total_weight", totalWeight),
+		)
+		totalWeight *= uint64(committeeSize)
+		n *= uint64(committeeSize)
+	}
+	if n > maxSupportedN {
+		return 0, fixed.Fixed{}, fixed.Fixed{}, false, fmt.Errorf(
+			"miner weight exceeds supported maximum (id: %v, weight: %d, max: %d)",
+			id,
+			minerWeight,
+			maxSupportedN,
+		)
+	}
+
+	p := fixed.DivUint64(uint64(committeeSize), totalWeight)
+	return int(n), p, calcVrfFrac(vrfSig), false, nil
+}
+
+// Validate validates the number of eligibilities of ID on the given Layer where msg is the VRF message, sig is the role
+func (o *Oracle) Validate( + ctx context.Context, + layer types.LayerID, + round uint32, + committeeSize int, + id types.NodeID, + sig types.VrfSignature, + eligibilityCount uint16, +) (bool, error) { + n, p, vrfFrac, done, err := o.prepareEligibilityCheck(ctx, layer, round, committeeSize, id, sig) + if done || err != nil { + return false, err + } + + defer func() { + if msg := recover(); msg != nil { + o.log.WithContext(ctx).With().Fatal("panic in validate", + log.Any("msg", msg), + log.Int("n", n), + log.String("p", p.String()), + log.String("vrf_frac", vrfFrac.String()), + ) + } + }() + + x := int(eligibilityCount) + if !fixed.BinCDF(n, p, x-1).GreaterThan(vrfFrac) && vrfFrac.LessThan(fixed.BinCDF(n, p, x)) { + return true, nil + } + o.log.WithContext(ctx).With().Warning("eligibility: node did not pass vrf eligibility threshold", + layer, + log.Uint32("round", round), + log.Int("committee_size", committeeSize), + id, + log.Uint16("eligibility_count", eligibilityCount), + log.Int("n", n), + log.Float64("p", p.Float()), + log.Float64("vrf_frac", vrfFrac.Float()), + log.Int("x", x), + ) + return false, nil +} + +// CalcEligibility calculates the number of eligibilities of ID on the given Layer where msg is the VRF message, sig is +// the role proof and assuming commSize as the expected committee size. +func (o *Oracle) CalcEligibility( + ctx context.Context, + layer types.LayerID, + round uint32, + committeeSize int, + id types.NodeID, + vrfSig types.VrfSignature, +) (uint16, error) { + n, p, vrfFrac, done, err := o.prepareEligibilityCheck(ctx, layer, round, committeeSize, id, vrfSig) + if done { + return 0, err + } + + defer func() { + if msg := recover(); msg != nil { + o.log.With().Fatal("panic in calc eligibility", + layer, + layer.GetEpoch(), + log.Uint32("round_id", round), + log.Any("msg", msg), + log.Int("committee_size", committeeSize), + log.Int("n", n), + log.Float64("p", p.Float()), + log.Float64("vrf_frac", vrfFrac.Float()), + ) + } + }() + + o.log.With().Debug("params", + layer, + layer.GetEpoch(), + log.Uint32("round_id", round), + log.Int("committee_size", committeeSize), + log.Int("n", n), + log.Float64("p", p.Float()), + log.Float64("vrf_frac", vrfFrac.Float()), + ) + + for x := 0; x < n; x++ { + if fixed.BinCDF(n, p, x).GreaterThan(vrfFrac) { + // even with large N and large P, x will be << 2^16, so this cast is safe + return uint16(x), nil + } + } + + // since BinCDF(n, p, n) is 1 for any p, this code can only be reached if n is much smaller + // than 2^16 (so that BinCDF(n, p, n-1) is still lower than vrfFrac) + return uint16(n), nil +} + +// Proof returns the role proof for the current Layer & Round. +func (o *Oracle) Proof( + ctx context.Context, + signer *signing.VRFSigner, + layer types.LayerID, + round uint32, +) (types.VrfSignature, error) { + beacon, err := o.beacons.GetBeacon(layer.GetEpoch()) + if err != nil { + return types.EmptyVrfSignature, fmt.Errorf("get beacon: %w", err) + } + return GenVRF(ctx, signer, beacon, layer, round), nil +} + +// GenVRF generates vrf for hare eligibility. +func GenVRF( + ctx context.Context, + signer *signing.VRFSigner, + beacon types.Beacon, + layer types.LayerID, + round uint32, +) types.VrfSignature { + return signer.Sign( + codec.MustEncode(&VrfMessage{Type: types.EligibilityHare, Beacon: beacon, Round: round, Layer: layer}), + ) +} + +// Returns a map of all active node IDs in the specified layer id. 
+func (o *Oracle) actives(ctx context.Context, targetLayer types.LayerID) (*cachedActiveSet, error) { + if !targetLayer.After(types.GetEffectiveGenesis()) { + return nil, errEmptyActiveSet + } + targetEpoch := targetLayer.GetEpoch() + // the first bootstrap data targets first epoch after genesis (epoch 2) + // and the epoch where checkpoint recovery happens + if targetEpoch > types.GetEffectiveGenesis().Add(1).GetEpoch() && + targetLayer.Difference(targetEpoch.FirstLayer()) < o.cfg.ConfidenceParam { + targetEpoch -= 1 + } + o.log.WithContext(ctx).With().Debug("hare oracle getting active set", + log.Stringer("target_layer", targetLayer), + log.Stringer("target_layer_epoch", targetLayer.GetEpoch()), + log.Stringer("target_epoch", targetEpoch), + ) + + o.mu.Lock() + defer o.mu.Unlock() + o.resetCacheOnSynced(ctx) + if value, exists := o.activesCache.Get(targetEpoch); exists { + return value, nil + } + activeSet, err := o.computeActiveSet(ctx, targetEpoch) + if err != nil { + return nil, err + } + if len(activeSet) == 0 { + return nil, errEmptyActiveSet + } + activeWeights, err := o.computeActiveWeights(targetEpoch, activeSet) + if err != nil { + return nil, err + } + + aset := &cachedActiveSet{set: activeWeights} + for _, aweight := range activeWeights { + aset.total += aweight.weight + } + o.log.WithContext(ctx).With().Info("got hare active set", log.Int("count", len(activeWeights))) + o.activesCache.Add(targetEpoch, aset) + return aset, nil +} + +func (o *Oracle) ActiveSet(ctx context.Context, targetEpoch types.EpochID) ([]types.ATXID, error) { + aset, err := o.actives(ctx, targetEpoch.FirstLayer().Add(o.cfg.ConfidenceParam)) + if err != nil { + return nil, err + } + return aset.atxs(), nil +} + +func (o *Oracle) computeActiveSet(ctx context.Context, targetEpoch types.EpochID) ([]types.ATXID, error) { + activeSet, ok := o.fallback[targetEpoch] + if ok { + o.log.WithContext(ctx).With().Info("using fallback active set", + targetEpoch, + log.Int("size", len(activeSet)), + ) + return activeSet, nil + } + + activeSet, err := miner.ActiveSetFromEpochFirstBlock(o.db, targetEpoch) + if err != nil && !errors.Is(err, sql.ErrNotFound) { + return nil, err + } + if len(activeSet) == 0 { + return o.activeSetFromRefBallots(targetEpoch) + } + return activeSet, nil +} + +func (o *Oracle) computeActiveWeights( + targetEpoch types.EpochID, + activeSet []types.ATXID, +) (map[types.NodeID]identityWeight, error) { + identities := make(map[types.NodeID]identityWeight, len(activeSet)) + for _, id := range activeSet { + atx := o.atxsdata.Get(targetEpoch, id) + if atx == nil { + return nil, fmt.Errorf("oracle: missing atx in atxsdata %s/%s", targetEpoch, id.ShortString()) + } + identities[atx.Node] = identityWeight{atx: id, weight: atx.Weight} + } + return identities, nil +} + +func (o *Oracle) activeSetFromRefBallots(epoch types.EpochID) ([]types.ATXID, error) { + beacon, err := o.beacons.GetBeacon(epoch) + if err != nil { + return nil, fmt.Errorf("get beacon: %w", err) + } + ballotsrst, err := ballots.AllFirstInEpoch(o.db, epoch) + if err != nil { + return nil, fmt.Errorf("first in epoch %d: %w", epoch, err) + } + activeMap := make(map[types.ATXID]struct{}, len(ballotsrst)) + for _, ballot := range ballotsrst { + if ballot.EpochData == nil { + o.log.With().Error("invalid data. 
first ballot doesn't have epoch data", log.Inline(ballot)) + continue + } + if ballot.EpochData.Beacon != beacon { + o.log.With().Debug("beacon mismatch", log.Stringer("local", beacon), log.Object("ballot", ballot)) + continue + } + actives, err := activesets.Get(o.db, ballot.EpochData.ActiveSetHash) + if err != nil { + o.log.With().Error("failed to get active set", + log.String("actives hash", ballot.EpochData.ActiveSetHash.ShortString()), + log.String("ballot ", ballot.ID().String()), + log.Err(err), + ) + continue + } + for _, id := range actives.Set { + activeMap[id] = struct{}{} + } + } + o.log.With().Warning("using tortoise active set", + log.Int("actives size", len(activeMap)), + log.Uint32("epoch", epoch.Uint32()), + log.Stringer("beacon", beacon), + ) + return maps.Keys(activeMap), nil +} + +// IsIdentityActiveOnConsensusView returns true if the provided identity is active on the consensus view derived +// from the specified layer, false otherwise. +func (o *Oracle) IsIdentityActiveOnConsensusView( + ctx context.Context, + edID types.NodeID, + layer types.LayerID, +) (bool, error) { + o.log.WithContext(ctx).With().Debug("hare oracle checking for active identity") + defer func() { + o.log.WithContext(ctx).With().Debug("hare oracle active identity check complete") + }() + actives, err := o.actives(ctx, layer) + if err != nil { + return false, err + } + _, exist := actives.set[edID] + return exist, nil +} + +func (o *Oracle) UpdateActiveSet(epoch types.EpochID, activeSet []types.ATXID) { + o.log.With().Info("received activeset update", + epoch, + log.Int("size", len(activeSet)), + ) + o.mu.Lock() + defer o.mu.Unlock() + if _, ok := o.fallback[epoch]; ok { + o.log.With().Debug("fallback active set already exists", epoch) + return + } + o.fallback[epoch] = activeSet +} diff --git a/hare4/eligibility/oracle_scale.go b/hare4/eligibility/oracle_scale.go new file mode 100644 index 0000000000..03f43bbfef --- /dev/null +++ b/hare4/eligibility/oracle_scale.go @@ -0,0 +1,76 @@ +// Code generated by github.com/spacemeshos/go-scale/scalegen. DO NOT EDIT. 
+ +// nolint +package eligibility + +import ( + "github.com/spacemeshos/go-scale" + "github.com/spacemeshos/go-spacemesh/common/types" +) + +func (t *VrfMessage) EncodeScale(enc *scale.Encoder) (total int, err error) { + { + n, err := scale.EncodeCompact16(enc, uint16(t.Type)) + if err != nil { + return total, err + } + total += n + } + { + n, err := scale.EncodeByteArray(enc, t.Beacon[:]) + if err != nil { + return total, err + } + total += n + } + { + n, err := scale.EncodeCompact32(enc, uint32(t.Round)) + if err != nil { + return total, err + } + total += n + } + { + n, err := scale.EncodeCompact32(enc, uint32(t.Layer)) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *VrfMessage) DecodeScale(dec *scale.Decoder) (total int, err error) { + { + field, n, err := scale.DecodeCompact16(dec) + if err != nil { + return total, err + } + total += n + t.Type = types.EligibilityType(field) + } + { + n, err := scale.DecodeByteArray(dec, t.Beacon[:]) + if err != nil { + return total, err + } + total += n + } + { + field, n, err := scale.DecodeCompact32(dec) + if err != nil { + return total, err + } + total += n + t.Round = uint32(field) + } + { + field, n, err := scale.DecodeCompact32(dec) + if err != nil { + return total, err + } + total += n + t.Layer = types.LayerID(field) + } + return total, nil +} diff --git a/hare4/eligibility/oracle_test.go b/hare4/eligibility/oracle_test.go new file mode 100644 index 0000000000..ff1e13bf22 --- /dev/null +++ b/hare4/eligibility/oracle_test.go @@ -0,0 +1,965 @@ +package eligibility + +import ( + "context" + "encoding/hex" + "errors" + "math/rand" + "os" + "strconv" + "sync" + "testing" + "time" + + "github.com/spacemeshos/fixed" + "github.com/spacemeshos/go-scale/tester" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + "golang.org/x/exp/maps" + + "github.com/spacemeshos/go-spacemesh/atxsdata" + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/log/logtest" + "github.com/spacemeshos/go-spacemesh/signing" + "github.com/spacemeshos/go-spacemesh/sql" + "github.com/spacemeshos/go-spacemesh/sql/activesets" + "github.com/spacemeshos/go-spacemesh/sql/atxs" + "github.com/spacemeshos/go-spacemesh/sql/ballots" + "github.com/spacemeshos/go-spacemesh/sql/blocks" + "github.com/spacemeshos/go-spacemesh/sql/layers" + "github.com/spacemeshos/go-spacemesh/system/mocks" +) + +const ( + defLayersPerEpoch uint32 = 10 + confidenceParam uint32 = 3 + ballotsPerLayer = 50 +) + +func TestMain(m *testing.M) { + types.SetLayersPerEpoch(defLayersPerEpoch) + res := m.Run() + os.Exit(res) +} + +type testOracle struct { + *Oracle + tb testing.TB + db *sql.Database + atxsdata *atxsdata.Data + mBeacon *mocks.MockBeaconGetter + mVerifier *MockvrfVerifier +} + +func defaultOracle(tb testing.TB) *testOracle { + db := sql.InMemory() + atxsdata := atxsdata.New() + + ctrl := gomock.NewController(tb) + mBeacon := mocks.NewMockBeaconGetter(ctrl) + mVerifier := NewMockvrfVerifier(ctrl) + + to := &testOracle{ + Oracle: New( + mBeacon, + db, + atxsdata, + mVerifier, + defLayersPerEpoch, + WithConfig(Config{ConfidenceParam: confidenceParam}), + WithLogger(logtest.New(tb)), + ), + tb: tb, + mBeacon: mBeacon, + mVerifier: mVerifier, + db: db, + atxsdata: atxsdata, + } + return to +} + +func (t *testOracle) createBallots( + lid types.LayerID, + activeSet types.ATXIDList, + miners []types.NodeID, +) []*types.Ballot { 
+ t.tb.Helper() + numBallots := ballotsPerLayer + if len(activeSet) < numBallots { + numBallots = len(activeSet) + } + var result []*types.Ballot + for i := 0; i < numBallots; i++ { + b := types.RandomBallot() + b.Layer = lid + b.AtxID = activeSet[i] + b.RefBallot = types.EmptyBallotID + b.EpochData = &types.EpochData{ActiveSetHash: activeSet.Hash()} + b.Signature = types.RandomEdSignature() + b.SmesherID = miners[i] + require.NoError(t.tb, b.Initialize()) + require.NoError(t.tb, ballots.Add(t.db, b)) + activesets.Add(t.db, b.EpochData.ActiveSetHash, &types.EpochActiveSet{ + Epoch: lid.GetEpoch(), + Set: activeSet, + }) + result = append(result, b) + } + return result +} + +func (t *testOracle) createBlock(blts []*types.Ballot) { + t.tb.Helper() + block := &types.Block{ + InnerBlock: types.InnerBlock{ + LayerIndex: blts[0].Layer, + }, + } + for _, b := range blts { + block.Rewards = append(block.Rewards, types.AnyReward{AtxID: b.AtxID}) + } + block.Initialize() + require.NoError(t.tb, blocks.Add(t.db, block)) + require.NoError(t.tb, layers.SetApplied(t.db, block.LayerIndex, block.ID())) +} + +func (t *testOracle) createLayerData(lid types.LayerID, numMiners int) []types.NodeID { + t.tb.Helper() + activeSet := types.RandomActiveSet(numMiners) + miners := t.createActiveSet(lid.GetEpoch().FirstLayer().Sub(1), activeSet) + blts := t.createBallots(lid, activeSet, miners) + t.createBlock(blts) + return miners +} + +func (t *testOracle) createActiveSet( + lid types.LayerID, + activeSet []types.ATXID, +) []types.NodeID { + var miners []types.NodeID + for i, id := range activeSet { + nodeID := types.BytesToNodeID([]byte(strconv.Itoa(i))) + miners = append(miners, nodeID) + atx := &types.ActivationTx{ + PublishEpoch: lid.GetEpoch(), + Weight: uint64(i + 1), + SmesherID: nodeID, + } + atx.SetID(id) + atx.SetReceived(time.Now()) + t.addAtx(atx) + } + return miners +} + +func (t *testOracle) addAtx(atx *types.ActivationTx) { + t.tb.Helper() + require.NoError(t.tb, atxs.Add(t.db, atx, types.AtxBlob{})) + t.atxsdata.AddFromAtx(atx, false) +} + +// create n identities with weights and identifiers 1,2,3,...,n. 
+func createIdentities(n int) map[types.NodeID]identityWeight { + m := map[types.NodeID]identityWeight{} + for i := 0; i < n; i++ { + m[types.BytesToNodeID([]byte(strconv.Itoa(i)))] = identityWeight{ + atx: types.ATXID(types.BytesToHash([]byte(strconv.Itoa(i)))), + weight: uint64(i + 1), + } + } + return m +} + +func TestCalcEligibility(t *testing.T) { + nid := types.NodeID{1, 1} + + t.Run("zero committee", func(t *testing.T) { + o := defaultOracle(t) + res, err := o.CalcEligibility(context.Background(), types.LayerID(50), 1, 0, nid, types.EmptyVrfSignature) + require.ErrorIs(t, err, errZeroCommitteeSize) + require.Equal(t, 0, int(res)) + }) + + t.Run("empty active set", func(t *testing.T) { + o := defaultOracle(t) + o.mBeacon.EXPECT().GetBeacon(gomock.Any()) + lid := types.EpochID(5).FirstLayer() + res, err := o.CalcEligibility(context.Background(), lid, 1, 1, nid, types.EmptyVrfSignature) + require.ErrorIs(t, err, errEmptyActiveSet) + require.Equal(t, 0, int(res)) + }) + + t.Run("miner not active", func(t *testing.T) { + o := defaultOracle(t) + lid := types.EpochID(5).FirstLayer() + o.createLayerData(lid.Sub(defLayersPerEpoch), 11) + res, err := o.CalcEligibility(context.Background(), lid, 1, 1, nid, types.EmptyVrfSignature) + require.ErrorIs(t, err, ErrNotActive) + require.Equal(t, 0, int(res)) + }) + + t.Run("beacon failure", func(t *testing.T) { + o := defaultOracle(t) + layer := types.EpochID(5).FirstLayer() + miners := o.createLayerData(layer.Sub(defLayersPerEpoch), 5) + errUnknown := errors.New("unknown") + o.mBeacon.EXPECT().GetBeacon(layer.GetEpoch()).Return(types.EmptyBeacon, errUnknown).Times(1) + + res, err := o.CalcEligibility(context.Background(), layer, 0, 1, miners[0], types.EmptyVrfSignature) + require.ErrorIs(t, err, errUnknown) + require.Equal(t, 0, int(res)) + }) + + t.Run("verify failure", func(t *testing.T) { + o := defaultOracle(t) + layer := types.EpochID(5).FirstLayer() + miners := o.createLayerData(layer.Sub(defLayersPerEpoch), 5) + o.mBeacon.EXPECT().GetBeacon(layer.GetEpoch()).Return(types.RandomBeacon(), nil).Times(1) + o.mVerifier.EXPECT().Verify(gomock.Any(), gomock.Any(), gomock.Any()).Return(false).Times(1) + + res, err := o.CalcEligibility(context.Background(), layer, 0, 1, miners[0], types.EmptyVrfSignature) + require.NoError(t, err) + require.Equal(t, 0, int(res)) + }) + + t.Run("empty active with fallback", func(t *testing.T) { + o := defaultOracle(t) + o.mBeacon.EXPECT().GetBeacon(gomock.Any()) + lid := types.EpochID(5).FirstLayer().Add(o.cfg.ConfidenceParam) + res, err := o.CalcEligibility(context.Background(), lid, 1, 1, nid, types.EmptyVrfSignature) + require.ErrorIs(t, err, errEmptyActiveSet) + require.Equal(t, 0, int(res)) + + activeSet := types.RandomActiveSet(111) + miners := o.createActiveSet(types.EpochID(4).FirstLayer(), activeSet) + o.UpdateActiveSet(5, activeSet) + o.mBeacon.EXPECT().GetBeacon(lid.GetEpoch()).Return(types.RandomBeacon(), nil) + o.mVerifier.EXPECT().Verify(gomock.Any(), gomock.Any(), gomock.Any()).Return(true) + _, err = o.CalcEligibility(context.Background(), lid, 1, 1, miners[0], types.EmptyVrfSignature) + require.NoError(t, err) + }) + + t.Run("miner active", func(t *testing.T) { + o := defaultOracle(t) + lid := types.EpochID(5).FirstLayer() + beacon := types.Beacon{1, 0, 0, 0} + miners := o.createLayerData(lid.Sub(defLayersPerEpoch), 5) + sigs := map[string]uint16{ + "0516a574aef37257d6811ea53ef55d4cbb0e14674900a0d5165bd6742513840d" + + "02442d979fdabc7059645d1e8f8a0f44d0db2aa90f23374dd74a3636d4ecdab7": 1, + 
"73929b4b69090bb6133e2f8cd73989b35228e7e6d8c6745e4100d9c5eb48ca26" + + "24ee2889e55124195a130f74ea56e53a73a1c4dee60baa13ad3b1c0ed4f80d9c": 0, + "e2c27ad65b752b763173b588518764b6c1e42896d57e0eabef9bcac68e07b877" + + "29a4ef9e5f17d8c1cb34ffd0d65ee9a7e63e63b77a7bcab1140a76fc04c271de": 0, + "384460966938c87644987fe00c0f9d4f9a5e2dcd4bdc08392ed94203895ba325" + + "036725a22346e35aa707993babef716aa1b6b3dfc653a44cb23ac8f743cbbc3d": 1, + "15c5f565a75888970059b070bfaed1998a9d423ddac9f6af83da51db02149044" + + "ea6aeb86294341c7a950ac5de2855bbebc11cc28b02c08bc903e4cf41439717d": 1, + } + for vrf, exp := range sigs { + sig, err := hex.DecodeString(vrf) + require.NoError(t, err) + + var vrfSig types.VrfSignature + copy(vrfSig[:], sig) + + o.mBeacon.EXPECT().GetBeacon(lid.GetEpoch()).Return(beacon, nil).Times(1) + o.mVerifier.EXPECT().Verify(gomock.Any(), gomock.Any(), gomock.Any()).Return(true).Times(1) + res, err := o.CalcEligibility(context.Background(), lid, 1, 10, miners[0], vrfSig) + require.NoError(t, err, vrf) + require.Equal(t, exp, res, vrf) + } + }) +} + +func TestCalcEligibilityWithSpaceUnit(t *testing.T) { + const committeeSize = 800 + tcs := []struct { + desc string + numMiners int + }{ + { + desc: "small network", + numMiners: 50, + }, + { + desc: "large network", + numMiners: 2000, + }, + } + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + o := defaultOracle(t) + o.mVerifier.EXPECT().Verify(gomock.Any(), gomock.Any(), gomock.Any()).Return(true).AnyTimes() + + lid := types.EpochID(5).FirstLayer() + beacon := types.Beacon{1, 0, 0, 0} + miners := o.createLayerData(lid.Sub(defLayersPerEpoch), tc.numMiners) + + var eligibilityCount uint16 + for _, nodeID := range miners { + sig := types.RandomVrfSignature() + + o.mBeacon.EXPECT().GetBeacon(lid.GetEpoch()).Return(beacon, nil).Times(2) + res, err := o.CalcEligibility(context.Background(), lid, 1, committeeSize, nodeID, sig) + require.NoError(t, err) + + valid, err := o.Validate(context.Background(), lid, 1, committeeSize, nodeID, sig, res) + require.NoError(t, err) + require.True(t, valid) + + eligibilityCount += res + } + + require.InDelta(t, committeeSize, eligibilityCount, committeeSize*15/100) // up to 15% difference + // a correct check would be to calculate the expected variance of the binomial distribution + // which depends on the number of miners and the number of units each miner has + // and then assert that the difference is within 3 standard deviations of the expected value + }) + } +} + +func BenchmarkOracle_CalcEligibility(b *testing.B) { + r := require.New(b) + + o := defaultOracle(b) + o.mBeacon.EXPECT().GetBeacon(gomock.Any()).Return(types.RandomBeacon(), nil).AnyTimes() + o.mVerifier.EXPECT().Verify(gomock.Any(), gomock.Any(), gomock.Any()).Return(true).AnyTimes() + numOfMiners := 2000 + committeeSize := 800 + + lid := types.EpochID(5).FirstLayer() + o.createLayerData(lid, numOfMiners) + + var nodeIDs []types.NodeID + for pubkey := range createIdentities(b.N) { + nodeIDs = append(nodeIDs, pubkey) + } + b.ResetTimer() + for _, nodeID := range nodeIDs { + res, err := o.CalcEligibility(context.Background(), lid, 1, committeeSize, nodeID, types.EmptyVrfSignature) + + if err == nil { + valid, err := o.Validate(context.Background(), lid, 1, committeeSize, nodeID, types.EmptyVrfSignature, res) + r.NoError(err) + r.True(valid) + } + } +} + +func Test_VrfSignVerify(t *testing.T) { + // eligibility of the proof depends on the identity + rng := rand.New(rand.NewSource(5)) + + signer, err := 
signing.NewEdSigner(signing.WithKeyFromRand(rng)) + require.NoError(t, err) + + o := defaultOracle(t) + nid := signer.NodeID() + + lid := types.EpochID(5).FirstLayer().Add(confidenceParam) + first := types.EpochID(5).FirstLayer() + prevEpoch := lid.GetEpoch() - 1 + o.mBeacon.EXPECT().GetBeacon(lid.GetEpoch()).Return(types.Beacon{1, 0, 0, 0}, nil).AnyTimes() + + numMiners := 2 + activeSet := types.RandomActiveSet(numMiners) + atx1 := &types.ActivationTx{ + PublishEpoch: prevEpoch, + Weight: 1 * 1024, + SmesherID: signer.NodeID(), + } + atx1.SetID(activeSet[0]) + atx1.SetReceived(time.Now()) + o.addAtx(atx1) + + signer2, err := signing.NewEdSigner(signing.WithKeyFromRand(rng)) + require.NoError(t, err) + + atx2 := &types.ActivationTx{ + PublishEpoch: prevEpoch, + Weight: 9 * 1024, + SmesherID: signer2.NodeID(), + } + atx2.SetID(activeSet[1]) + atx2.SetReceived(time.Now()) + o.addAtx(atx2) + miners := []types.NodeID{atx1.SmesherID, atx2.SmesherID} + o.createBlock(o.createBallots(first, activeSet, miners)) + + o.vrfVerifier = signing.NewVRFVerifier() + + // round is handpicked for vrf signature to pass + const round = 0 + + proof, err := o.Proof(context.Background(), signer.VRFSigner(), lid, round) + require.NoError(t, err) + + res, err := o.CalcEligibility(context.Background(), lid, round, 10, nid, proof) + require.NoError(t, err) + require.Equal(t, 1, int(res)) + + valid, err := o.Validate(context.Background(), lid, round, 10, nid, proof, 1) + require.NoError(t, err) + require.True(t, valid) +} + +func Test_Proof_BeaconError(t *testing.T) { + o := defaultOracle(t) + + signer, err := signing.NewEdSigner() + require.NoError(t, err) + + layer := types.LayerID(2) + errUnknown := errors.New("unknown") + o.mBeacon.EXPECT().GetBeacon(layer.GetEpoch()).Return(types.EmptyBeacon, errUnknown).Times(1) + + _, err = o.Proof(context.Background(), signer.VRFSigner(), layer, 3) + require.ErrorIs(t, err, errUnknown) +} + +func Test_Proof(t *testing.T) { + o := defaultOracle(t) + layer := types.LayerID(2) + o.mBeacon.EXPECT().GetBeacon(layer.GetEpoch()).Return(types.Beacon{1, 0, 0, 0}, nil) + + signer, err := signing.NewEdSigner() + require.NoError(t, err) + + sig, err := o.Proof(context.Background(), signer.VRFSigner(), layer, 3) + require.NoError(t, err) + require.NotNil(t, sig) +} + +func TestOracle_IsIdentityActive(t *testing.T) { + o := defaultOracle(t) + layer := types.LayerID(defLayersPerEpoch * 4) + numMiners := 2 + miners := o.createLayerData(layer.Sub(defLayersPerEpoch), numMiners) + for _, nodeID := range miners { + v, err := o.IsIdentityActiveOnConsensusView(context.Background(), nodeID, layer) + require.NoError(t, err) + require.True(t, v) + } + v, err := o.IsIdentityActiveOnConsensusView(context.Background(), types.NodeID{7, 7, 7}, layer) + require.NoError(t, err) + require.False(t, v) +} + +func TestBuildVRFMessage_BeaconError(t *testing.T) { + o := defaultOracle(t) + errUnknown := errors.New("unknown") + o.mBeacon.EXPECT().GetBeacon(gomock.Any()).Return(types.EmptyBeacon, errUnknown).Times(1) + msg, err := o.buildVRFMessage(context.Background(), types.LayerID(1), 1) + require.ErrorIs(t, err, errUnknown) + require.Nil(t, msg) +} + +func TestBuildVRFMessage(t *testing.T) { + o := defaultOracle(t) + firstLayer := types.LayerID(1) + secondLayer := firstLayer.Add(1) + beacon := types.RandomBeacon() + o.mBeacon.EXPECT().GetBeacon(firstLayer.GetEpoch()).Return(beacon, nil).Times(1) + m1, err := o.buildVRFMessage(context.Background(), firstLayer, 2) + require.NoError(t, err) + + // check not same for 
different round + o.mBeacon.EXPECT().GetBeacon(firstLayer.GetEpoch()).Return(beacon, nil).Times(1) + m3, err := o.buildVRFMessage(context.Background(), firstLayer, 3) + require.NoError(t, err) + require.NotEqual(t, m1, m3) + + // check not same for different layer + o.mBeacon.EXPECT().GetBeacon(firstLayer.GetEpoch()).Return(beacon, nil).Times(1) + m4, err := o.buildVRFMessage(context.Background(), secondLayer, 2) + require.NoError(t, err) + require.NotEqual(t, m1, m4) + + // check same call returns same result + o.mBeacon.EXPECT().GetBeacon(firstLayer.GetEpoch()).Return(beacon, nil).Times(1) + m5, err := o.buildVRFMessage(context.Background(), firstLayer, 2) + require.NoError(t, err) + require.Equal(t, m1, m5) // check same result +} + +func TestBuildVRFMessage_Concurrency(t *testing.T) { + o := defaultOracle(t) + + total := 1000 + expectAdd := 10 + wg := sync.WaitGroup{} + firstLayer := types.LayerID(1) + o.mBeacon.EXPECT().GetBeacon(firstLayer.GetEpoch()).Return(types.RandomBeacon(), nil).AnyTimes() + for i := 0; i < total; i++ { + wg.Add(1) + go func(x int) { + _, err := o.buildVRFMessage(context.Background(), firstLayer, uint32(x%expectAdd)) + assert.NoError(t, err) + wg.Done() + }(i) + } + + wg.Wait() +} + +func TestActiveSet(t *testing.T) { + numMiners := 5 + o := defaultOracle(t) + targetEpoch := types.EpochID(5) + layer := targetEpoch.FirstLayer().Add(o.cfg.ConfidenceParam) + o.createLayerData(targetEpoch.FirstLayer(), numMiners) + + aset, err := o.actives(context.Background(), layer) + require.NoError(t, err) + require.ElementsMatch( + t, + maps.Keys(createIdentities(numMiners)), + maps.Keys(aset.set), + "assertion relies on the enumeration of identities", + ) + + got, err := o.ActiveSet(context.Background(), targetEpoch) + require.NoError(t, err) + require.Len(t, got, len(aset.set)) + for _, id := range got { + atx, err := atxs.Get(o.db, id) + require.NoError(t, err) + require.Contains(t, aset.set, atx.SmesherID, "id %s atx %s", id.ShortString(), atx.ShortString()) + delete(aset.set, atx.SmesherID) + } +} + +func TestActives(t *testing.T) { + numMiners := 5 + t.Run("genesis bootstrap", func(t *testing.T) { + o := defaultOracle(t) + first := types.GetEffectiveGenesis().Add(1) + bootstrap := types.RandomActiveSet(numMiners) + o.createActiveSet(types.EpochID(1).FirstLayer(), bootstrap) + o.UpdateActiveSet(types.GetEffectiveGenesis().GetEpoch()+1, bootstrap) + + for lid := types.LayerID(0); lid.Before(first); lid = lid.Add(1) { + activeSet, err := o.actives(context.Background(), lid) + require.ErrorIs(t, err, errEmptyActiveSet) + require.Nil(t, activeSet) + } + activeSet, err := o.actives(context.Background(), first) + require.NoError(t, err) + require.ElementsMatch( + t, + maps.Keys(createIdentities(numMiners)), + maps.Keys(activeSet.set), + "assertion relies on the enumeration of identities", + ) + }) + t.Run("steady state", func(t *testing.T) { + numMiners++ + o := defaultOracle(t) + o.mBeacon.EXPECT().GetBeacon(gomock.Any()) + layer := types.EpochID(4).FirstLayer() + o.createLayerData(layer, numMiners) + + start := layer.Add(o.cfg.ConfidenceParam) + activeSet, err := o.actives(context.Background(), start) + require.NoError(t, err) + require.ElementsMatch( + t, + maps.Keys(createIdentities(numMiners)), + maps.Keys(activeSet.set), + "assertion relies on the enumeration of identities", + ) + end := (layer.GetEpoch() + 1).FirstLayer().Add(o.cfg.ConfidenceParam) + + for lid := start.Add(1); lid.Before(end); lid = lid.Add(1) { + got, err := o.actives(context.Background(), lid) + 
require.NoError(t, err) + // cached + require.Equal(t, &activeSet, &got) + } + got, err := o.actives(context.Background(), end) + require.ErrorIs(t, err, errEmptyActiveSet) + require.Nil(t, got) + }) + t.Run("use fallback despite block", func(t *testing.T) { + numMiners++ + o := defaultOracle(t) + o.mBeacon.EXPECT().GetBeacon(gomock.Any()).AnyTimes() + layer := types.EpochID(4).FirstLayer() + end := layer.Add(o.cfg.ConfidenceParam) + o.createLayerData(layer, numMiners) + fallback := types.RandomActiveSet(numMiners + 1) + o.createActiveSet(types.EpochID(3).FirstLayer(), fallback) + o.UpdateActiveSet(end.GetEpoch(), fallback) + + for lid := layer; lid.Before(end); lid = lid.Add(1) { + got, err := o.actives(context.Background(), lid) + require.ErrorIs(t, err, errEmptyActiveSet) + require.Nil(t, got) + } + activeSet, err := o.actives(context.Background(), end) + require.NoError(t, err) + require.ElementsMatch( + t, + maps.Keys(createIdentities(numMiners+1)), + maps.Keys(activeSet.set), + "assertion relies on the enumeration of identities", + ) + }) + t.Run("recover at epoch start", func(t *testing.T) { + numMiners++ + o := defaultOracle(t) + o.mBeacon.EXPECT().GetBeacon(gomock.Any()).AnyTimes() + layer := types.EpochID(4).FirstLayer() + old := types.GetEffectiveGenesis() + types.SetEffectiveGenesis(layer.Uint32() - 1) + t.Cleanup(func() { + types.SetEffectiveGenesis(old.Uint32()) + }) + o.createLayerData(layer, numMiners) + fallback := types.RandomActiveSet(numMiners + 1) + o.createActiveSet(types.EpochID(3).FirstLayer(), fallback) + o.UpdateActiveSet(layer.GetEpoch(), fallback) + + activeSet, err := o.actives(context.Background(), layer) + require.NoError(t, err) + require.ElementsMatch( + t, + maps.Keys(createIdentities(numMiners+1)), + maps.Keys(activeSet.set), + "assertion relies on the enumeration of identities", + ) + activeSet2, err := o.actives(context.Background(), layer+1) + require.NoError(t, err) + require.Equal(t, activeSet, activeSet2) + }) +} + +func TestActives_ConcurrentCalls(t *testing.T) { + r := require.New(t) + o := defaultOracle(t) + layer := types.LayerID(100) + o.createLayerData(layer.Sub(defLayersPerEpoch), 5) + + mc := NewMockactiveSetCache(gomock.NewController(t)) + firstCall := true + mc.EXPECT().Get(layer.GetEpoch() - 1).DoAndReturn( + func(types.EpochID) (*cachedActiveSet, bool) { + if firstCall { + firstCall = false + return nil, false + } + aset := cachedActiveSet{set: createIdentities(5)} + for _, value := range aset.set { + aset.total += value.weight + } + return &aset, true + }).Times(102) + mc.EXPECT().Add(layer.GetEpoch()-1, gomock.Any()) + o.activesCache = mc + + var wg sync.WaitGroup + wg.Add(102) + runFn := func() { + _, err := o.actives(context.Background(), layer) + r.NoError(err) + wg.Done() + } + + // outstanding probability for concurrent access to calc active set size + for i := 0; i < 100; i++ { + go runFn() + } + + // make sure we wait at least two calls duration + runFn() + runFn() + wg.Wait() +} + +func TestMaxSupportedN(t *testing.T) { + n := maxSupportedN + p := fixed.DivUint64(800, uint64(n*100)) + x := 0 + + require.Panics(t, func() { + fixed.BinCDF(n+1, p, x) + }) + + require.NotPanics(t, func() { + for x = 0; x < 800; x++ { + fixed.BinCDF(n, p, x) + } + }) +} + +func TestActiveSetMatrix(t *testing.T) { + t.Parallel() + + target := types.EpochID(4) + bgen := func( + id types.BallotID, + lid types.LayerID, + node types.NodeID, + beacon types.Beacon, + atxs types.ATXIDList, + option ...func(*types.Ballot), + ) types.Ballot { + ballot := 
types.Ballot{} + ballot.Layer = lid + ballot.EpochData = &types.EpochData{Beacon: beacon, ActiveSetHash: atxs.Hash()} + ballot.SmesherID = node + ballot.SetID(id) + for _, opt := range option { + opt(&ballot) + } + return ballot + } + agen := func( + id types.ATXID, + node types.NodeID, + option ...func(*types.ActivationTx), + ) *types.ActivationTx { + atx := &types.ActivationTx{ + PublishEpoch: target - 1, + SmesherID: node, + NumUnits: 1, + TickCount: 1, + } + atx.SetID(id) + atx.SetReceived(time.Time{}.Add(1)) + + for _, opt := range option { + opt(atx) + } + return atx + } + + for _, tc := range []struct { + desc string + beacon types.Beacon // local beacon + ballots []types.Ballot + atxs []*types.ActivationTx + actives []types.ATXIDList + expect any + }{ + { + desc: "merged activesets", + beacon: types.Beacon{1}, + ballots: []types.Ballot{ + bgen( + types.BallotID{1}, + target.FirstLayer(), + types.NodeID{1}, + types.Beacon{1}, + []types.ATXID{{1}, {2}}, + ), + bgen( + types.BallotID{2}, + target.FirstLayer(), + types.NodeID{2}, + types.Beacon{1}, + []types.ATXID{{2}, {3}}, + ), + }, + atxs: []*types.ActivationTx{ + agen(types.ATXID{1}, types.NodeID{1}), + agen(types.ATXID{2}, types.NodeID{2}), + agen(types.ATXID{3}, types.NodeID{3}), + }, + actives: []types.ATXIDList{{{1}, {2}}, {{2}, {3}}}, + expect: []types.ATXID{{1}, {2}, {3}}, + }, + { + desc: "filter by beacon", + beacon: types.Beacon{1}, + ballots: []types.Ballot{ + bgen( + types.BallotID{1}, + target.FirstLayer(), + types.NodeID{1}, + types.Beacon{1}, + []types.ATXID{{1}, {2}}, + ), + bgen( + types.BallotID{2}, + target.FirstLayer(), + types.NodeID{2}, + types.Beacon{2, 2, 2, 2}, + []types.ATXID{{2}, {3}}, + ), + }, + atxs: []*types.ActivationTx{ + agen(types.ATXID{1}, types.NodeID{1}), + agen(types.ATXID{2}, types.NodeID{2}), + }, + actives: []types.ATXIDList{{{1}, {2}}, {{2}, {3}}}, + expect: []types.ATXID{{1}, {2}}, + }, + { + desc: "no local beacon", + beacon: types.EmptyBeacon, + ballots: []types.Ballot{ + bgen( + types.BallotID{1}, + target.FirstLayer(), + types.NodeID{1}, + types.Beacon{1}, + []types.ATXID{{1}, {2}}, + ), + bgen( + types.BallotID{2}, + target.FirstLayer(), + types.NodeID{2}, + types.Beacon{2, 2, 2, 2}, + []types.ATXID{{2}, {3}}, + ), + }, + atxs: []*types.ActivationTx{}, + actives: []types.ATXIDList{{{1}, {2}}, {{2}, {3}}}, + expect: "not found", + }, + { + desc: "unknown atxs", + beacon: types.Beacon{1}, + ballots: []types.Ballot{ + bgen( + types.BallotID{1}, + target.FirstLayer(), + types.NodeID{1}, + types.Beacon{1}, + []types.ATXID{{1}, {2}}, + ), + bgen( + types.BallotID{2}, + target.FirstLayer(), + types.NodeID{2}, + types.Beacon{2, 2, 2, 2}, + []types.ATXID{{2}, {3}}, + ), + }, + atxs: []*types.ActivationTx{}, + actives: []types.ATXIDList{{{1}, {2}}, {{2}, {3}}}, + expect: "missing atx in atxsdata", + }, + { + desc: "ballot no epoch data", + beacon: types.Beacon{1}, + ballots: []types.Ballot{ + bgen( + types.BallotID{1}, + target.FirstLayer(), + types.NodeID{1}, + types.Beacon{1}, + []types.ATXID{{1}, {2}}, + func(ballot *types.Ballot) { + ballot.EpochData = nil + }, + ), + bgen( + types.BallotID{2}, + target.FirstLayer(), + types.NodeID{2}, + types.Beacon{1}, + []types.ATXID{{2}, {3}}, + ), + }, + atxs: []*types.ActivationTx{ + agen(types.ATXID{2}, types.NodeID{2}), + agen(types.ATXID{3}, types.NodeID{3}), + }, + actives: []types.ATXIDList{{{2}, {3}}}, + expect: []types.ATXID{{2}, {3}}, + }, + { + desc: "wrong target epoch", + beacon: types.Beacon{1}, + ballots: []types.Ballot{ + bgen( + 
types.BallotID{1}, + target.FirstLayer(), + types.NodeID{1}, + types.Beacon{1}, + []types.ATXID{{2}}, + ), + }, + atxs: []*types.ActivationTx{ + agen(types.ATXID{2}, types.NodeID{1}, func(verified *types.ActivationTx) { + verified.PublishEpoch = target + }), + }, + actives: []types.ATXIDList{{{2}}}, + expect: "missing atx in atxsdata 4/0200000000", + }, + } { + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + oracle := defaultOracle(t) + for _, actives := range tc.actives { + require.NoError(t, activesets.Add(oracle.db, actives.Hash(), &types.EpochActiveSet{Set: actives})) + } + for _, ballot := range tc.ballots { + require.NoError(t, ballots.Add(oracle.db, &ballot)) + } + for _, atx := range tc.atxs { + require.NoError(t, atxs.Add(oracle.db, atx, types.AtxBlob{})) + oracle.atxsdata.AddFromAtx(atx, false) + } + if tc.beacon != types.EmptyBeacon { + oracle.mBeacon.EXPECT().GetBeacon(target).Return(tc.beacon, nil) + } else { + oracle.mBeacon.EXPECT().GetBeacon(target).Return(types.EmptyBeacon, sql.ErrNotFound) + } + rst, err := oracle.ActiveSet(context.TODO(), target) + + switch typed := tc.expect.(type) { + case []types.ATXID: + require.NoError(t, err) + require.ElementsMatch(t, typed, rst) + case string: + require.Empty(t, rst) + require.ErrorContains(t, err, typed) + default: + require.Failf(t, "unknown assert type", "%v", typed) + } + }) + } +} + +func TestResetCache(t *testing.T) { + oracle := defaultOracle(t) + ctrl := gomock.NewController(t) + + prev := oracle.activesCache + prev.Add(1, nil) + + oracle.resetCacheOnSynced(context.Background()) + require.Equal(t, prev, oracle.activesCache) + + sync := mocks.NewMockSyncStateProvider(ctrl) + oracle.SetSync(sync) + + sync.EXPECT().IsSynced(gomock.Any()).Return(false) + oracle.resetCacheOnSynced(context.Background()) + require.Equal(t, prev, oracle.activesCache) + + sync.EXPECT().IsSynced(gomock.Any()).Return(true) + oracle.resetCacheOnSynced(context.Background()) + require.NotEqual(t, prev, oracle.activesCache) + + prev = oracle.activesCache + prev.Add(1, nil) + + sync.EXPECT().IsSynced(gomock.Any()).Return(true) + oracle.resetCacheOnSynced(context.Background()) + require.Equal(t, prev, oracle.activesCache) +} + +func FuzzVrfMessageConsistency(f *testing.F) { + tester.FuzzConsistency[VrfMessage](f) +} + +func FuzzVrfMessageSafety(f *testing.F) { + tester.FuzzSafety[VrfMessage](f) +} diff --git a/hare4/hare.go b/hare4/hare.go new file mode 100644 index 0000000000..fcd049a926 --- /dev/null +++ b/hare4/hare.go @@ -0,0 +1,632 @@ +package hare3 + +import ( + "context" + "errors" + "fmt" + "math" + "sync" + "time" + + "github.com/jonboulle/clockwork" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "golang.org/x/exp/maps" + "golang.org/x/sync/errgroup" + + "github.com/spacemeshos/go-spacemesh/atxsdata" + "github.com/spacemeshos/go-spacemesh/codec" + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/layerpatrol" + "github.com/spacemeshos/go-spacemesh/log" + "github.com/spacemeshos/go-spacemesh/metrics" + "github.com/spacemeshos/go-spacemesh/p2p" + "github.com/spacemeshos/go-spacemesh/p2p/pubsub" + "github.com/spacemeshos/go-spacemesh/proposals/store" + "github.com/spacemeshos/go-spacemesh/signing" + "github.com/spacemeshos/go-spacemesh/sql" + "github.com/spacemeshos/go-spacemesh/sql/atxs" + "github.com/spacemeshos/go-spacemesh/sql/beacons" + "github.com/spacemeshos/go-spacemesh/sql/identities" + "github.com/spacemeshos/go-spacemesh/system" +) + +type 
CommitteeUpgrade struct { + Layer types.LayerID + Size uint16 +} + +type Config struct { + Enable bool `mapstructure:"enable"` + EnableLayer types.LayerID `mapstructure:"enable-layer"` + DisableLayer types.LayerID `mapstructure:"disable-layer"` + Committee uint16 `mapstructure:"committee"` + CommitteeUpgrade *CommitteeUpgrade + Leaders uint16 `mapstructure:"leaders"` + IterationsLimit uint8 `mapstructure:"iterations-limit"` + PreroundDelay time.Duration `mapstructure:"preround-delay"` + RoundDuration time.Duration `mapstructure:"round-duration"` + // LogStats if true will log iteration statistics with INFO level at the start of the next iteration. + // This requires additional computation and should be used for debugging only. + LogStats bool `mapstructure:"log-stats"` + ProtocolName string `mapstructure:"protocolname"` +} + +func (cfg *Config) CommitteeFor(layer types.LayerID) uint16 { + if cfg.CommitteeUpgrade != nil && layer >= cfg.CommitteeUpgrade.Layer { + return cfg.CommitteeUpgrade.Size + } + return cfg.Committee +} + +func (cfg *Config) Validate(zdist time.Duration) error { + terminates := cfg.roundStart(IterRound{Iter: cfg.IterationsLimit, Round: hardlock}) + if terminates > zdist { + return fmt.Errorf("hare terminates later (%v) than expected (%v)", terminates, zdist) + } + if cfg.Enable && cfg.DisableLayer <= cfg.EnableLayer { + return fmt.Errorf("disabled layer (%d) must be larger than enabled (%d)", + cfg.DisableLayer, cfg.EnableLayer) + } + return nil +} + +func (cfg *Config) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddBool("enabled", cfg.Enable) + encoder.AddUint32("enabled layer", cfg.EnableLayer.Uint32()) + encoder.AddUint32("disabled layer", cfg.DisableLayer.Uint32()) + encoder.AddUint16("committee", cfg.Committee) + if cfg.CommitteeUpgrade != nil { + encoder.AddUint32("committee upgrade layer", cfg.CommitteeUpgrade.Layer.Uint32()) + encoder.AddUint16("committee upgrade size", cfg.CommitteeUpgrade.Size) + } + encoder.AddUint16("leaders", cfg.Leaders) + encoder.AddUint8("iterations limit", cfg.IterationsLimit) + encoder.AddDuration("preround delay", cfg.PreroundDelay) + encoder.AddDuration("round duration", cfg.RoundDuration) + encoder.AddBool("log stats", cfg.LogStats) + encoder.AddString("p2p protocol", cfg.ProtocolName) + return nil +} + +// roundStart returns expected time for iter/round relative to +// layer start. +func (cfg *Config) roundStart(round IterRound) time.Duration { + if round.Round == 0 { + return cfg.PreroundDelay + } + return cfg.PreroundDelay + time.Duration(round.Absolute()-1)*cfg.RoundDuration +} + +func DefaultConfig() Config { + return Config{ + // NOTE(talm) We aim for a 2^{-40} error probability; if the population at large has a 2/3 honest majority, + // we need a committee of size ~800 to guarantee this error rate (at least, + // this is what the Chernoff bound gives you; the actual value is a bit lower, + // so we can probably get away with a smaller committee). For a committee of size 400, + // the Chernoff bound gives 2^{-20} probability of a dishonest majority when 1/3 of the population is dishonest. 
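+		// As a rough illustration (a sketch, not part of the protocol: it assumes
+		// BinCDF(n, p, x) from github.com/spacemeshos/fixed, which the eligibility
+		// tests already use, returns P[X <= x] for X ~ Binomial(n, p)), the exact
+		// probability of a dishonest majority in a committee of n with dishonest
+		// fraction p is the upper tail 1 - BinCDF(n, p, n/2); for n=800 and
+		// p=1/3 this is far below the 2^{-40} target quoted above.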
+ Committee: 800, + Leaders: 5, + IterationsLimit: 4, + PreroundDelay: 25 * time.Second, + RoundDuration: 12 * time.Second, + // can be bumped to 3.1 when oracle upgrades + ProtocolName: "/h/3.0", + DisableLayer: math.MaxUint32, + } +} + +type ConsensusOutput struct { + Layer types.LayerID + Proposals []types.ProposalID +} + +type WeakCoinOutput struct { + Layer types.LayerID + Coin bool +} + +type Opt func(*Hare) + +func WithWallclock(clock clockwork.Clock) Opt { + return func(hr *Hare) { + hr.wallclock = clock + } +} + +func WithConfig(cfg Config) Opt { + return func(hr *Hare) { + hr.config = cfg + hr.oracle.config = cfg + } +} + +func WithLogger(logger *zap.Logger) Opt { + return func(hr *Hare) { + hr.log = logger + hr.oracle.log = logger + } +} + +func WithTracer(tracer Tracer) Opt { + return func(hr *Hare) { + hr.tracer = tracer + } +} + +type nodeclock interface { + AwaitLayer(types.LayerID) <-chan struct{} + CurrentLayer() types.LayerID + LayerToTime(types.LayerID) time.Time +} + +func New( + nodeclock nodeclock, + pubsub pubsub.PublishSubsciber, + db *sql.Database, + atxsdata *atxsdata.Data, + proposals *store.Store, + verifier *signing.EdVerifier, + oracle oracle, + sync system.SyncStateProvider, + patrol *layerpatrol.LayerPatrol, + opts ...Opt, +) *Hare { + ctx, cancel := context.WithCancel(context.Background()) + hr := &Hare{ + ctx: ctx, + cancel: cancel, + results: make(chan ConsensusOutput, 32), + coins: make(chan WeakCoinOutput, 32), + signers: map[string]*signing.EdSigner{}, + sessions: map[types.LayerID]*protocol{}, + + config: DefaultConfig(), + log: zap.NewNop(), + wallclock: clockwork.NewRealClock(), + + nodeclock: nodeclock, + pubsub: pubsub, + db: db, + atxsdata: atxsdata, + proposals: proposals, + verifier: verifier, + oracle: &legacyOracle{ + log: zap.NewNop(), + oracle: oracle, + config: DefaultConfig(), + }, + sync: sync, + patrol: patrol, + tracer: noopTracer{}, + } + for _, opt := range opts { + opt(hr) + } + return hr +} + +type Hare struct { + // state + ctx context.Context + cancel context.CancelFunc + eg errgroup.Group + results chan ConsensusOutput + coins chan WeakCoinOutput + mu sync.Mutex + signers map[string]*signing.EdSigner + sessions map[types.LayerID]*protocol + + // options + config Config + log *zap.Logger + wallclock clockwork.Clock + + // dependencies + nodeclock nodeclock + pubsub pubsub.PublishSubsciber + db *sql.Database + atxsdata *atxsdata.Data + proposals *store.Store + verifier *signing.EdVerifier + oracle *legacyOracle + sync system.SyncStateProvider + patrol *layerpatrol.LayerPatrol + tracer Tracer +} + +func (h *Hare) Register(sig *signing.EdSigner) { + h.mu.Lock() + defer h.mu.Unlock() + h.log.Info("registered signing key", log.ZShortStringer("id", sig.NodeID())) + h.signers[string(sig.NodeID().Bytes())] = sig +} + +func (h *Hare) Results() <-chan ConsensusOutput { + return h.results +} + +func (h *Hare) Coins() <-chan WeakCoinOutput { + return h.coins +} + +func (h *Hare) Start() { + h.pubsub.Register(h.config.ProtocolName, h.Handler, pubsub.WithValidatorInline(true)) + current := h.nodeclock.CurrentLayer() + 1 + enabled := max(current, h.config.EnableLayer, types.GetEffectiveGenesis()+1) + disabled := types.LayerID(math.MaxUint32) + if h.config.DisableLayer > 0 { + disabled = h.config.DisableLayer + } + h.log.Info("started", + zap.Inline(&h.config), + zap.Uint32("enabled", enabled.Uint32()), + zap.Uint32("disabled", disabled.Uint32()), + ) + h.eg.Go(func() error { + for next := enabled; next < disabled; next++ { + select { + case 
<-h.nodeclock.AwaitLayer(next): + h.log.Debug("notified", zap.Uint32("lid", next.Uint32())) + h.onLayer(next) + case <-h.ctx.Done(): + return nil + } + } + return nil + }) +} + +func (h *Hare) Running() int { + h.mu.Lock() + defer h.mu.Unlock() + return len(h.sessions) +} + +func (h *Hare) Handler(ctx context.Context, peer p2p.Peer, buf []byte) error { + msg := &Message{} + if err := codec.Decode(buf, msg); err != nil { + malformedError.Inc() + return fmt.Errorf("%w: decoding error %s", pubsub.ErrValidationReject, err.Error()) + } + if err := msg.Validate(); err != nil { + malformedError.Inc() + return fmt.Errorf("%w: validation %s", pubsub.ErrValidationReject, err.Error()) + } + h.tracer.OnMessageReceived(msg) + h.mu.Lock() + session, registered := h.sessions[msg.Layer] + h.mu.Unlock() + if !registered { + notRegisteredError.Inc() + return fmt.Errorf("layer %d is not registered", msg.Layer) + } + if !h.verifier.Verify(signing.HARE, msg.Sender, msg.ToMetadata().ToBytes(), msg.Signature) { + signatureError.Inc() + return fmt.Errorf("%w: invalid signature", pubsub.ErrValidationReject) + } + malicious := h.atxsdata.IsMalicious(msg.Sender) + + start := time.Now() + g := h.oracle.validate(msg) + oracleLatency.Observe(time.Since(start).Seconds()) + if g == grade0 { + oracleError.Inc() + return errors.New("zero grade") + } + start = time.Now() + input := &input{ + Message: msg, + msgHash: msg.ToHash(), + malicious: malicious, + atxgrade: g, + } + h.log.Debug("on message", zap.Inline(input)) + gossip, equivocation := session.OnInput(input) + h.log.Debug("after on message", log.ZShortStringer("hash", input.msgHash), zap.Bool("gossip", gossip)) + submitLatency.Observe(time.Since(start).Seconds()) + if equivocation != nil && !malicious { + h.log.Debug("registered equivocation", + zap.Uint32("lid", msg.Layer.Uint32()), + zap.Stringer("sender", equivocation.Messages[0].SmesherID)) + proof := equivocation.ToMalfeasanceProof() + if err := identities.SetMalicious( + h.db, equivocation.Messages[0].SmesherID, codec.MustEncode(proof), time.Now()); err != nil { + h.log.Error("failed to save malicious identity", zap.Error(err)) + } + h.atxsdata.SetMalicious(equivocation.Messages[0].SmesherID) + } + if !gossip { + droppedMessages.Inc() + return errors.New("dropped by graded gossip") + } + expected := h.nodeclock.LayerToTime(msg.Layer).Add(h.config.roundStart(msg.IterRound)) + metrics.ReportMessageLatency(h.config.ProtocolName, msg.Round.String(), time.Since(expected)) + return nil +} + +func (h *Hare) onLayer(layer types.LayerID) { + h.proposals.OnLayer(layer) + if !h.sync.IsSynced(h.ctx) { + h.log.Debug("not synced", zap.Uint32("lid", layer.Uint32())) + return + } + beacon, err := beacons.Get(h.db, layer.GetEpoch()) + if err != nil || beacon == types.EmptyBeacon { + h.log.Debug("no beacon", + zap.Uint32("epoch", layer.GetEpoch().Uint32()), + zap.Uint32("lid", layer.Uint32()), + zap.Error(err), + ) + return + } + h.patrol.SetHareInCharge(layer) + + h.mu.Lock() + // signer can't join mid session + s := &session{ + lid: layer, + beacon: beacon, + signers: maps.Values(h.signers), + vrfs: make([]*types.HareEligibility, len(h.signers)), + proto: newProtocol(h.config.CommitteeFor(layer)/2 + 1), + } + h.sessions[layer] = s.proto + h.mu.Unlock() + + sessionStart.Inc() + h.tracer.OnStart(layer) + h.log.Debug("registered layer", zap.Uint32("lid", layer.Uint32())) + h.eg.Go(func() error { + if err := h.run(s); err != nil { + h.log.Warn("failed", + zap.Uint32("lid", layer.Uint32()), + zap.Error(err), + ) + exitErrors.Inc() 
+			// if the session terminated successfully it notifies the block
+			// generator through the results channel, and the block generator
+			// has to call CompleteHare itself; on failure we complete the layer here
+			h.patrol.CompleteHare(layer)
+		} else {
+			h.log.Debug("terminated",
+				zap.Uint32("lid", layer.Uint32()),
+			)
+		}
+		h.mu.Lock()
+		delete(h.sessions, layer)
+		h.mu.Unlock()
+		sessionTerminated.Inc()
+		h.tracer.OnStop(layer)
+		return nil
+	})
+}
+
+func (h *Hare) run(session *session) error {
+	// the oracle may load a non-negligible amount of data from disk.
+	// we query it before the preround starts, so that the load has some
+	// slack time before the result is needed in validation
+	var (
+		current = IterRound{Round: preround}
+		start   = time.Now()
+		active  bool
+	)
+	for i := range session.signers {
+		session.vrfs[i] = h.oracle.active(session.signers[i], session.beacon, session.lid, current)
+		active = active || session.vrfs[i] != nil
+	}
+	h.tracer.OnActive(session.vrfs)
+	activeLatency.Observe(time.Since(start).Seconds())
+
+	walltime := h.nodeclock.LayerToTime(session.lid).Add(h.config.PreroundDelay)
+	if active {
+		h.log.Debug("active in preround. waiting for preround delay", zap.Uint32("lid", session.lid.Uint32()))
+		// the initial set is only needed if the node is active in the preround
+		select {
+		case <-h.wallclock.After(walltime.Sub(h.wallclock.Now())):
+		case <-h.ctx.Done():
+			return h.ctx.Err()
+		}
+		start := time.Now()
+		session.proto.OnInitial(h.selectProposals(session))
+		proposalsLatency.Observe(time.Since(start).Seconds())
+	}
+	if err := h.onOutput(session, current, session.proto.Next()); err != nil {
+		return err
+	}
+	result := false
+	for {
+		walltime = walltime.Add(h.config.RoundDuration)
+		current = session.proto.IterRound
+		start = time.Now()
+
+		for i := range session.signers {
+			if current.IsMessageRound() {
+				session.vrfs[i] = h.oracle.active(session.signers[i], session.beacon, session.lid, current)
+			} else {
+				session.vrfs[i] = nil
+			}
+		}
+		h.tracer.OnActive(session.vrfs)
+		activeLatency.Observe(time.Since(start).Seconds())
+
+		select {
+		case <-h.wallclock.After(walltime.Sub(h.wallclock.Now())):
+			h.log.Debug("execute round",
+				zap.Uint32("lid", session.lid.Uint32()),
+				zap.Uint8("iter", session.proto.Iter), zap.Stringer("round", session.proto.Round),
+				zap.Bool("active", active),
+			)
+			out := session.proto.Next()
+			if out.result != nil {
+				result = true
+			}
+			if err := h.onOutput(session, current, out); err != nil {
+				return err
+			}
+			// we log stats one network delay after a new iteration starts,
+			// so that notify messages from the previous iteration can still be received
+			if session.proto.Round == softlock && h.config.LogStats {
+				h.log.Info("stats", zap.Uint32("lid", session.lid.Uint32()), zap.Inline(session.proto.Stats()))
+			}
+			if out.terminated {
+				if !result {
+					return errors.New("terminated without result")
+				}
+				return nil
+			}
+			if current.Iter == h.config.IterationsLimit {
+				return fmt.Errorf("hare failed to reach consensus in %d iterations",
+					h.config.IterationsLimit)
+			}
+		case <-h.ctx.Done():
+			return nil
+		}
+	}
+}
+
+func (h *Hare) onOutput(session *session, ir IterRound, out output) error {
+	for i, vrf := range session.vrfs {
+		if vrf == nil || out.message == nil {
+			continue
+		}
+		msg := *out.message // shallow copy
+		msg.Layer = session.lid
+		msg.Eligibility = *vrf
+		msg.Sender = session.signers[i].NodeID()
+		msg.Signature = session.signers[i].Sign(signing.HARE, msg.ToMetadata().ToBytes())
+		if err := h.pubsub.Publish(h.ctx, h.config.ProtocolName, msg.ToBytes()); err != nil {
+			h.log.Error("failed to publish", zap.Inline(&msg), zap.Error(err))
+		}
+	}
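+	// the tracer is notified even when out.message is nil, so lockstep tests
+	// always observe exactly one OnMessageSent call per node per round
+	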
h.tracer.OnMessageSent(out.message) + h.log.Debug("round output", + zap.Uint32("lid", session.lid.Uint32()), + zap.Uint8("iter", ir.Iter), zap.Stringer("round", ir.Round), + zap.Inline(&out), + ) + if out.coin != nil { + select { + case <-h.ctx.Done(): + return h.ctx.Err() + case h.coins <- WeakCoinOutput{Layer: session.lid, Coin: *out.coin}: + } + sessionCoin.Inc() + } + if out.result != nil { + select { + case <-h.ctx.Done(): + return h.ctx.Err() + case h.results <- ConsensusOutput{Layer: session.lid, Proposals: out.result}: + } + sessionResult.Inc() + } + return nil +} + +func (h *Hare) selectProposals(session *session) []types.ProposalID { + h.log.Debug("requested proposals", + zap.Uint32("lid", session.lid.Uint32()), + zap.Stringer("beacon", session.beacon), + ) + + var ( + result []types.ProposalID + min *atxsdata.ATX + ) + target := session.lid.GetEpoch() + publish := target - 1 + for _, signer := range session.signers { + atxid, err := atxs.GetIDByEpochAndNodeID(h.db, publish, signer.NodeID()) + switch { + case errors.Is(err, sql.ErrNotFound): + // if atx is not registered for identity we will get sql.ErrNotFound + case err != nil: + h.log.Error("failed to get atx id by epoch and node id", zap.Error(err)) + return []types.ProposalID{} + default: + own := h.atxsdata.Get(target, atxid) + if min == nil || (min != nil && own != nil && own.Height < min.Height) { + min = own + } + } + } + if min == nil { + h.log.Debug("no atxs in the requested epoch", zap.Uint32("epoch", session.lid.GetEpoch().Uint32()-1)) + return []types.ProposalID{} + } + + candidates := h.proposals.GetForLayer(session.lid) + atxs := map[types.ATXID]int{} + for _, p := range candidates { + atxs[p.AtxID]++ + } + for _, p := range candidates { + if h.atxsdata.IsMalicious(p.SmesherID) || p.IsMalicious() { + h.log.Warn("not voting on proposal from malicious identity", + zap.Stringer("id", p.ID()), + ) + continue + } + // double check that a single smesher is not included twice + // theoretically it should never happen as it is covered + // by the malicious check above. + if n := atxs[p.AtxID]; n > 1 { + h.log.Error("proposal with same atx added several times in the recorded set", + zap.Int("n", n), + zap.Stringer("id", p.ID()), + zap.Stringer("atxid", p.AtxID), + ) + continue + } + header := h.atxsdata.Get(target, p.AtxID) + if header == nil { + h.log.Error("atx is not loaded", zap.Stringer("atxid", p.AtxID)) + return []types.ProposalID{} + } + if header.BaseHeight >= min.Height { + // does not vote for future proposal + h.log.Warn("proposal base tick height too high. 
skipping", + zap.Uint32("lid", session.lid.Uint32()), + zap.Uint64("proposal_height", header.BaseHeight), + zap.Uint64("min_height", min.Height), + ) + continue + } + + if p.Beacon() == session.beacon { + result = append(result, p.ID()) + } else { + h.log.Warn("proposal has different beacon value", + zap.Uint32("lid", session.lid.Uint32()), + zap.Stringer("id", p.ID()), + zap.String("proposal_beacon", p.Beacon().ShortString()), + zap.String("epoch_beacon", session.beacon.ShortString()), + ) + } + } + return result +} + +func (h *Hare) IsKnown(layer types.LayerID, proposal types.ProposalID) bool { + return h.proposals.Get(layer, proposal) != nil +} + +func (h *Hare) OnProposal(p *types.Proposal) error { + return h.proposals.Add(p) +} + +func (h *Hare) Stop() { + h.cancel() + h.eg.Wait() + close(h.results) + close(h.coins) + h.log.Info("stopped") +} + +type session struct { + proto *protocol + lid types.LayerID + beacon types.Beacon + signers []*signing.EdSigner + vrfs []*types.HareEligibility +} diff --git a/hare4/hare_test.go b/hare4/hare_test.go new file mode 100644 index 0000000000..dd8139ab12 --- /dev/null +++ b/hare4/hare_test.go @@ -0,0 +1,972 @@ +package hare3 + +import ( + "context" + "fmt" + "math/rand" + "os" + "runtime/pprof" + "strings" + "sync" + "testing" + "time" + + "github.com/jonboulle/clockwork" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest" + + "github.com/spacemeshos/go-spacemesh/atxsdata" + "github.com/spacemeshos/go-spacemesh/codec" + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/layerpatrol" + "github.com/spacemeshos/go-spacemesh/log/logtest" + "github.com/spacemeshos/go-spacemesh/p2p/pubsub" + pmocks "github.com/spacemeshos/go-spacemesh/p2p/pubsub/mocks" + "github.com/spacemeshos/go-spacemesh/proposals/store" + "github.com/spacemeshos/go-spacemesh/signing" + "github.com/spacemeshos/go-spacemesh/sql" + "github.com/spacemeshos/go-spacemesh/sql/atxs" + "github.com/spacemeshos/go-spacemesh/sql/ballots" + "github.com/spacemeshos/go-spacemesh/sql/beacons" + "github.com/spacemeshos/go-spacemesh/sql/identities" + smocks "github.com/spacemeshos/go-spacemesh/system/mocks" +) + +const layersPerEpoch = 4 + +func TestMain(m *testing.M) { + types.SetLayersPerEpoch(layersPerEpoch) + res := m.Run() + os.Exit(res) +} + +type tester struct { + testing.TB + + rng *rand.Rand + start time.Time + cfg Config + layerDuration time.Duration + beacon types.Beacon + genesis types.LayerID +} + +type waiter struct { + lid types.LayerID + ch chan struct{} +} + +// timesync.Nodeclock time can't be mocked nicely because of ticks. 
+type testNodeClock struct { + mu sync.Mutex + started types.LayerID + waiters []waiter + + genesis time.Time + layerDuration time.Duration +} + +func (t *testNodeClock) CurrentLayer() types.LayerID { + t.mu.Lock() + defer t.mu.Unlock() + return t.started +} + +func (t *testNodeClock) LayerToTime(lid types.LayerID) time.Time { + return t.genesis.Add(time.Duration(lid) * t.layerDuration) +} + +func (t *testNodeClock) StartLayer(lid types.LayerID) { + t.mu.Lock() + defer t.mu.Unlock() + t.started = lid + for _, w := range t.waiters { + if w.lid <= lid { + select { + case <-w.ch: + default: + close(w.ch) + } + } + } +} + +func (t *testNodeClock) AwaitLayer(lid types.LayerID) <-chan struct{} { + t.mu.Lock() + defer t.mu.Unlock() + ch := make(chan struct{}) + if lid <= t.started { + close(ch) + return ch + } + t.waiters = append(t.waiters, waiter{lid: lid, ch: ch}) + return ch +} + +type node struct { + t *tester + + i int + clock clockwork.FakeClock + nclock *testNodeClock + signer *signing.EdSigner + registered []*signing.EdSigner + vrfsigner *signing.VRFSigner + atx *types.ActivationTx + oracle *eligibility.Oracle + db *sql.Database + atxsdata *atxsdata.Data + proposals *store.Store + + ctrl *gomock.Controller + mpublisher *pmocks.MockPublishSubsciber + msyncer *smocks.MockSyncStateProvider + patrol *layerpatrol.LayerPatrol + tracer *testTracer + hare *Hare +} + +func (n *node) withClock() *node { + n.clock = clockwork.NewFakeClockAt(n.t.start) + return n +} + +func (n *node) withSigner() *node { + signer, err := signing.NewEdSigner(signing.WithKeyFromRand(n.t.rng)) + require.NoError(n.t, err) + n.signer = signer + n.vrfsigner = signer.VRFSigner() + return n +} + +func (n *node) reuseSigner(signer *signing.EdSigner) *node { + n.signer = signer + n.vrfsigner = signer.VRFSigner() + return n +} + +func (n *node) withDb() *node { + n.db = sql.InMemory() + n.atxsdata = atxsdata.New() + n.proposals = store.New() + return n +} + +func (n *node) withAtx(min, max int) *node { + atx := &types.ActivationTx{ + PublishEpoch: n.t.genesis.GetEpoch(), + TickCount: 1, + SmesherID: n.signer.NodeID(), + } + if max-min > 0 { + atx.NumUnits = uint32(n.t.rng.Intn(max-min) + min) + } else { + atx.NumUnits = uint32(min) + } + atx.Weight = uint64(atx.NumUnits) * atx.TickCount + id := types.ATXID{} + n.t.rng.Read(id[:]) + atx.SetID(id) + atx.SetReceived(n.t.start) + atx.VRFNonce = types.VRFPostIndex(n.t.rng.Uint64()) + + n.atx = atx + return n +} + +func (n *node) withController() *node { + n.ctrl = gomock.NewController(n.t) + return n +} + +func (n *node) withSyncer() *node { + n.msyncer = smocks.NewMockSyncStateProvider(n.ctrl) + n.msyncer.EXPECT().IsSynced(gomock.Any()).Return(true).AnyTimes() + return n +} + +func (n *node) withOracle() *node { + beaconget := smocks.NewMockBeaconGetter(n.ctrl) + beaconget.EXPECT().GetBeacon(gomock.Any()).DoAndReturn(func(epoch types.EpochID) (types.Beacon, error) { + return beacons.Get(n.db, epoch) + }).AnyTimes() + n.oracle = eligibility.New( + beaconget, + n.db, + n.atxsdata, + signing.NewVRFVerifier(), + layersPerEpoch, + ) + return n +} + +func (n *node) withPublisher() *node { + n.mpublisher = pmocks.NewMockPublishSubsciber(n.ctrl) + n.mpublisher.EXPECT().Register(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes() + return n +} + +func (n *node) withHare() *node { + logger := logtest.New(n.t).Named(fmt.Sprintf("hare=%d", n.i)) + + n.nclock = &testNodeClock{ + genesis: n.t.start, + layerDuration: n.t.layerDuration, + } + tracer := newTestTracer(n.t) + n.tracer = tracer + 
n.patrol = layerpatrol.New()
+	n.hare = New(
+		n.nclock,
+		n.mpublisher,
+		n.db,
+		n.atxsdata,
+		n.proposals,
+		signing.NewEdVerifier(),
+		n.oracle,
+		n.msyncer,
+		n.patrol,
+		WithConfig(n.t.cfg),
+		WithLogger(logger.Zap()),
+		WithWallclock(n.clock),
+		WithTracer(tracer),
+	)
+	n.register(n.signer)
+	return n
+}
+
+func (n *node) waitEligibility() {
+	n.tracer.waitEligibility()
+}
+
+func (n *node) waitSent() {
+	n.tracer.waitSent()
+}
+
+func (n *node) register(signer *signing.EdSigner) {
+	n.hare.Register(signer)
+	n.registered = append(n.registered, signer)
+}
+
+func (n *node) storeAtx(atx *types.ActivationTx) error {
+	if err := atxs.Add(n.db, atx, types.AtxBlob{}); err != nil {
+		return err
+	}
+	n.atxsdata.AddFromAtx(atx, false)
+	return nil
+}
+
+type clusterOpt func(*lockstepCluster)
+
+func withUnits(min, max int) clusterOpt {
+	return func(cluster *lockstepCluster) {
+		cluster.units.min = min
+		cluster.units.max = max
+	}
+}
+
+func withProposals(fraction float64) clusterOpt {
+	return func(cluster *lockstepCluster) {
+		cluster.proposals.fraction = fraction
+		cluster.proposals.shuffle = true
+	}
+}
+
+// withSigners creates n signers in addition to the regular active nodes.
+// these signers will be partitioned fairly across the regular active nodes.
+func withSigners(n int) clusterOpt {
+	return func(cluster *lockstepCluster) {
+		cluster.signersCount = n
+	}
+}
+
+func newLockstepCluster(t *tester, opts ...clusterOpt) *lockstepCluster {
+	cluster := &lockstepCluster{t: t}
+	cluster.units.min = 10
+	cluster.units.max = 10
+	cluster.proposals.fraction = 1
+	cluster.proposals.shuffle = false
+	for _, opt := range opts {
+		opt(cluster)
+	}
+	return cluster
+}
+
+// lockstepCluster runs rounds in lockstep:
+// no peer can start a round until the test allows it.
+type lockstepCluster struct {
+	t       *tester
+	nodes   []*node
+	signers []*node // nodes that are active in consensus but don't run a hare instance
+
+	units struct {
+		min, max int
+	}
+	proposals struct {
+		fraction float64
+		shuffle  bool
+	}
+	signersCount int
+
+	timestamp time.Time
+}
+
+func (cl *lockstepCluster) addNode(n *node) {
+	n.hare.Start()
+	cl.t.Cleanup(func() {
+		n.hare.Stop()
+	})
+	cl.nodes = append(cl.nodes, n)
+}
+
+func (cl *lockstepCluster) partitionSigners() {
+	for i, signer := range cl.signers {
+		cl.nodes[i%len(cl.nodes)].register(signer.signer)
+	}
+}
+
+func (cl *lockstepCluster) addSigner(n int) *lockstepCluster {
+	last := len(cl.signers)
+	for i := last; i < last+n; i++ {
+		n := (&node{t: cl.t, i: i}).withSigner().withAtx(cl.units.min, cl.units.max)
+		cl.signers = append(cl.signers, n)
+	}
+	return cl
+}
+
+func (cl *lockstepCluster) addActive(n int) *lockstepCluster {
+	last := len(cl.nodes)
+	for i := last; i < last+n; i++ {
+		cl.addNode((&node{t: cl.t, i: i}).
+			withController().withSyncer().withPublisher().
+			withClock().withDb().withSigner().withAtx(cl.units.min, cl.units.max).
+			withOracle().withHare())
+	}
+	return cl
+}
+
+func (cl *lockstepCluster) addInactive(n int) *lockstepCluster {
+	last := len(cl.nodes)
+	for i := last; i < last+n; i++ {
+		cl.addNode((&node{t: cl.t, i: i}).
+			withController().withSyncer().withPublisher().
+			withClock().withDb().withSigner().
+			withOracle().withHare())
+	}
+	return cl
+}
+
+func (cl *lockstepCluster) addEquivocators(n int) *lockstepCluster {
+	require.LessOrEqual(cl.t, n, len(cl.nodes))
+	last := len(cl.nodes)
+	for i := last; i < last+n; i++ {
+		cl.addNode((&node{t: cl.t, i: i}).
+			reuseSigner(cl.nodes[i-last].signer).
+			
+ withController().withSyncer().withPublisher(). + withClock().withDb().withAtx(cl.units.min, cl.units.max). + withOracle().withHare()) + } + return cl +} + +func (cl *lockstepCluster) nogossip() { + for _, n := range cl.nodes { + require.NoError(cl.t, beacons.Add(n.db, cl.t.genesis.GetEpoch()+1, cl.t.beacon)) + n.mpublisher.EXPECT().Publish(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() + } +} + +func (cl *lockstepCluster) activeSet() types.ATXIDList { + var ids []types.ATXID + unique := map[types.ATXID]struct{}{} + for _, n := range append(cl.nodes, cl.signers...) { + if n.atx == nil { + continue + } + if _, exists := unique[n.atx.ID()]; exists { + continue + } + unique[n.atx.ID()] = struct{}{} + ids = append(ids, n.atx.ID()) + } + return ids +} + +func (cl *lockstepCluster) genProposals(lid types.LayerID) { + active := cl.activeSet() + all := []*types.Proposal{} + for _, n := range append(cl.nodes, cl.signers...) { + if n.atx == nil { + continue + } + proposal := &types.Proposal{} + proposal.Layer = lid + proposal.EpochData = &types.EpochData{ + Beacon: cl.t.beacon, + ActiveSetHash: active.Hash(), + } + proposal.AtxID = n.atx.ID() + proposal.SmesherID = n.signer.NodeID() + id := types.ProposalID{} + cl.t.rng.Read(id[:]) + bid := types.BallotID{} + cl.t.rng.Read(bid[:]) + proposal.SetID(id) + proposal.Ballot.SetID(bid) + proposal.SetBeacon(proposal.EpochData.Beacon) + all = append(all, proposal) + } + for _, other := range cl.nodes { + cp := make([]*types.Proposal, len(all)) + copy(cp, all) + if cl.proposals.shuffle { + cl.t.rng.Shuffle(len(cp), func(i, j int) { + cp[i], cp[j] = cp[j], cp[i] + }) + } + for _, proposal := range cp[:int(float64(len(cp))*cl.proposals.fraction)] { + require.NoError(cl.t, ballots.Add(other.db, &proposal.Ballot)) + other.hare.OnProposal(proposal) + } + } +} + +func (cl *lockstepCluster) setup() { + active := cl.activeSet() + for _, n := range cl.nodes { + require.NoError(cl.t, beacons.Add(n.db, cl.t.genesis.GetEpoch()+1, cl.t.beacon)) + for _, other := range append(cl.nodes, cl.signers...) { + if other.atx == nil { + continue + } + require.NoError(cl.t, n.storeAtx(other.atx)) + } + n.oracle.UpdateActiveSet(cl.t.genesis.GetEpoch()+1, active) + n.mpublisher.EXPECT(). + Publish(gomock.Any(), gomock.Any(), gomock.Any()). + Do(func(ctx context.Context, _ string, msg []byte) error { + for _, other := range cl.nodes { + other.hare.Handler(ctx, "self", msg) + } + return nil + }). + AnyTimes() + } +} + +func (cl *lockstepCluster) movePreround(layer types.LayerID) { + cl.timestamp = cl.t.start. + Add(cl.t.layerDuration * time.Duration(layer)). 
+ Add(cl.t.cfg.PreroundDelay) + for _, n := range cl.nodes { + n.nclock.StartLayer(layer) + n.clock.Advance(cl.timestamp.Sub(n.clock.Now())) + } + for _, n := range cl.nodes { + n.waitEligibility() + } + for _, n := range cl.nodes { + n.waitSent() + } +} + +func (cl *lockstepCluster) moveRound() { + cl.timestamp = cl.timestamp.Add(cl.t.cfg.RoundDuration) + for _, n := range cl.nodes { + n.clock.Advance(cl.timestamp.Sub(n.clock.Now())) + } + for _, n := range cl.nodes { + n.waitEligibility() + } + for _, n := range cl.nodes { + n.waitSent() + } +} + +func (cl *lockstepCluster) waitStopped() { + for _, n := range cl.nodes { + n.tracer.waitStopped() + } +} + +func newTestTracer(tb testing.TB) *testTracer { + return &testTracer{ + TB: tb, + stopped: make(chan types.LayerID, 100), + eligibility: make(chan []*types.HareEligibility), + sent: make(chan *Message), + } +} + +type testTracer struct { + testing.TB + stopped chan types.LayerID + eligibility chan []*types.HareEligibility + sent chan *Message +} + +func waitForChan[T any](t testing.TB, ch <-chan T, timeout time.Duration, failureMsg string) T { + var value T + select { + case <-time.After(timeout): + builder := strings.Builder{} + pprof.Lookup("goroutine").WriteTo(&builder, 2) + t.Fatalf(failureMsg+", waited: %v, stacktraces:\n%s", timeout, builder.String()) + case value = <-ch: + } + return value +} + +func sendWithTimeout[T any](t testing.TB, value T, ch chan<- T, timeout time.Duration, failureMsg string) { + select { + case <-time.After(timeout): + builder := strings.Builder{} + pprof.Lookup("goroutine").WriteTo(&builder, 2) + t.Fatalf(failureMsg+", waited: %v, stacktraces:\n%s", timeout, builder.String()) + case ch <- value: + } +} + +func (t *testTracer) waitStopped() types.LayerID { + return waitForChan(t.TB, t.stopped, 10*time.Second, "didn't stop") +} + +func (t *testTracer) waitEligibility() []*types.HareEligibility { + return waitForChan(t.TB, t.eligibility, 10*time.Second, "no eligibility") +} + +func (t *testTracer) waitSent() *Message { + return waitForChan(t.TB, t.sent, 10*time.Second, "no message") +} + +func (*testTracer) OnStart(types.LayerID) {} + +func (t *testTracer) OnStop(lid types.LayerID) { + select { + case t.stopped <- lid: + default: + } +} + +func (t *testTracer) OnActive(el []*types.HareEligibility) { + sendWithTimeout(t.TB, el, t.eligibility, 10*time.Second, "eligibility can't be sent") +} + +func (t *testTracer) OnMessageSent(m *Message) { + sendWithTimeout(t.TB, m, t.sent, 10*time.Second, "message can't be sent") +} + +func (*testTracer) OnMessageReceived(*Message) {} + +func testHare(t *testing.T, active, inactive, equivocators int, opts ...clusterOpt) { + cfg := DefaultConfig() + cfg.LogStats = true + tst := &tester{ + TB: t, + rng: rand.New(rand.NewSource(1001)), + start: time.Now(), + cfg: cfg, + layerDuration: 5 * time.Minute, + beacon: types.Beacon{1, 1, 1, 1}, + genesis: types.GetEffectiveGenesis(), + } + cluster := newLockstepCluster(tst, opts...). + addActive(active). + addInactive(inactive). 
+ addEquivocators(equivocators) + if cluster.signersCount > 0 { + cluster = cluster.addSigner(cluster.signersCount) + cluster.partitionSigners() + } + + layer := tst.genesis + 1 + cluster.setup() + cluster.genProposals(layer) + cluster.movePreround(layer) + for i := 0; i < 2*int(notify); i++ { + cluster.moveRound() + } + var consistent []types.ProposalID + cluster.waitStopped() + for _, n := range cluster.nodes { + select { + case coin := <-n.hare.Coins(): + require.Equal(t, coin.Layer, layer) + default: + require.FailNow(t, "no coin") + } + select { + case rst := <-n.hare.Results(): + require.Equal(t, rst.Layer, layer) + require.NotEmpty(t, rst.Proposals) + if consistent == nil { + consistent = rst.Proposals + } else { + require.Equal(t, consistent, rst.Proposals) + } + default: + require.FailNow(t, "no result") + } + require.Empty(t, n.hare.Running()) + } +} + +func TestHare(t *testing.T) { + t.Run("one", func(t *testing.T) { testHare(t, 1, 0, 0) }) + t.Run("two", func(t *testing.T) { testHare(t, 2, 0, 0) }) + t.Run("small", func(t *testing.T) { testHare(t, 5, 0, 0) }) + t.Run("with proposals subsets", func(t *testing.T) { testHare(t, 5, 0, 0, withProposals(0.5)) }) + t.Run("with units", func(t *testing.T) { testHare(t, 5, 0, 0, withUnits(10, 50)) }) + t.Run("with inactive", func(t *testing.T) { testHare(t, 3, 2, 0) }) + t.Run("equivocators", func(t *testing.T) { testHare(t, 4, 0, 1, withProposals(0.75)) }) + t.Run("one active multi signers", func(t *testing.T) { testHare(t, 1, 0, 0, withSigners(2)) }) + t.Run("three active multi signers", func(t *testing.T) { testHare(t, 3, 0, 0, withSigners(10)) }) +} + +func TestIterationLimit(t *testing.T) { + t.Parallel() + tst := &tester{ + TB: t, + rng: rand.New(rand.NewSource(1001)), + start: time.Now(), + cfg: DefaultConfig(), + layerDuration: 5 * time.Minute, + beacon: types.Beacon{1, 1, 1, 1}, + genesis: types.GetEffectiveGenesis(), + } + tst.cfg.IterationsLimit = 3 + + layer := tst.genesis + 1 + cluster := newLockstepCluster(tst) + cluster.addActive(1) + cluster.nogossip() + cluster.movePreround(layer) + for i := 0; i < int(tst.cfg.IterationsLimit)*int(notify); i++ { + cluster.moveRound() + } + cluster.waitStopped() + require.Empty(t, cluster.nodes[0].hare.Running()) + require.False(t, cluster.nodes[0].patrol.IsHareInCharge(layer)) +} + +func TestConfigMarshal(t *testing.T) { + enc := zapcore.NewMapObjectEncoder() + cfg := &Config{} + require.NoError(t, cfg.MarshalLogObject(enc)) +} + +func TestHandler(t *testing.T) { + t.Parallel() + tst := &tester{ + TB: t, + rng: rand.New(rand.NewSource(1001)), + start: time.Now(), + cfg: DefaultConfig(), + layerDuration: 5 * time.Minute, + beacon: types.Beacon{1, 1, 1, 1}, + genesis: types.GetEffectiveGenesis(), + } + cluster := newLockstepCluster(tst) + cluster.addActive(1) + n := cluster.nodes[0] + require.NoError(t, beacons.Add(n.db, tst.genesis.GetEpoch()+1, tst.beacon)) + require.NoError(t, n.storeAtx(n.atx)) + n.oracle.UpdateActiveSet(tst.genesis.GetEpoch()+1, []types.ATXID{n.atx.ID()}) + n.mpublisher.EXPECT().Publish(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() + layer := tst.genesis + 1 + n.nclock.StartLayer(layer) + n.clock.Advance((tst.start. + Add(tst.layerDuration * time.Duration(layer)). 
+ Add(tst.cfg.PreroundDelay)).Sub(n.clock.Now())) + elig := n.tracer.waitEligibility()[0] + + n.tracer.waitSent() + n.tracer.waitEligibility() + + t.Run("malformed", func(t *testing.T) { + require.ErrorIs(t, n.hare.Handler(context.Background(), "", []byte("malformed")), + pubsub.ErrValidationReject) + require.ErrorContains(t, n.hare.Handler(context.Background(), "", []byte("malformed")), + "decoding") + }) + t.Run("invalidated", func(t *testing.T) { + msg := &Message{} + msg.Round = commit + require.ErrorIs(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg)), + pubsub.ErrValidationReject) + require.ErrorContains(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg)), + "validation reference") + }) + t.Run("unregistered", func(t *testing.T) { + msg := &Message{} + require.ErrorContains(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg)), + "is not registered") + }) + t.Run("invalid signature", func(t *testing.T) { + msg := &Message{} + msg.Layer = layer + msg.Sender = n.signer.NodeID() + msg.Signature = n.signer.Sign(signing.HARE+1, msg.ToMetadata().ToBytes()) + require.ErrorIs(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg)), + pubsub.ErrValidationReject) + require.ErrorContains(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg)), + "invalid signature") + }) + t.Run("zero grade", func(t *testing.T) { + signer, err := signing.NewEdSigner() + require.NoError(t, err) + msg := &Message{} + msg.Layer = layer + msg.Sender = signer.NodeID() + msg.Signature = signer.Sign(signing.HARE, msg.ToMetadata().ToBytes()) + require.ErrorContains(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg)), + "zero grade") + }) + t.Run("equivocation", func(t *testing.T) { + msg1 := &Message{} + msg1.Layer = layer + msg1.Value.Proposals = []types.ProposalID{{1}} + msg1.Eligibility = *elig + msg1.Sender = n.signer.NodeID() + msg1.Signature = n.signer.Sign(signing.HARE, msg1.ToMetadata().ToBytes()) + + msg2 := &Message{} + msg2.Layer = layer + msg2.Value.Proposals = []types.ProposalID{{2}} + msg2.Eligibility = *elig + msg2.Sender = n.signer.NodeID() + msg2.Signature = n.signer.Sign(signing.HARE, msg2.ToMetadata().ToBytes()) + + require.NoError(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg1))) + require.NoError(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg2))) + + malicious, err := identities.IsMalicious(n.db, n.signer.NodeID()) + require.NoError(t, err) + require.True(t, malicious) + + require.ErrorContains(t, + n.hare.Handler(context.Background(), "", codec.MustEncode(msg2)), + "dropped by graded", + ) + }) +} + +func gatx(id types.ATXID, epoch types.EpochID, smesher types.NodeID, base, height uint64) types.ActivationTx { + atx := &types.ActivationTx{ + NumUnits: 10, + PublishEpoch: epoch, + VRFNonce: 1, + BaseTickHeight: base, + TickCount: height - base, + SmesherID: smesher, + } + atx.SetID(id) + atx.SetReceived(time.Time{}.Add(1)) + return *atx +} + +func gproposal( + id types.ProposalID, + atxid types.ATXID, + smesher types.NodeID, + layer types.LayerID, + beacon types.Beacon, +) *types.Proposal { + proposal := types.Proposal{} + proposal.Layer = layer + proposal.EpochData = &types.EpochData{ + Beacon: beacon, + } + proposal.AtxID = atxid + proposal.SmesherID = smesher + proposal.Ballot.SmesherID = smesher + proposal.SetID(id) + proposal.Ballot.SetID(types.BallotID(id)) + proposal.SetBeacon(beacon) + return &proposal +} + +func TestProposals(t *testing.T) { + atxids := [3]types.ATXID{} + 
pids := [3]types.ProposalID{} + ids := [3]types.NodeID{} + for i := range atxids { + atxids[i][0] = byte(i) + 1 + pids[i][0] = byte(i) + 1 + ids[i][0] = byte(i) + 1 + } + publish := types.EpochID(1) + layer := (publish + 1).FirstLayer() + goodBeacon := types.Beacon{1} + badBeacon := types.Beacon{2} + + signer, err := signing.NewEdSigner() + require.NoError(t, err) + for _, tc := range []struct { + desc string + atxs []types.ActivationTx + proposals []*types.Proposal + malicious []types.NodeID + layer types.LayerID + beacon types.Beacon + expect []types.ProposalID + }{ + { + desc: "sanity", + layer: layer, + beacon: goodBeacon, + atxs: []types.ActivationTx{ + gatx(atxids[0], publish, ids[0], 10, 100), + gatx(atxids[1], publish, ids[1], 10, 100), + gatx(atxids[2], publish, signer.NodeID(), 10, 100), + }, + proposals: []*types.Proposal{ + gproposal(pids[0], atxids[0], ids[0], layer, goodBeacon), + gproposal(pids[1], atxids[1], ids[1], layer, goodBeacon), + }, + expect: []types.ProposalID{pids[0], pids[1]}, + }, + { + desc: "mismatched beacon", + layer: layer, + beacon: goodBeacon, + atxs: []types.ActivationTx{ + gatx(atxids[0], publish, ids[0], 10, 100), + gatx(atxids[1], publish, ids[1], 10, 100), + gatx(atxids[2], publish, signer.NodeID(), 10, 100), + }, + proposals: []*types.Proposal{ + gproposal(pids[0], atxids[0], ids[0], layer, goodBeacon), + gproposal(pids[1], atxids[1], ids[1], layer, badBeacon), + }, + expect: []types.ProposalID{pids[0]}, + }, + { + desc: "multiproposals", + layer: layer, + beacon: goodBeacon, + atxs: []types.ActivationTx{ + gatx(atxids[0], publish, ids[0], 10, 100), + gatx(atxids[1], publish, ids[1], 10, 100), + gatx(atxids[2], publish, signer.NodeID(), 10, 100), + }, + proposals: []*types.Proposal{ + gproposal(pids[0], atxids[0], ids[0], layer, goodBeacon), + gproposal(pids[1], atxids[1], ids[1], layer, goodBeacon), + gproposal(pids[2], atxids[1], ids[1], layer, goodBeacon), + }, + expect: []types.ProposalID{pids[0]}, + }, + { + desc: "future proposal", + layer: layer, + beacon: goodBeacon, + atxs: []types.ActivationTx{ + gatx(atxids[0], publish, ids[0], 101, 1000), + gatx(atxids[1], publish, signer.NodeID(), 10, 100), + }, + proposals: []*types.Proposal{ + gproposal(pids[0], atxids[0], ids[0], layer, goodBeacon), + gproposal(pids[1], atxids[1], ids[1], layer, goodBeacon), + }, + expect: []types.ProposalID{pids[1]}, + }, + { + desc: "malicious", + layer: layer, + beacon: goodBeacon, + atxs: []types.ActivationTx{ + gatx(atxids[0], publish, ids[0], 10, 100), + gatx(atxids[1], publish, ids[1], 10, 100), + gatx(atxids[2], publish, signer.NodeID(), 10, 100), + }, + proposals: []*types.Proposal{ + gproposal(pids[0], atxids[0], ids[0], layer, goodBeacon), + gproposal(pids[1], atxids[1], ids[1], layer, goodBeacon), + }, + malicious: []types.NodeID{ids[0]}, + expect: []types.ProposalID{pids[1]}, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + db := sql.InMemory() + atxsdata := atxsdata.New() + proposals := store.New() + hare := New( + nil, + nil, + db, + atxsdata, + proposals, + nil, + nil, + nil, + layerpatrol.New(), + WithLogger(zaptest.NewLogger(t)), + ) + for _, atx := range tc.atxs { + require.NoError(t, atxs.Add(db, &atx, types.AtxBlob{})) + atxsdata.AddFromAtx(&atx, false) + } + for _, proposal := range tc.proposals { + proposals.Add(proposal) + } + for _, id := range tc.malicious { + require.NoError(t, identities.SetMalicious(db, id, []byte("non empty"), time.Time{})) + atxsdata.SetMalicious(id) + } + require.ElementsMatch(t, tc.expect, 
hare.selectProposals(&session{ + lid: tc.layer, + beacon: tc.beacon, + signers: []*signing.EdSigner{signer}, + })) + }) + } +} + +func TestHare_AddProposal(t *testing.T) { + t.Parallel() + proposals := store.New() + hare := New(nil, nil, nil, nil, proposals, nil, nil, nil, nil) + + p := gproposal( + types.RandomProposalID(), + types.RandomATXID(), + types.RandomNodeID(), + types.LayerID(0), + types.RandomBeacon(), + ) + require.False(t, hare.IsKnown(p.Layer, p.ID())) + require.NoError(t, hare.OnProposal(p)) + require.True(t, proposals.Has(p.ID())) + + require.True(t, hare.IsKnown(p.Layer, p.ID())) + require.ErrorIs(t, hare.OnProposal(p), store.ErrProposalExists) +} + +func TestHareConfig_CommitteeUpgrade(t *testing.T) { + t.Parallel() + t.Run("no upgrade", func(t *testing.T) { + cfg := Config{ + Committee: 400, + } + require.Equal(t, cfg.Committee, cfg.CommitteeFor(0)) + require.Equal(t, cfg.Committee, cfg.CommitteeFor(100)) + }) + t.Run("upgrade", func(t *testing.T) { + cfg := Config{ + Committee: 400, + CommitteeUpgrade: &CommitteeUpgrade{ + Layer: 16, + Size: 50, + }, + } + require.EqualValues(t, cfg.Committee, cfg.CommitteeFor(0)) + require.EqualValues(t, cfg.Committee, cfg.CommitteeFor(15)) + require.EqualValues(t, 50, cfg.CommitteeFor(16)) + require.EqualValues(t, 50, cfg.CommitteeFor(100)) + }) +} diff --git a/hare4/legacy_oracle.go b/hare4/legacy_oracle.go new file mode 100644 index 0000000000..07be7f77a4 --- /dev/null +++ b/hare4/legacy_oracle.go @@ -0,0 +1,70 @@ +package hare3 + +import ( + "context" + "errors" + + "go.uber.org/zap" + + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/signing" +) + +type oracle interface { + Validate(context.Context, types.LayerID, uint32, int, types.NodeID, types.VrfSignature, uint16) (bool, error) + CalcEligibility(context.Context, types.LayerID, uint32, int, types.NodeID, types.VrfSignature) (uint16, error) +} + +type legacyOracle struct { + log *zap.Logger + oracle oracle + config Config +} + +func (lg *legacyOracle) validate(msg *Message) grade { + if msg.Eligibility.Count == 0 { + return grade0 + } + committee := int(lg.config.CommitteeFor(msg.Layer)) + if msg.Round == propose { + committee = int(lg.config.Leaders) + } + valid, err := lg.oracle.Validate(context.Background(), + msg.Layer, msg.Absolute(), committee, msg.Sender, + msg.Eligibility.Proof, msg.Eligibility.Count) + if err != nil { + lg.log.Warn("failed proof validation", zap.Error(err)) + return grade0 + } + if !valid { + return grade0 + } + return grade5 +} + +func (lg *legacyOracle) active( + signer *signing.EdSigner, + beacon types.Beacon, + layer types.LayerID, + ir IterRound, +) *types.HareEligibility { + vrf := eligibility.GenVRF(context.Background(), signer.VRFSigner(), beacon, layer, ir.Absolute()) + committee := int(lg.config.CommitteeFor(layer)) + if ir.Round == propose { + committee = int(lg.config.Leaders) + } + count, err := lg.oracle.CalcEligibility(context.Background(), layer, ir.Absolute(), committee, signer.NodeID(), vrf) + if err != nil { + if !errors.Is(err, eligibility.ErrNotActive) { + lg.log.Error("failed to compute eligibilities", zap.Error(err)) + } else { + lg.log.Debug("identity is not active") + } + return nil + } + if count == 0 { + return nil + } + return &types.HareEligibility{Proof: vrf, Count: count} +} diff --git a/hare4/malfeasance.go b/hare4/malfeasance.go new file mode 100644 index 0000000000..e0037d7ef3 --- /dev/null +++ 
b/hare4/malfeasance.go @@ -0,0 +1,96 @@ +package hare3 + +import ( + "context" + "errors" + "fmt" + + "github.com/prometheus/client_golang/prometheus" + "go.uber.org/zap" + + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/log" + "github.com/spacemeshos/go-spacemesh/malfeasance/wire" + "github.com/spacemeshos/go-spacemesh/signing" + "github.com/spacemeshos/go-spacemesh/sql" + "github.com/spacemeshos/go-spacemesh/sql/atxs" +) + +const ( + hareEquivocate = "hare_eq" +) + +type MalfeasanceHandler struct { + logger *zap.Logger + db sql.Executor + + edVerifier *signing.EdVerifier +} + +type MalfeasanceOpt func(*MalfeasanceHandler) + +func WithMalfeasanceLogger(logger *zap.Logger) MalfeasanceOpt { + return func(mh *MalfeasanceHandler) { + mh.logger = logger + } +} + +func NewMalfeasanceHandler( + db sql.Executor, + edVerifier *signing.EdVerifier, + opt ...MalfeasanceOpt, +) *MalfeasanceHandler { + mh := &MalfeasanceHandler{ + logger: zap.NewNop(), + db: db, + + edVerifier: edVerifier, + } + for _, o := range opt { + o(mh) + } + return mh +} + +func (mh *MalfeasanceHandler) Validate(ctx context.Context, data wire.ProofData) (types.NodeID, error) { + hp, ok := data.(*wire.HareProof) + if !ok { + return types.EmptyNodeID, errors.New("wrong message type for hare equivocation") + } + for _, msg := range hp.Messages { + if !mh.edVerifier.Verify(signing.HARE, msg.SmesherID, msg.SignedBytes(), msg.Signature) { + return types.EmptyNodeID, errors.New("invalid signature") + } + } + msg1, msg2 := hp.Messages[0], hp.Messages[1] + ok, err := atxs.IdentityExists(mh.db, msg1.SmesherID) + if err != nil { + return types.EmptyNodeID, fmt.Errorf("check identity in hare malfeasance %v: %w", msg1.SmesherID, err) + } + if !ok { + return types.EmptyNodeID, fmt.Errorf("identity does not exist: %v", msg1.SmesherID) + } + + if msg1.SmesherID == msg2.SmesherID && + msg1.InnerMsg.Layer == msg2.InnerMsg.Layer && + msg1.InnerMsg.Round == msg2.InnerMsg.Round && + msg1.InnerMsg.MsgHash != msg2.InnerMsg.MsgHash { + return msg1.SmesherID, nil + } + mh.logger.Warn("received invalid hare malfeasance proof", + log.ZContext(ctx), + zap.Stringer("first_smesher", hp.Messages[0].SmesherID), + zap.Object("first_proof", &hp.Messages[0].InnerMsg), + zap.Stringer("second_smesher", hp.Messages[1].SmesherID), + zap.Object("second_proof", &hp.Messages[1].InnerMsg), + ) + return types.EmptyNodeID, errors.New("invalid hare malfeasance proof") +} + +func (mh *MalfeasanceHandler) ReportProof(numProofs *prometheus.CounterVec) { + numProofs.WithLabelValues(hareEquivocate).Inc() +} + +func (mh *MalfeasanceHandler) ReportInvalidProof(numInvalidProofs *prometheus.CounterVec) { + numInvalidProofs.WithLabelValues(hareEquivocate).Inc() +} diff --git a/hare4/malfeasance_test.go b/hare4/malfeasance_test.go new file mode 100644 index 0000000000..0f2a0f1491 --- /dev/null +++ b/hare4/malfeasance_test.go @@ -0,0 +1,310 @@ +package hare3 + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest" + "go.uber.org/zap/zaptest/observer" + + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/malfeasance/wire" + "github.com/spacemeshos/go-spacemesh/signing" + "github.com/spacemeshos/go-spacemesh/sql" + "github.com/spacemeshos/go-spacemesh/sql/atxs" +) + +type testMalfeasanceHandler struct { + *MalfeasanceHandler + + observedLogs 
*observer.ObservedLogs + db *sql.Database +} + +func newTestMalfeasanceHandler(tb testing.TB) *testMalfeasanceHandler { + db := sql.InMemory() + observer, observedLogs := observer.New(zapcore.WarnLevel) + logger := zaptest.NewLogger(tb, zaptest.WrapOptions(zap.WrapCore( + func(core zapcore.Core) zapcore.Core { + return zapcore.NewTee(core, observer) + }, + ))) + + h := NewMalfeasanceHandler(db, signing.NewEdVerifier(), WithMalfeasanceLogger(logger)) + + return &testMalfeasanceHandler{ + MalfeasanceHandler: h, + + observedLogs: observedLogs, + db: db, + } +} + +func createIdentity(tb testing.TB, db sql.Executor, sig *signing.EdSigner) { + tb.Helper() + atx := &types.ActivationTx{ + PublishEpoch: types.EpochID(1), + NumUnits: 1, + SmesherID: sig.NodeID(), + } + atx.SetReceived(time.Now()) + atx.SetID(types.RandomATXID()) + atx.TickCount = 1 + require.NoError(tb, atxs.Add(db, atx, types.AtxBlob{})) +} + +func TestHandler_Validate(t *testing.T) { + t.Run("unknown identity", func(t *testing.T) { + h := newTestMalfeasanceHandler(t) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + // identity is unknown to handler + + hp := wire.HareProof{ + Messages: [2]wire.HareProofMsg{ + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + }, + } + hp.Messages[0].Signature = sig.Sign(signing.HARE, hp.Messages[0].SignedBytes()) + hp.Messages[0].SmesherID = sig.NodeID() + hp.Messages[1].Signature = sig.Sign(signing.HARE, hp.Messages[1].SignedBytes()) + hp.Messages[1].SmesherID = sig.NodeID() + + nodeID, err := h.Validate(context.Background(), &hp) + require.ErrorContains(t, err, "identity does not exist") + require.Equal(t, types.EmptyNodeID, nodeID) + }) + + t.Run("invalid signature", func(t *testing.T) { + h := newTestMalfeasanceHandler(t) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + createIdentity(t, h.db, sig) + + hp := wire.HareProof{ + Messages: [2]wire.HareProofMsg{ + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + }, + } + hp.Messages[0].Signature = sig.Sign(signing.HARE, hp.Messages[0].SignedBytes()) + hp.Messages[0].SmesherID = sig.NodeID() + hp.Messages[1].Signature = types.RandomEdSignature() + hp.Messages[1].SmesherID = sig.NodeID() + + nodeID, err := h.Validate(context.Background(), &hp) + require.ErrorContains(t, err, "invalid signature") + require.Equal(t, types.EmptyNodeID, nodeID) + }) + + t.Run("same msg hash", func(t *testing.T) { + h := newTestMalfeasanceHandler(t) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + createIdentity(t, h.db, sig) + + msgHash := types.RandomHash() + hp := wire.HareProof{ + Messages: [2]wire.HareProofMsg{ + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: msgHash, + }, + }, + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: msgHash, + }, + }, + }, + } + hp.Messages[0].Signature = sig.Sign(signing.HARE, hp.Messages[0].SignedBytes()) + hp.Messages[0].SmesherID = sig.NodeID() + hp.Messages[1].Signature = sig.Sign(signing.HARE, hp.Messages[1].SignedBytes()) + hp.Messages[1].SmesherID = sig.NodeID() + + nodeID, err := h.Validate(context.Background(), &hp) + require.ErrorContains(t, err, 
"invalid hare malfeasance proof") + require.Equal(t, types.EmptyNodeID, nodeID) + }) + + t.Run("different layer", func(t *testing.T) { + h := newTestMalfeasanceHandler(t) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + createIdentity(t, h.db, sig) + + hp := wire.HareProof{ + Messages: [2]wire.HareProofMsg{ + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(10), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + }, + } + hp.Messages[0].Signature = sig.Sign(signing.HARE, hp.Messages[0].SignedBytes()) + hp.Messages[0].SmesherID = sig.NodeID() + hp.Messages[1].Signature = sig.Sign(signing.HARE, hp.Messages[1].SignedBytes()) + hp.Messages[1].SmesherID = sig.NodeID() + + nodeID, err := h.Validate(context.Background(), &hp) + require.ErrorContains(t, err, "invalid hare malfeasance proof") + require.Equal(t, types.EmptyNodeID, nodeID) + }) + + t.Run("different round", func(t *testing.T) { + h := newTestMalfeasanceHandler(t) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + createIdentity(t, h.db, sig) + + hp := wire.HareProof{ + Messages: [2]wire.HareProofMsg{ + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(10), + Round: 4, + MsgHash: types.RandomHash(), + }, + }, + }, + } + hp.Messages[0].Signature = sig.Sign(signing.HARE, hp.Messages[0].SignedBytes()) + hp.Messages[0].SmesherID = sig.NodeID() + hp.Messages[1].Signature = sig.Sign(signing.HARE, hp.Messages[1].SignedBytes()) + hp.Messages[1].SmesherID = sig.NodeID() + + nodeID, err := h.Validate(context.Background(), &hp) + require.ErrorContains(t, err, "invalid hare malfeasance proof") + require.Equal(t, types.EmptyNodeID, nodeID) + }) + + t.Run("different signer", func(t *testing.T) { + h := newTestMalfeasanceHandler(t) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + createIdentity(t, h.db, sig) + + sig2, err := signing.NewEdSigner() + require.NoError(t, err) + createIdentity(t, h.db, sig2) + + hp := wire.HareProof{ + Messages: [2]wire.HareProofMsg{ + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(10), + Round: 4, + MsgHash: types.RandomHash(), + }, + }, + }, + } + hp.Messages[0].Signature = sig.Sign(signing.HARE, hp.Messages[0].SignedBytes()) + hp.Messages[0].SmesherID = sig.NodeID() + hp.Messages[1].Signature = sig2.Sign(signing.HARE, hp.Messages[1].SignedBytes()) + hp.Messages[1].SmesherID = sig2.NodeID() + + nodeID, err := h.Validate(context.Background(), &hp) + require.ErrorContains(t, err, "invalid hare malfeasance proof") + require.Equal(t, types.EmptyNodeID, nodeID) + }) + + t.Run("valid", func(t *testing.T) { + h := newTestMalfeasanceHandler(t) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + createIdentity(t, h.db, sig) + + hp := wire.HareProof{ + Messages: [2]wire.HareProofMsg{ + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + { + InnerMsg: wire.HareMetadata{ + Layer: types.LayerID(11), + Round: 3, + MsgHash: types.RandomHash(), + }, + }, + }, + } + hp.Messages[0].Signature = sig.Sign(signing.HARE, hp.Messages[0].SignedBytes()) + hp.Messages[0].SmesherID = sig.NodeID() + hp.Messages[1].Signature = sig.Sign(signing.HARE, 
hp.Messages[1].SignedBytes()) + hp.Messages[1].SmesherID = sig.NodeID() + + nodeID, err := h.Validate(context.Background(), &hp) + require.NoError(t, err) + require.Equal(t, sig.NodeID(), nodeID) + }) +} diff --git a/hare4/metrics.go b/hare4/metrics.go new file mode 100644 index 0000000000..e4182c7e49 --- /dev/null +++ b/hare4/metrics.go @@ -0,0 +1,66 @@ +package hare3 + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/spacemeshos/go-spacemesh/metrics" +) + +const namespace = "hare" + +var ( + processCounter = metrics.NewCounter( + "session", + namespace, + "number of hare sessions at different stages", + []string{"stage"}, + ) + sessionStart = processCounter.WithLabelValues("started") + sessionTerminated = processCounter.WithLabelValues("terminated") + sessionCoin = processCounter.WithLabelValues("weakcoin") + sessionResult = processCounter.WithLabelValues("result") + + exitErrors = metrics.NewCounter( + "exit_errors", + namespace, + "number of unexpected exit errors. should remain at zero", + []string{}, + ).WithLabelValues() + validationError = metrics.NewCounter( + "validation_errors", + namespace, + "number of validation errors. not expected to be at zero", + []string{"error"}, + ) + notRegisteredError = validationError.WithLabelValues("not_registered") + malformedError = validationError.WithLabelValues("malformed") + signatureError = validationError.WithLabelValues("signature") + oracleError = validationError.WithLabelValues("oracle") + + droppedMessages = metrics.NewCounter( + "dropped_msgs", + namespace, + "number of messages dropped by gossip", + []string{}, + ).WithLabelValues() + + validationLatency = metrics.NewHistogramWithBuckets( + "validation_seconds", + namespace, + "validation time in seconds", + []string{"step"}, + prometheus.ExponentialBuckets(0.01, 2, 10), + ) + oracleLatency = validationLatency.WithLabelValues("oracle") + submitLatency = validationLatency.WithLabelValues("submit") + + protocolLatency = metrics.NewHistogramWithBuckets( + "protocol_seconds", + namespace, + "protocol time in seconds", + []string{"step"}, + prometheus.ExponentialBuckets(0.01, 2, 10), + ) + proposalsLatency = protocolLatency.WithLabelValues("proposals") + activeLatency = protocolLatency.WithLabelValues("active") +) diff --git a/hare4/protocol.go b/hare4/protocol.go new file mode 100644 index 0000000000..be89eeb7bc --- /dev/null +++ b/hare4/protocol.go @@ -0,0 +1,630 @@ +package hare3 + +import ( + "bytes" + "fmt" + "slices" + "sync" + + "go.uber.org/zap/zapcore" + "golang.org/x/exp/maps" + + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/log" + "github.com/spacemeshos/go-spacemesh/malfeasance/wire" +) + +type grade uint8 + +const ( + grade0 grade = iota + grade1 + grade2 + grade3 + grade4 + grade5 +) + +func isSubset(s, v []types.ProposalID) bool { + set := map[types.ProposalID]struct{}{} + for _, id := range v { + set[id] = struct{}{} + } + for _, id := range s { + if _, exist := set[id]; !exist { + return false + } + } + return true +} + +func toHash(proposals []types.ProposalID) types.Hash32 { + return types.CalcProposalHash32Presorted(proposals, nil) +} + +type messageKey struct { + IterRound + Sender types.NodeID +} + +type input struct { + *Message + atxgrade grade + malicious bool + msgHash types.Hash32 +} + +func (i *input) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + if i.Message != nil { + i.Message.MarshalLogObject(encoder) + } + encoder.AddUint8("atxgrade", uint8(i.atxgrade)) + 
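// atxgrade, malicious and msgHash are not part of the wire message: they are
+	// derived locally when a message is received and drive the graded-gossip logic below.
+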
encoder.AddBool("malicious", i.malicious) + encoder.AddString("hash", i.msgHash.ShortString()) + return nil +} + +type output struct { + coin *bool // set based on preround messages right after preround completes in 0 iter + result []types.ProposalID // set based on notify messages at the start of next iter + terminated bool // protocol participates in one more iteration after outputing result + message *Message +} + +func (o *output) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddBool("terminated", o.terminated) + if o.coin != nil { + encoder.AddBool("coin", *o.coin) + } + if o.result != nil { + encoder.AddArray("result", zapcore.ArrayMarshalerFunc(func(encoder log.ArrayEncoder) error { + for _, id := range o.result { + encoder.AppendString(types.Hash20(id).ShortString()) + } + return nil + })) + } + if o.message != nil { + encoder.AddObject("msg", o.message) + } + return nil +} + +func newProtocol(threshold uint16) *protocol { + return &protocol{ + validProposals: map[types.Hash32][]types.ProposalID{}, + gossip: gossip{threshold: threshold, state: map[messageKey]*gossipInput{}}, + } +} + +type protocol struct { + mu sync.Mutex + IterRound + coinout bool + coin *types.VrfSignature // smallest vrf from preround messages. not a part of paper + initial []types.ProposalID // Si + result *types.Hash32 // set after waiting for notify messages. Case 1 + locked *types.Hash32 // Li + hardLocked bool + validProposals map[types.Hash32][]types.ProposalID // Ti + gossip gossip +} + +func (p *protocol) OnInitial(proposals []types.ProposalID) { + p.mu.Lock() + defer p.mu.Unlock() + p.initial = proposals +} + +func (p *protocol) OnInput(msg *input) (bool, *wire.HareProof) { + p.mu.Lock() + defer p.mu.Unlock() + + gossip, equivocation := p.gossip.receive(p.IterRound, msg) + if !gossip { + return false, equivocation + } + if msg.Round == preround && + (p.coin == nil || (p.coin != nil && msg.Eligibility.Proof.Cmp(p.coin) == -1)) { + p.coin = &msg.Eligibility.Proof + } + return gossip, equivocation +} + +func (p *protocol) thresholdProposals(ir IterRound, grade grade) (*types.Hash32, []types.ProposalID) { + for _, ref := range p.gossip.thresholdGossipRef(ir, grade) { + valid, exist := p.validProposals[ref] + if exist { + return &ref, valid + } + } + return nil, nil +} + +func (p *protocol) commitExists(iter uint8, match types.Hash32, grade grade) bool { + for _, ref := range p.gossip.thresholdGossipRef(IterRound{Iter: iter, Round: commit}, grade) { + if ref == match { + return true + } + } + return false +} + +func (p *protocol) execution(out *output) { + // 4.3 Protocol Execution + switch p.Round { + case preround: + out.message = &Message{Body: Body{ + IterRound: p.IterRound, + Value: Value{Proposals: p.initial}, + }} + case hardlock: + if p.Iter > 0 { + if p.result != nil { + out.terminated = true + } + ref, values := p.thresholdProposals(IterRound{Iter: p.Iter - 1, Round: notify}, grade5) + if ref != nil && p.result == nil { + p.result = ref + out.result = values + if values == nil { + // receiver expects non-nil result + out.result = []types.ProposalID{} + } + } + if ref, _ := p.thresholdProposals(IterRound{Iter: p.Iter - 1, Round: commit}, grade4); ref != nil { + p.locked = ref + p.hardLocked = true + } else { + p.locked = nil + p.hardLocked = false + } + } + case softlock: + if p.Iter > 0 && !p.hardLocked { + if ref, _ := p.thresholdProposals(IterRound{Iter: p.Iter - 1, Round: commit}, grade3); ref != nil { + p.locked = ref + } else { + p.locked = nil + } + } + case propose: + 
values := p.gossip.thresholdGossip(IterRound{Round: preround}, grade4) + if p.Iter > 0 { + ref, overwrite := p.thresholdProposals(IterRound{Iter: p.Iter - 1, Round: commit}, grade2) + if ref != nil { + values = overwrite + } + } + out.message = &Message{Body: Body{ + IterRound: p.IterRound, + Value: Value{Proposals: values}, + }} + case commit: + // condition (d) is realized by ordering proposals by vrf + proposed := p.gossip.gradecast(IterRound{Iter: p.Iter, Round: propose}) + g2values := p.gossip.thresholdGossip(IterRound{Round: preround}, grade2) + for _, graded := range proposed { + // condition (a) and (b) + // grade0 proposals are not added to the set + if !isSubset(graded.values, g2values) { + continue + } + p.validProposals[toHash(graded.values)] = graded.values + } + if p.hardLocked && p.locked != nil { + out.message = &Message{Body: Body{ + IterRound: p.IterRound, + Value: Value{Reference: p.locked}, + }} + } else { + g3values := p.gossip.thresholdGossip(IterRound{Round: preround}, grade3) + g5values := p.gossip.thresholdGossip(IterRound{Round: preround}, grade5) + for _, graded := range proposed { + id := toHash(graded.values) + // condition (c) + if _, exist := p.validProposals[id]; !exist { + continue + } + // condition (e) + if graded.grade != grade2 { + continue + } + // condition (f) + if !isSubset(graded.values, g3values) { + continue + } + // condition (g) + if !isSubset(g5values, graded.values) && !p.commitExists(p.Iter-1, id, grade1) { + continue + } + // condition (h) + if p.locked != nil && *p.locked != id { + continue + } + out.message = &Message{Body: Body{ + IterRound: p.IterRound, + Value: Value{Reference: &id}, + }} + break + } + } + case notify: + ref := p.result + if ref == nil { + ref, _ = p.thresholdProposals(IterRound{Iter: p.Iter, Round: commit}, grade5) + } + if ref != nil { + out.message = &Message{Body: Body{ + IterRound: p.IterRound, + Value: Value{Reference: ref}, + }} + } + } +} + +func (p *protocol) Next() output { + p.mu.Lock() + defer p.mu.Unlock() + + out := output{} + p.execution(&out) + if p.Round >= softlock && p.coin != nil && !p.coinout { + coin := p.coin.LSB() != 0 + out.coin = &coin + p.coinout = true + } + if p.Round == preround && p.Iter == 0 { + // skips hardlock unlike softlock in the paper. + // this makes no practical difference from correctness. 
+	// but allows us to simplify the assignment in validValues
+		p.Round = softlock
+	} else if p.Round == notify {
+		p.Round = hardlock
+		p.Iter++
+	} else {
+		p.Round++
+	}
+	return out
+}
+
+func (p *protocol) Stats() *stats {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	s := &stats{
+		iter:      p.Iter - 1,
+		threshold: p.gossip.threshold,
+	}
+	// preround messages received after the very first iteration
+	// have no impact on the protocol
+	if s.iter == 0 {
+		for grade := grade1; grade <= grade5; grade++ {
+			s.preround = append(s.preround, preroundStats{
+				grade: grade,
+				tallies: maps.Values(
+					thresholdTallies(p.gossip.state, IterRound{Round: preround}, grade, tallyProposals),
+				),
+			})
+		}
+	}
+	proposals := p.gossip.gradecast(IterRound{Iter: p.Iter - 1, Round: propose})
+	for _, graded := range proposals {
+		s.propose = append(s.propose, proposeStats{
+			grade:     graded.grade,
+			ref:       toHash(graded.values),
+			proposals: graded.values,
+		})
+	}
+	// stats are collected at the start of the current iteration (p.Iter).
+	// we expect 2 network delays to have passed since commit messages were broadcast
+	for grade := grade4; grade <= grade5; grade++ {
+		s.commit = append(s.commit, commitStats{
+			grade: grade,
+			tallies: maps.Values(
+				thresholdTallies(p.gossip.state, IterRound{Iter: p.Iter - 1, Round: commit}, grade, tallyRefs),
+			),
+		})
+	}
+	// we are not interested in any other grade for notify messages as they have no impact on protocol execution
+	s.notify = append(s.notify, notifyStats{
+		grade: grade5,
+		tallies: maps.Values(
+			thresholdTallies(p.gossip.state, IterRound{Iter: p.Iter - 1, Round: notify}, grade5, tallyRefs),
+		),
+	})
+	return s
+}
+
+type gossipInput struct {
+	*input
+	received      IterRound
+	otherReceived *IterRound
+}
+
+// Protocol 1. graded-gossip. page 10.
+type gossip struct {
+	threshold uint16
+	state     map[messageKey]*gossipInput
+}
+
+func (g *gossip) receive(current IterRound, input *input) (bool, *wire.HareProof) {
+	// Case 1: will be discarded earlier
+	other, exist := g.state[input.key()]
+	if exist {
+		if other.msgHash != input.msgHash && !other.malicious {
+			// Protocol 3. thresh-gossip. keep the one with the maximal grade.
+			if input.atxgrade > other.atxgrade {
+				input.malicious = true
+				g.state[input.key()] = &gossipInput{
+					input:         input,
+					received:      current,
+					otherReceived: &other.received,
+				}
+			} else {
+				// Case 3
+				other.malicious = true
+				other.otherReceived = &current
+			}
+			return true, &wire.HareProof{Messages: [2]wire.HareProofMsg{
+				other.ToMalfeasanceProof(), input.ToMalfeasanceProof(),
+			}}
+		}
+		// Case 2. but we also filter duplicates from the p2p layer here
+		return false, nil
+	}
+	// Case 4
+	g.state[input.key()] = &gossipInput{input: input, received: current}
+	return true, nil
+}
+
+type gset struct {
+	values   []types.ProposalID
+	grade    grade
+	smallest types.VrfSignature
+}
+
+// Protocol 2. gradecast. page 13.
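+// a message is graded by arrival time and by whether its sender equivocated:
+// grade2 needs atx grade5, delivery within 1 network delay, and no conflicting
+// message within 3 delays; grade1 relaxes this to atx grade4 or better,
+// delivery within 2 delays, and no conflicting message within 2 delays.
+// anything else contributes nothing to gradecast.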
+func (g *gossip) gradecast(target IterRound) []gset {
+	// unlike the paper, we use 5-graded gossip for gradecast as well
+	var rst []gset
+	for key, value := range g.state {
+		if key.IterRound == target && (!value.malicious || value.otherReceived != nil) {
+			if value.atxgrade == grade5 && value.received.Delay(target) <= 1 &&
+				// 2 (a)
+				(value.otherReceived == nil || value.otherReceived.Delay(target) > 3) {
+				// 2 (b)
+				rst = append(rst, gset{
+					grade:    grade2,
+					values:   value.Value.Proposals,
+					smallest: value.Eligibility.Proof,
+				})
+			} else if value.atxgrade >= grade4 && value.received.Delay(target) <= 2 &&
+				// 3 (a)
+				(value.otherReceived == nil || value.otherReceived.Delay(target) > 2) {
+				// 3 (b)
+				rst = append(rst, gset{
+					grade:    grade1,
+					values:   value.Value.Proposals,
+					smallest: value.Eligibility.Proof,
+				})
+			}
+		}
+	}
+	// hare expects to receive multiple proposals. the expected number of leaders is set to 5.
+	// we need to choose the same one for commit across the cluster.
+	// we do that by ordering them by vrf value and picking the first one that passes the other checks
+	// (see commit in execution). in the hare3 paper, look for the p-Weak leader election property.
+	slices.SortFunc(rst, func(i, j gset) int {
+		return i.smallest.Cmp(&j.smallest)
+	})
+	return rst
+}
+
+func tallyProposals(all map[types.ProposalID]proposalTally, inp *gossipInput) {
+	for _, id := range inp.Value.Proposals {
+		tally, exist := all[id]
+		if !exist {
+			tally = proposalTally{id: id}
+		}
+		tally.total += inp.Eligibility.Count
+		if !inp.malicious {
+			tally.valid += inp.Eligibility.Count
+		}
+		all[id] = tally
+	}
+}
+
+// Protocol 3. thresh-gossip. Page 15.
+// the output is the sorted union of proposals received
+// in the given round with at least the specified grade.
+func (g *gossip) thresholdGossip(filter IterRound, grade grade) []types.ProposalID {
+	rst := thresholdGossip(thresholdTallies(g.state, filter, grade, tallyProposals), g.threshold)
+	slices.SortFunc(rst, func(i, j types.ProposalID) int {
+		return bytes.Compare(i.Bytes(), j.Bytes())
+	})
+	return rst
+}
+
+func tallyRefs(all map[types.Hash32]refTally, inp *gossipInput) {
+	tally, exist := all[*inp.Value.Reference]
+	if !exist {
+		tally = refTally{id: *inp.Value.Reference}
+	}
+	tally.total += inp.Eligibility.Count
+	if !inp.malicious {
+		tally.valid += inp.Eligibility.Count
+	}
+	all[*inp.Value.Reference] = tally
+}
+
+// thresholdGossipRef returns all references to proposals in the given round with at least the specified grade.
+func (g *gossip) thresholdGossipRef(filter IterRound, grade grade) []types.Hash32 {
+	return thresholdGossip(thresholdTallies(g.state, filter, grade, tallyRefs), g.threshold)
+}
+
+func thresholdGossip[T interface {
+	comparable
+	fmt.Stringer
+}](
+	tallies map[T]tallyStats[T], threshold uint16,
+) []T {
+	rst := []T{}
+	for _, item := range tallies {
+		// valid > 0 and total >= f
+		// at least one non-equivocating vote, and the total crossed committee/2 + 1
+		if item.total >= threshold && item.valid > 0 {
+			rst = append(rst, item.id)
+		}
+	}
+	return rst
+}
+
+func thresholdTallies[T interface {
+	comparable
+	fmt.Stringer
+}](
+	state map[messageKey]*gossipInput,
+	filter IterRound,
+	msgGrade grade,
+	tally func(tally map[T]tallyStats[T], inp *gossipInput),
+) map[T]tallyStats[T] {
+	all := map[T]tallyStats[T]{}
+	min := grade5
+	// pick the min atx grade from non-equivocating identities.
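+	// the tally below then includes every message at or above that grade, so no
+	// honest vote is excluded. equivocating identities only ever add to "total",
+	// never to "valid" (see tallyProposals and tallyRefs above); with
+	// threshold=10, an item with total=11 and valid=1 passes, total=9 never does.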
+ for key, value := range state { + if key.IterRound == filter && value.atxgrade < min && !value.malicious && + value.received.Grade(filter) >= msgGrade { + min = value.atxgrade + } + } + // tally votes for valid and malicious messages + for key, value := range state { + if key.IterRound == filter && value.atxgrade >= min && value.received.Grade(filter) >= msgGrade { + tally(all, value) + } + } + return all +} + +type preroundStats struct { + grade grade + tallies []proposalTally +} + +func (s *preroundStats) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddUint16("grade", uint16(s.grade)) + encoder.AddArray("tallies", zapcore.ArrayMarshalerFunc(func(enc zapcore.ArrayEncoder) error { + for _, tally := range s.tallies { + enc.AppendObject(&tally) + } + return nil + })) + return nil +} + +type tallyStats[T fmt.Stringer] struct { + id T + total uint16 + valid uint16 +} + +func (s *tallyStats[T]) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddUint16("total", s.total) + encoder.AddUint16("valid", s.valid) + encoder.AddString("id", s.id.String()) + return nil +} + +type ( + proposalTally = tallyStats[types.ProposalID] + refTally = tallyStats[types.Hash32] +) + +type proposeStats struct { + grade grade + ref types.Hash32 + proposals []types.ProposalID +} + +func (s *proposeStats) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddString("ref", s.ref.ShortString()) + encoder.AddUint16("grade", uint16(s.grade)) + encoder.AddArray("proposals", zapcore.ArrayMarshalerFunc(func(encoder zapcore.ArrayEncoder) error { + for _, id := range s.proposals { + encoder.AppendString(id.String()) + } + return nil + })) + return nil +} + +type commitStats struct { + grade grade + tallies []refTally +} + +func (s *commitStats) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddUint16("grade", uint16(s.grade)) + encoder.AddArray("tallies", zapcore.ArrayMarshalerFunc(func(encoder zapcore.ArrayEncoder) error { + for _, tally := range s.tallies { + encoder.AppendObject(&tally) + } + return nil + })) + return nil +} + +type notifyStats struct { + grade grade + tallies []refTally +} + +func (n *notifyStats) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddUint16("grade", uint16(n.grade)) + encoder.AddArray("tallies", zapcore.ArrayMarshalerFunc(func(encoder zapcore.ArrayEncoder) error { + for _, tally := range n.tallies { + encoder.AppendObject(&tally) + } + return nil + })) + return nil +} + +type stats struct { + iter uint8 + threshold uint16 + preround []preroundStats + propose []proposeStats + commit []commitStats + notify []notifyStats +} + +func (s *stats) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddUint8("iter", s.iter) + encoder.AddUint16("threshold", s.threshold) + encoder.AddArray("preround", zapcore.ArrayMarshalerFunc(func(encoder zapcore.ArrayEncoder) error { + for _, stat := range s.preround { + encoder.AppendObject(&stat) + } + return nil + })) + encoder.AddArray("propose", zapcore.ArrayMarshalerFunc(func(encoder zapcore.ArrayEncoder) error { + for _, stat := range s.propose { + encoder.AppendObject(&stat) + } + return nil + })) + encoder.AddArray("commit", zapcore.ArrayMarshalerFunc(func(encoder zapcore.ArrayEncoder) error { + for _, stat := range s.commit { + encoder.AppendObject(&stat) + } + return nil + })) + encoder.AddArray("notify", zapcore.ArrayMarshalerFunc(func(encoder zapcore.ArrayEncoder) error { + for _, stat := range s.notify { + encoder.AppendObject(&stat) + } + return nil 
+ })) + return nil +} diff --git a/hare4/protocol_test.go b/hare4/protocol_test.go new file mode 100644 index 0000000000..611dec4470 --- /dev/null +++ b/hare4/protocol_test.go @@ -0,0 +1,584 @@ +package hare3 + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest" + + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/malfeasance/wire" +) + +func castIds(strings ...string) []types.ProposalID { + ids := []types.ProposalID{} + for _, p := range strings { + var id types.ProposalID + copy(id[:], p) + ids = append(ids, id) + } + return ids +} + +type tinput struct { + input + expect *response +} + +type response struct { + gossip bool + equivocation *wire.HareProof +} + +func (t *tinput) ensureMsg() { + if t.Message == nil { + t.Message = &Message{} + } +} + +func (t *tinput) ensureResponse() { + if t.expect == nil { + t.expect = &response{} + } +} + +func (t *tinput) round(r Round) *tinput { + t.ensureMsg() + t.Round = r + return t +} + +func (t *tinput) iter(i uint8) *tinput { + t.ensureMsg() + t.Iter = i + return t +} + +func (t *tinput) proposals(proposals ...string) *tinput { + t.ensureMsg() + t.Value.Proposals = castIds(proposals...) + return t +} + +func (t *tinput) ref(proposals ...string) *tinput { + t.ensureMsg() + hs := types.CalcProposalHash32Presorted(castIds(proposals...), nil) + t.Value.Reference = &hs + return t +} + +func (t *tinput) vrf(vrf ...byte) *tinput { + t.ensureMsg() + copy(t.Eligibility.Proof[:], vrf) + return t +} + +func (t *tinput) vrfcount(c uint16) *tinput { + t.ensureMsg() + t.Eligibility.Count = c + return t +} + +func (t *tinput) sender(name string) *tinput { + t.ensureMsg() + copy(t.Sender[:], name) + return t +} + +func (t *tinput) mshHash(h string) *tinput { + copy(t.input.msgHash[:], h) + return t +} + +func (t *tinput) malicious() *tinput { + t.input.malicious = true + return t +} + +func (t *tinput) gossip() *tinput { + t.ensureResponse() + t.expect.gossip = true + return t +} + +func (t *tinput) equi() *tinput { + // TODO(dshulyak) do i want to test that it constructed correctly here? + t.ensureResponse() + t.expect.equivocation = &wire.HareProof{} + return t +} + +func (t *tinput) nogossip() *tinput { + t.ensureResponse() + t.expect.gossip = false + return t +} + +func (t *tinput) g(g grade) *tinput { + t.atxgrade = g + return t +} + +type toutput struct { + act bool + output +} + +func (t *toutput) ensureMsg() { + if t.message == nil { + t.message = &Message{} + } +} + +func (t *toutput) active() *toutput { + t.act = true + return t +} + +func (t *toutput) round(r Round) *toutput { + t.ensureMsg() + t.message.Round = r + return t +} + +func (t *toutput) iter(i uint8) *toutput { + t.ensureMsg() + t.message.Iter = i + return t +} + +func (t *toutput) proposals(proposals ...string) *toutput { + t.ensureMsg() + t.message.Value.Proposals = castIds(proposals...) + return t +} + +func (t *toutput) ref(proposals ...string) *toutput { + t.ensureMsg() + hs := types.CalcProposalHash32Presorted(castIds(proposals...), nil) + t.message.Value.Reference = &hs + return t +} + +func (t *toutput) terminated() *toutput { + t.output.terminated = true + return t +} + +func (t *toutput) coin(c bool) *toutput { + t.output.coin = &c + return t +} + +func (t *toutput) result(proposals ...string) *toutput { + t.output.result = castIds(proposals...) 
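+	// result marks the step at which the protocol is expected to deliver these
+	// proposals as its final output (compared against output.result in the test loop).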
+	return t
+}
+
+type setup struct {
+	threshold uint16
+	proposals []types.ProposalID
+}
+
+func (s *setup) thresh(v uint16) *setup {
+	s.threshold = v
+	return s
+}
+
+func (s *setup) initial(proposals ...string) *setup {
+	s.proposals = castIds(proposals...)
+	return s
+}
+
+type testCase struct {
+	desc  string
+	steps []any
+}
+
+func gen(desc string, steps ...any) testCase {
+	return testCase{desc: desc, steps: steps}
+}
+
+func TestProtocol(t *testing.T) {
+	for _, tc := range []testCase{
+		gen("sanity", // simplest e2e protocol run
+			new(setup).thresh(10).initial("a", "b"),
+			new(toutput).active().round(preround).proposals("a", "b"),
+			new(tinput).sender("1").round(preround).proposals("b", "a").vrfcount(3).g(grade5),
+			new(tinput).sender("2").round(preround).proposals("a", "c").vrfcount(9).g(grade5),
+			new(tinput).sender("3").round(preround).proposals("c").vrfcount(6).g(grade5),
+			new(toutput).coin(false),
+			new(toutput).active().round(propose).proposals("a", "c"),
+			new(tinput).sender("1").round(propose).proposals("a", "c").g(grade5).vrf(2),
+			new(tinput).sender("2").round(propose).proposals("b", "d").g(grade5).vrf(1),
+			new(toutput),
+			new(toutput),
+			new(toutput).active().round(commit).ref("a", "c"),
+			new(tinput).sender("1").round(commit).ref("a", "c").vrfcount(4).g(grade5),
+			new(tinput).sender("2").round(commit).ref("a", "c").vrfcount(8).g(grade5),
+			new(toutput).active().round(notify).ref("a", "c"),
+			new(tinput).sender("1").round(notify).ref("a", "c").vrfcount(5).g(grade5),
+			new(tinput).sender("2").round(notify).ref("a", "c").vrfcount(6).g(grade5),
+			new(toutput).result("a", "c"), // hardlock
+			new(toutput),                  // softlock
+			// propose, commit, notify messages are built based on previous state
+			new(toutput).active().round(propose).iter(1).proposals("a", "c"), // propose
+			new(toutput), // wait1
+			new(toutput), // wait2
+			new(toutput).active().round(commit).iter(1).ref("a", "c"), // commit
+			new(toutput).active().round(notify).iter(1).ref("a", "c"), // notify
+			new(toutput).terminated(),
+		),
+		gen("commit on softlock",
+			new(setup).thresh(10).initial("a", "b"),
+			new(toutput),
+			new(tinput).sender("1").round(preround).proposals("a", "b").vrfcount(11).g(grade5),
+			new(toutput).coin(false),
+			new(toutput),
+			new(tinput).sender("1").round(propose).proposals("a", "b").g(grade5),
+			new(tinput).sender("2").round(propose).proposals("b").g(grade5),
+			new(toutput),
+			new(toutput),
+			new(toutput),
+			new(tinput).sender("1").round(commit).ref("b").vrfcount(11).g(grade3),
+			new(toutput),
+			new(toutput), // hardlock
+			new(toutput), // softlock
+			// propose, commit, notify messages are built based on previous state
+			new(toutput), // propose
+			new(tinput).sender("1").iter(1).round(propose).proposals("b").g(grade5),
+			new(toutput), // wait1
+			new(toutput), // wait2
+			new(toutput).active().round(commit).iter(1).ref("b"), // commit
+		),
+		gen("empty 0 iteration", // test that the protocol can complete not only in the 0th iteration
+			new(setup).thresh(10).initial("a", "b"),
+			new(toutput), // preround
+			new(tinput).sender("1").round(preround).proposals("a", "b").vrfcount(3).g(grade5),
+			new(tinput).sender("2").round(preround).proposals("a", "b").vrfcount(9).g(grade5),
+			new(toutput).coin(false), // softlock
+			new(toutput), // propose
+			new(toutput), // wait1
+			new(toutput), // wait2
+			new(toutput), // commit
+			new(toutput), // notify
+			new(toutput), // 2nd hardlock
+			new(toutput), // 2nd softlock
+			new(toutput).active().iter(1).round(propose).proposals("a", "b"), // 2nd propose
+
new(tinput).sender("1").iter(1).round(propose).proposals("a", "b").g(grade5), + new(toutput), // 2nd wait1 + new(toutput), // 2nd wait2 + new(toutput).active().iter(1).round(commit).ref("a", "b"), + new(tinput).sender("1").iter(1).round(commit).ref("a", "b").g(grade5).vrfcount(11), + new(toutput).active().iter(1).round(notify).ref("a", "b"), + new(tinput).sender("1").iter(1).round(notify).ref("a", "b").g(grade5).vrfcount(11), + new(toutput).result("a", "b"), // 3rd hardlock + new(toutput), // 3rd softlock + new(toutput), // 3rd propose + new(toutput), // 3rd wait1 + new(toutput), // 3rd wait2 + new(toutput), // 3rd commit + new(toutput), // 3rd notify + new(toutput).terminated(), // 4th softlock + ), + gen("empty proposal", + new(setup).thresh(10), + new(toutput), // preround + new(tinput).sender("1").round(preround).vrfcount(11).g(grade5), + new(toutput).coin(false), // softlock + new(toutput).active().round(propose).proposals(), // propose + new(tinput).sender("1").round(propose).g(grade5).vrf(2), + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput).active().round(commit).ref(), // commit + new(tinput).sender("1").round(commit).ref().vrfcount(11).g(grade5), + new(toutput).active().round(notify).ref(), // notify + new(tinput).sender("1").round(notify).ref().vrfcount(11).g(grade5), + new(toutput).result(), // hardlock + ), + gen("coin true", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("2").round(preround).vrf(2).g(grade5), + new(tinput).sender("1").round(preround).vrf(1).g(grade5), + new(tinput).sender("3").round(preround).vrf(2).g(grade5), + new(toutput).coin(true), + ), + gen("coin false", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("2").round(preround).vrf(1, 2).g(grade5), + new(tinput).sender("1").round(preround).vrf(0, 1).g(grade5), + new(tinput).sender("3").round(preround).vrf(2).g(grade5), + new(toutput).coin(false), + ), + gen("coin delayed", + new(setup).thresh(10), + new(toutput), + new(toutput), + new(tinput).sender("2").round(preround).vrf(1, 2).g(grade5), + new(tinput).sender("1").round(preround).vrf(2, 3).g(grade5), + new(toutput).coin(true), + ), + gen("duplicates don't affect thresholds", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("1").round(preround).proposals("a", "b").vrfcount(5).g(grade5), + new(tinput).sender("3").round(preround).proposals("d").vrfcount(6).g(grade5).gossip(), + new(tinput).sender("2").round(preround).proposals("a", "b").vrfcount(6).g(grade5), + new(tinput).sender("3").round(preround).proposals("d").vrfcount(6).g(grade5), + new(toutput).coin(false), + new(toutput).active().round(propose).proposals("a", "b"), // assert that `d` doesn't cross + new(tinput).sender("1").round(propose).proposals("a").vrf(2).g(grade5), + // this one would be preferred if duplicates were counted + new(tinput).sender("2").round(propose).proposals("b").vrf(1).g(grade5), + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput), // commit + new(tinput).sender("1").round(commit).ref("a").vrfcount(4).g(grade5), + new(tinput).sender("2").round(commit).ref("b").vrfcount(6).g(grade5), + new(tinput).sender("2").round(commit).ref("b").vrfcount(6).g(grade5), + new(tinput).sender("3").round(commit).ref("a").vrfcount(7).g(grade5), + new(toutput).active().round(notify).ref("a"), // duplicates commits were ignored + ), + gen("malicious preround", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("1").round(preround).proposals("a", "b").vrfcount(9).g(grade5), + new(tinput).sender("2").malicious().gossip(). 
+ round(preround).proposals("b", "d").vrfcount(11).g(grade5), + new(toutput).coin(false), + // d would be added if from non-malicious + new(toutput).active().round(propose).proposals("b"), + ), + gen("malicious proposal", + new(setup).thresh(10), + new(toutput), + new(toutput), // softlock + new(tinput).sender("5"). + round(preround).proposals("a", "b", "c").vrfcount(11).g(grade5), + new(toutput).coin(false), // propose + new(tinput).sender("1").round(propose).proposals("a", "c").vrf(2).g(grade5), + // this one would be preferred if from non-malicious + new(tinput).sender("2").malicious().gossip(). + round(propose).proposals("b").vrf(1).g(grade5), + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput).active().round(commit).ref("a", "c"), // commit + ), + gen("malicious commit", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("5"). + round(preround).proposals("a").vrfcount(11).g(grade5), + new(toutput).coin(false), // softlock + new(toutput), // propose + new(tinput).sender("1").round(propose).proposals("a").g(grade5), + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput), // commit + new(tinput).sender("1").malicious().gossip(). + round(commit).ref("a").vrfcount(11).g(grade5), + new(toutput).active(), // notify outputs nothing + ), + gen("malicious notify", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("5"). + round(preround).proposals("a").vrfcount(11).g(grade5), + new(toutput).coin(false), // softlock + new(toutput), // propose + new(tinput).sender("1").round(propose).proposals("a").g(grade5), + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput), // commit + new(tinput).sender("1").round(commit).ref("a").vrfcount(11).g(grade5), + new(toutput), // notify + new(tinput).sender("1").malicious().gossip(). + round(notify).ref("a").vrfcount(11).g(grade5), + new(toutput), // no result as the only notify is malicious + ), + gen("equivocation preround", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("7").mshHash("m0"). + round(preround).proposals("a").vrfcount(11).g(grade4), + new(tinput).sender("7").gossip().mshHash("m1").equi(). + round(preround).proposals("b").vrfcount(11).g(grade5), + new(tinput).sender("7").nogossip().mshHash("m2"). + round(preround).proposals("c").vrfcount(11).g(grade3), + ), + gen("multiple malicious not broadcasted", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("7").malicious().gossip(). + round(preround).proposals("a").vrfcount(11).g(grade5), + new(tinput).sender("7").malicious().nogossip(). + round(preround).proposals("b").vrfcount(11).g(grade5), + new(tinput).sender("7").malicious().nogossip(). + round(preround).proposals("c").vrfcount(11).g(grade5), + ), + gen("no commit for grade1", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("5"). + round(preround).proposals("a").vrfcount(11).g(grade5), + new(toutput).coin(false), // softlock + new(toutput), // propose + new(toutput), // wait1 + new(tinput).sender("1").round(propose).proposals("a").g(grade5), + new(toutput), // wait2 + new(toutput).active(), // commit + ), + gen("other gradecast was received", + new(setup).thresh(10), + new(toutput), + new(tinput).sender("5"). 
+ round(preround).proposals("a").vrfcount(11).g(grade5), + new(toutput).coin(false), // softlock + new(toutput), // propose + new(tinput).sender("1").round(propose).proposals("a").g(grade5), + new(toutput), // wait1 + new(tinput).sender("1").mshHash("a").gossip().round(propose).proposals("b").g(grade3), + new(toutput), // wait2 + new(toutput).active(), // commit + ), + gen("no commit if not subset of grade3", + new(setup).thresh(10), + new(toutput), // preround + new(toutput), // softlock + new(toutput), // propose + new(tinput).sender("1").round(propose).proposals("a").g(grade5), + new(toutput), // wait1 + new(tinput).sender("1"). + round(preround).proposals("a").vrfcount(11).g(grade2), + new(toutput).coin(false), // wait2 + new(toutput).active(), // commit + ), + gen("grade5 proposals are not in propose", + new(setup).thresh(10), + new(toutput), // preround + new(tinput).sender("1"). + round(preround).proposals("a").vrfcount(11).g(grade5), + new(toutput).coin(false), // softlock + new(tinput).sender("2"). + round(preround).proposals("b").vrfcount(11).g(grade5), + new(toutput), // propose + new(tinput).sender("1").round(propose).proposals("b").g(grade5), + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput).active(), // commit + ), + gen("commit locked", + new(setup).thresh(10), + new(toutput), // preround + new(tinput).sender("1"). + round(preround).proposals("a", "b").vrfcount(11).g(grade5), + new(toutput).coin(false), // softlock + new(toutput), // propose + new(tinput).sender("1").round(propose).proposals("a").g(grade5), + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput), // commit + new(toutput), // notify + new(toutput), // hardlock + // commit on a will have grade3, so no hardlock + new(tinput).sender("1").round(commit).ref("a").vrfcount(11).g(grade5), + new(toutput), // softlock + new(toutput), // propose + // commit on b will have grade1, to satisfy condition (g) + new(tinput).sender("2").round(commit).ref("b").vrfcount(11).g(grade5), + new(tinput).sender("1").iter(1).round(propose).proposals("a").g(grade5).vrf(2), + new(tinput).sender("2").iter(1).round(propose).proposals("b").g(grade5).vrf(1), + new(toutput), // wait1 + new(toutput), // wait2 + // condition (h) ensures that we commit on locked value, even though proposal for b + // is first in the order + new(toutput).active().round(commit).iter(1).ref("a"), // commit + ), + gen("early proposal by one", + new(setup).thresh(10), + new(tinput).sender("1").round(preround).proposals("a", "b").vrfcount(11).g(grade5), + new(toutput).coin(false), // preround + new(toutput), // softlock + new(tinput).sender("1").round(propose).proposals("a", "b").g(grade5).vrf(1), + new(toutput), // propose + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput).active().round(commit).ref("a", "b"), + ), + gen("early proposal by two", + new(setup).thresh(10), + new(tinput).sender("1").round(preround).proposals("a", "b").vrfcount(11).g(grade5), + new(toutput).coin(false), // preround + new(tinput).sender("1").round(propose).proposals("a", "b").g(grade5).vrf(1), + new(toutput), // softlock + new(toutput), // propose + new(toutput), // wait1 + new(toutput), // wait2 + new(toutput).active().round(commit).ref("a", "b"), + ), + } { + t.Run(tc.desc, func(t *testing.T) { + var ( + proto *protocol + logger = zaptest.NewLogger(t) + ) + for i, step := range tc.steps { + if i != 0 && proto == nil { + require.FailNow(t, "step with setup should be the first one") + } + switch casted := step.(type) { + case *setup: + proto = 
newProtocol(casted.threshold) + proto.OnInitial(casted.proposals) + case *tinput: + logger.Debug("input", zap.Int("i", i), zap.Inline(casted)) + gossip, equivocation := proto.OnInput(&casted.input) + if casted.expect != nil { + require.Equal(t, casted.expect.gossip, gossip, "%d", i) + if casted.expect.equivocation != nil { + require.NotEmpty(t, equivocation) + } + } + case *toutput: + before := proto.Round + out := proto.Next() + if casted.act { + require.Equal(t, casted.output, out, "%d", i) + } + logger.Debug("output", + zap.Int("i", i), + zap.Inline(casted), + zap.Stringer("before", before), + zap.Stringer("after", proto.Round), + ) + stats := proto.Stats() + enc := zapcore.NewMapObjectEncoder() + require.NoError(t, stats.MarshalLogObject(enc)) + } + } + }) + } +} + +func TestInputMarshall(t *testing.T) { + enc := zapcore.NewMapObjectEncoder() + inp := &input{ + Message: &Message{}, + } + require.NoError(t, inp.MarshalLogObject(enc)) +} + +func TestOutputMarshall(t *testing.T) { + enc := zapcore.NewMapObjectEncoder() + coin := true + out := &output{ + coin: &coin, + result: []types.ProposalID{{}}, + message: &Message{}, + } + require.NoError(t, out.MarshalLogObject(enc)) +} diff --git a/hare4/tracer.go b/hare4/tracer.go new file mode 100644 index 0000000000..861c5f9e9a --- /dev/null +++ b/hare4/tracer.go @@ -0,0 +1,25 @@ +package hare3 + +import "github.com/spacemeshos/go-spacemesh/common/types" + +type Tracer interface { + OnStart(types.LayerID) + OnStop(types.LayerID) + OnActive([]*types.HareEligibility) + OnMessageSent(*Message) + OnMessageReceived(*Message) +} + +var _ Tracer = noopTracer{} + +type noopTracer struct{} + +func (noopTracer) OnStart(types.LayerID) {} + +func (noopTracer) OnStop(types.LayerID) {} + +func (noopTracer) OnActive([]*types.HareEligibility) {} + +func (noopTracer) OnMessageSent(*Message) {} + +func (noopTracer) OnMessageReceived(*Message) {} diff --git a/hare4/types.go b/hare4/types.go new file mode 100644 index 0000000000..b94800c207 --- /dev/null +++ b/hare4/types.go @@ -0,0 +1,172 @@ +package hare3 + +import ( + "errors" + "fmt" + + "go.uber.org/zap/zapcore" + + "github.com/spacemeshos/go-spacemesh/codec" + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/hash" + "github.com/spacemeshos/go-spacemesh/log" + "github.com/spacemeshos/go-spacemesh/malfeasance/wire" +) + +type Round uint8 + +var roundNames = [...]string{"preround", "hardlock", "softlock", "propose", "wait1", "wait2", "commit", "notify"} + +func (r Round) String() string { + return roundNames[r] +} + +// NOTE(dshulyak) changes in order is a breaking change. +const ( + preround Round = iota + hardlock + softlock + propose + wait1 + wait2 + commit + notify +) + +//go:generate scalegen + +type IterRound struct { + Iter uint8 + Round Round +} + +// Delay returns number of network delays since specified iterround. +func (ir IterRound) Delay(since IterRound) uint32 { + if ir.Absolute() > since.Absolute() { + delay := ir.Absolute() - since.Absolute() + // we skip hardlock round in 0th iteration. 
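+	// e.g. softlock in iteration 0 has Absolute() == 2; its delay since the
+	// preround (Absolute() == 0) is therefore 2 - 1 = 1 network delay.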
+ if since.Iter == 0 && since.Round == preround && delay != 0 { + delay-- + } + return delay + } + return 0 +} + +func (ir IterRound) Grade(since IterRound) grade { + return max(grade(6-ir.Delay(since)), grade0) +} + +func (ir IterRound) IsMessageRound() bool { + switch ir.Round { + case preround: + return true + case propose: + return true + case commit: + return true + case notify: + return true + } + return false +} + +func (ir IterRound) Absolute() uint32 { + return uint32(ir.Iter)*uint32(notify) + uint32(ir.Round) +} + +type Value struct { + // Proposals is set in messages for preround and propose rounds. + // + // Worst case scenario is that a single smesher identity has > 99.97% of the total weight of the network. + // In this case they will get all 50 available slots in all 4032 layers of the epoch. + // Additionally every other identity on the network that successfully published an ATX will get 1 slot. + // + // If we expect 7.0 Mio ATXs that would be a total of 7.0 Mio + 50 * 4032 = 8 201 600 slots. + // Since these are randomly distributed across the epoch, we can expect an average of n * p = + // 8 201 600 / 4032 = 2034.1 eligibilities in a layer with a standard deviation of sqrt(n * p * (1 - p)) = + // sqrt(8 201 600 * 1/4032 * 4031/4032) = 45.1 + // + // This means that we can expect a maximum of 2034.1 + 6*45.1 = 2304.7 eligibilities in a layer with + // > 99.9997% probability. + Proposals []types.ProposalID `scale:"max=2350"` + // Reference is set in messages for commit and notify rounds. + Reference *types.Hash32 +} + +type Body struct { + Layer types.LayerID + IterRound + Value Value + Eligibility types.HareEligibility +} + +type Message struct { + Body + Sender types.NodeID + Signature types.EdSignature +} + +func (m *Message) ToHash() types.Hash32 { + h := hash.GetHasher() + defer hash.PutHasher(h) + codec.MustEncodeTo(h, &m.Body) + var rst types.Hash32 + h.Sum(rst[:0]) + return rst +} + +func (m *Message) ToMetadata() wire.HareMetadata { + return wire.HareMetadata{ + Layer: m.Layer, + Round: m.Absolute(), + MsgHash: m.ToHash(), + } +} + +func (m *Message) ToMalfeasanceProof() wire.HareProofMsg { + return wire.HareProofMsg{ + InnerMsg: m.ToMetadata(), + SmesherID: m.Sender, + Signature: m.Signature, + } +} + +func (m *Message) key() messageKey { + return messageKey{ + Sender: m.Sender, + IterRound: m.IterRound, + } +} + +func (m *Message) ToBytes() []byte { + return codec.MustEncode(m) +} + +func (m *Message) Validate() error { + if (m.Round == commit || m.Round == notify) && m.Value.Reference == nil { + return errors.New("reference can't be nil in commit or notify rounds") + } else if (m.Round == preround || m.Round == propose) && m.Value.Reference != nil { + return fmt.Errorf("reference is set to not nil in round %s", m.Round) + } + return nil +} + +func (m *Message) MarshalLogObject(encoder zapcore.ObjectEncoder) error { + encoder.AddUint32("lid", m.Layer.Uint32()) + encoder.AddUint8("iter", m.Iter) + encoder.AddString("round", m.Round.String()) + encoder.AddString("sender", m.Sender.ShortString()) + if m.Value.Proposals != nil { + encoder.AddArray("full", zapcore.ArrayMarshalerFunc(func(encoder log.ArrayEncoder) error { + for _, id := range m.Value.Proposals { + encoder.AppendString(types.Hash20(id).ShortString()) + } + return nil + })) + } else if m.Value.Reference != nil { + encoder.AddString("ref", m.Value.Reference.ShortString()) + } + encoder.AddUint16("vrf_count", m.Eligibility.Count) + return nil +} diff --git a/hare4/types_scale.go b/hare4/types_scale.go new 
file mode 100644 index 0000000000..28e8b64035 --- /dev/null +++ b/hare4/types_scale.go @@ -0,0 +1,200 @@ +// Code generated by github.com/spacemeshos/go-scale/scalegen. DO NOT EDIT. + +// nolint +package hare3 + +import ( + "github.com/spacemeshos/go-scale" + "github.com/spacemeshos/go-spacemesh/common/types" +) + +func (t *IterRound) EncodeScale(enc *scale.Encoder) (total int, err error) { + { + n, err := scale.EncodeCompact8(enc, uint8(t.Iter)) + if err != nil { + return total, err + } + total += n + } + { + n, err := scale.EncodeCompact8(enc, uint8(t.Round)) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *IterRound) DecodeScale(dec *scale.Decoder) (total int, err error) { + { + field, n, err := scale.DecodeCompact8(dec) + if err != nil { + return total, err + } + total += n + t.Iter = uint8(field) + } + { + field, n, err := scale.DecodeCompact8(dec) + if err != nil { + return total, err + } + total += n + t.Round = Round(field) + } + return total, nil +} + +func (t *Value) EncodeScale(enc *scale.Encoder) (total int, err error) { + { + n, err := scale.EncodeStructSliceWithLimit(enc, t.Proposals, 2350) + if err != nil { + return total, err + } + total += n + } + { + n, err := scale.EncodeOption(enc, t.Reference) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *Value) DecodeScale(dec *scale.Decoder) (total int, err error) { + { + field, n, err := scale.DecodeStructSliceWithLimit[types.ProposalID](dec, 2350) + if err != nil { + return total, err + } + total += n + t.Proposals = field + } + { + field, n, err := scale.DecodeOption[types.Hash32](dec) + if err != nil { + return total, err + } + total += n + t.Reference = field + } + return total, nil +} + +func (t *Body) EncodeScale(enc *scale.Encoder) (total int, err error) { + { + n, err := scale.EncodeCompact32(enc, uint32(t.Layer)) + if err != nil { + return total, err + } + total += n + } + { + n, err := t.IterRound.EncodeScale(enc) + if err != nil { + return total, err + } + total += n + } + { + n, err := t.Value.EncodeScale(enc) + if err != nil { + return total, err + } + total += n + } + { + n, err := t.Eligibility.EncodeScale(enc) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *Body) DecodeScale(dec *scale.Decoder) (total int, err error) { + { + field, n, err := scale.DecodeCompact32(dec) + if err != nil { + return total, err + } + total += n + t.Layer = types.LayerID(field) + } + { + n, err := t.IterRound.DecodeScale(dec) + if err != nil { + return total, err + } + total += n + } + { + n, err := t.Value.DecodeScale(dec) + if err != nil { + return total, err + } + total += n + } + { + n, err := t.Eligibility.DecodeScale(dec) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *Message) EncodeScale(enc *scale.Encoder) (total int, err error) { + { + n, err := t.Body.EncodeScale(enc) + if err != nil { + return total, err + } + total += n + } + { + n, err := scale.EncodeByteArray(enc, t.Sender[:]) + if err != nil { + return total, err + } + total += n + } + { + n, err := scale.EncodeByteArray(enc, t.Signature[:]) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *Message) DecodeScale(dec *scale.Decoder) (total int, err error) { + { + n, err := t.Body.DecodeScale(dec) + if err != nil { + return total, err + } + total += n + } + { + n, err := scale.DecodeByteArray(dec, t.Sender[:]) + if err != nil { + return total, 
err + } + total += n + } + { + n, err := scale.DecodeByteArray(dec, t.Signature[:]) + if err != nil { + return total, err + } + total += n + } + return total, nil +} diff --git a/hare4/types_test.go b/hare4/types_test.go new file mode 100644 index 0000000000..ded67b1c6a --- /dev/null +++ b/hare4/types_test.go @@ -0,0 +1,41 @@ +package hare3 + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/zap/zapcore" + + "github.com/spacemeshos/go-spacemesh/codec" + "github.com/spacemeshos/go-spacemesh/common/types" +) + +func TestAbsoluteMaxValue(t *testing.T) { + ir := IterRound{Iter: 40, Round: notify} + require.EqualValues(t, 41*7, ir.Absolute()) +} + +func TestMessageMarshall(t *testing.T) { + enc := zapcore.NewMapObjectEncoder() + msg := &Message{Body: Body{Value: Value{Proposals: []types.ProposalID{{}}}}} + require.NoError(t, msg.MarshalLogObject(enc)) + msg = &Message{Body: Body{Value: Value{Reference: &types.Hash32{}}}} + require.NoError(t, msg.MarshalLogObject(enc)) +} + +func FuzzMessageDecode(f *testing.F) { + for _, buf := range [][]byte{ + {}, + {0}, + {0, 1, 1}, + {0, 1, 1, 0, 10}, + } { + f.Add(buf) + } + f.Fuzz(func(t *testing.T, buf []byte) { + var msg Message + if err := codec.Decode(buf, &msg); err == nil { + _ = msg.Validate() + } + }) +} From 3ad6a2d6b3212a5d3778a75378e411ccfe8742b5 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:23:28 -0600 Subject: [PATCH 02/17] feat: use shorter ids (eligibility proofs) for proposals --- blocks/generator.go | 8 +- blocks/generator_test.go | 46 ++-- common/types/proposal.go | 12 +- common/types/testutil.go | 4 + config/config.go | 3 + config/mainnet.go | 9 + config/presets/fastnet.go | 11 +- config/presets/testnet.go | 10 +- go.mod | 1 - go.sum | 2 - hare3/compat/weakcoin.go | 4 +- hare3/hare.go | 26 +- hare4/compat/weakcoin.go | 4 +- hare4/hare.go | 405 +++++++++++++++++++++++++---- hare4/hare_test.go | 525 ++++++++++++++++++++++++++++++++++---- hare4/interface.go | 20 ++ hare4/legacy_oracle.go | 6 +- hare4/malfeasance.go | 2 +- hare4/malfeasance_test.go | 2 +- hare4/metrics.go | 38 ++- hare4/mocks/mocks.go | 148 +++++++++++ hare4/protocol.go | 3 +- hare4/protocol_test.go | 2 +- hare4/tracer.go | 8 +- hare4/types.go | 13 +- hare4/types_scale.go | 62 ++++- hare4/types_test.go | 2 +- metrics/common.go | 8 + node/node.go | 129 ++++++++-- node/node_test.go | 3 + p2p/server/server.go | 4 +- 31 files changed, 1332 insertions(+), 188 deletions(-) create mode 100644 hare4/interface.go create mode 100644 hare4/mocks/mocks.go diff --git a/blocks/generator.go b/blocks/generator.go index 0d30d7f40f..2f17125f45 100644 --- a/blocks/generator.go +++ b/blocks/generator.go @@ -13,8 +13,8 @@ import ( "github.com/spacemeshos/go-spacemesh/atxsdata" "github.com/spacemeshos/go-spacemesh/common/types" - "github.com/spacemeshos/go-spacemesh/hare3" "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/log" "github.com/spacemeshos/go-spacemesh/proposals/store" "github.com/spacemeshos/go-spacemesh/sql" @@ -39,7 +39,7 @@ type Generator struct { cert certifier patrol layerPatrol - hareCh <-chan hare3.ConsensusOutput + hareCh <-chan hare4.ConsensusOutput optimisticOutput map[types.LayerID]*proposalMetadata } @@ -76,7 +76,7 @@ func WithGeneratorLogger(logger *zap.Logger) GeneratorOpt { } // WithHareOutputChan sets the chan to listen to hare output. 
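For context, a minimal sketch of the consumer side of this channel, mirroring how the generator's run loop drains it; the package name and function are illustrative, not part of the patch:

package example

import (
	"context"

	"github.com/spacemeshos/go-spacemesh/hare4"
)

// consumeHare drains one ConsensusOutput per decided layer. Layer is the layer
// agreement was reached for, and Proposals is the agreed set (possibly empty).
func consumeHare(ctx context.Context, ch <-chan hare4.ConsensusOutput) {
	for {
		select {
		case <-ctx.Done():
			return
		case out, ok := <-ch:
			if !ok {
				return
			}
			_ = out.Layer     // layer the agreement was reached for
			_ = out.Proposals // agreed []types.ProposalID
		}
	}
}
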
-func WithHareOutputChan(ch <-chan hare3.ConsensusOutput) GeneratorOpt { +func WithHareOutputChan(ch <-chan hare4.ConsensusOutput) GeneratorOpt { return func(g *Generator) { g.hareCh = ch } @@ -178,7 +178,7 @@ func (g *Generator) run(ctx context.Context) error { } } -func (g *Generator) processHareOutput(ctx context.Context, out hare3.ConsensusOutput) (*types.Block, error) { +func (g *Generator) processHareOutput(ctx context.Context, out hare4.ConsensusOutput) (*types.Block, error) { var md *proposalMetadata if len(out.Proposals) > 0 { getMetadata := func() error { diff --git a/blocks/generator_test.go b/blocks/generator_test.go index 4145f3ff29..f77d0fbc30 100644 --- a/blocks/generator_test.go +++ b/blocks/generator_test.go @@ -20,8 +20,8 @@ import ( "github.com/spacemeshos/go-spacemesh/blocks/mocks" "github.com/spacemeshos/go-spacemesh/common/types" "github.com/spacemeshos/go-spacemesh/genvm/sdk/wallet" - "github.com/spacemeshos/go-spacemesh/hare3" "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/proposals/store" "github.com/spacemeshos/go-spacemesh/signing" "github.com/spacemeshos/go-spacemesh/sql" @@ -56,13 +56,13 @@ type testGenerator struct { mockFetch *smocks.MockProposalFetcher mockCert *mocks.Mockcertifier mockPatrol *mocks.MocklayerPatrol - hareCh chan hare3.ConsensusOutput + hareCh chan hare4.ConsensusOutput } func createTestGenerator(t *testing.T) *testGenerator { types.SetLayersPerEpoch(3) ctrl := gomock.NewController(t) - ch := make(chan hare3.ConsensusOutput, 100) + ch := make(chan hare4.ConsensusOutput, 100) tg := &testGenerator{ mockMesh: mocks.NewMockmeshProvider(ctrl), mockExec: mocks.NewMockexecutor(ctrl), @@ -271,7 +271,7 @@ func genData( store *store.Store, lid types.LayerID, optimistic bool, -) hare3.ConsensusOutput { +) hare4.ConsensusOutput { numTXs := 1000 numProposals := 10 txIDs := createAndSaveTxs(t, numTXs, db) @@ -283,7 +283,7 @@ func genData( } require.NoError(t, layers.SetMeshHash(db, lid.Sub(1), meshHash)) plist := createProposals(t, db, store, lid, meshHash, signers, activeSet, txIDs) - return hare3.ConsensusOutput{ + return hare4.ConsensusOutput{ Layer: lid, Proposals: types.ToProposalIDs(plist), } @@ -448,7 +448,7 @@ func Test_run(t *testing.T) { }) tg.mockPatrol.EXPECT().CompleteHare(layerID) tg.Start(context.Background()) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID, Proposals: pids} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID, Proposals: pids} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 100*time.Millisecond) tg.Stop() }) @@ -464,7 +464,7 @@ func Test_processHareOutput_EmptyOutput(t *testing.T) { tg.mockCert.EXPECT().CertifyIfEligible(gomock.Any(), layerID, types.EmptyBlockID) tg.mockMesh.EXPECT().ProcessLayerPerHareOutput(gomock.Any(), layerID, types.EmptyBlockID, false) tg.mockPatrol.EXPECT().CompleteHare(layerID) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 100*time.Millisecond) tg.Stop() } @@ -480,7 +480,7 @@ func Test_run_FetchFailed(t *testing.T) { return errors.New("unknown") }) tg.mockPatrol.EXPECT().CompleteHare(layerID) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID, Proposals: pids} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID, Proposals: pids} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 
100*time.Millisecond) tg.Stop() } @@ -502,7 +502,7 @@ func Test_run_DiffHasFromConsensus(t *testing.T) { tg.mockFetch.EXPECT().GetProposals(gomock.Any(), pids) tg.mockPatrol.EXPECT().CompleteHare(layerID) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID, Proposals: pids} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID, Proposals: pids} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 100*time.Millisecond) tg.Stop() } @@ -535,7 +535,7 @@ func Test_run_ExecuteFailed(t *testing.T) { return nil, errors.New("unknown") }) tg.mockPatrol.EXPECT().CompleteHare(layerID) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID, Proposals: pids} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID, Proposals: pids} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 100*time.Millisecond) tg.Stop() } @@ -560,7 +560,7 @@ func Test_run_AddBlockFailed(t *testing.T) { Return(block, nil) tg.mockMesh.EXPECT().AddBlockWithTXs(gomock.Any(), gomock.Any()).Return(errors.New("unknown")) tg.mockPatrol.EXPECT().CompleteHare(layerID) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID, Proposals: pids} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID, Proposals: pids} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 100*time.Millisecond) tg.Stop() } @@ -588,7 +588,7 @@ func Test_run_RegisterCertFailureIgnored(t *testing.T) { tg.mockCert.EXPECT().CertifyIfEligible(gomock.Any(), layerID, gomock.Any()) tg.mockMesh.EXPECT().ProcessLayerPerHareOutput(gomock.Any(), layerID, block.ID(), true) tg.mockPatrol.EXPECT().CompleteHare(layerID) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID, Proposals: pids} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID, Proposals: pids} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 100*time.Millisecond) tg.Stop() } @@ -616,7 +616,7 @@ func Test_run_CertifyFailureIgnored(t *testing.T) { tg.mockCert.EXPECT().CertifyIfEligible(gomock.Any(), layerID, gomock.Any()).Return(errors.New("unknown")) tg.mockMesh.EXPECT().ProcessLayerPerHareOutput(gomock.Any(), layerID, block.ID(), true) tg.mockPatrol.EXPECT().CompleteHare(layerID) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID, Proposals: pids} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID, Proposals: pids} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 100*time.Millisecond) tg.Stop() } @@ -646,7 +646,7 @@ func Test_run_ProcessLayerFailed(t *testing.T) { ProcessLayerPerHareOutput(gomock.Any(), layerID, block.ID(), true). 
Return(errors.New("unknown")) tg.mockPatrol.EXPECT().CompleteHare(layerID) - tg.hareCh <- hare3.ConsensusOutput{Layer: layerID, Proposals: pids} + tg.hareCh <- hare4.ConsensusOutput{Layer: layerID, Proposals: pids} require.Eventually(t, func() bool { return len(tg.hareCh) == 0 }, time.Second, 100*time.Millisecond) tg.Stop() } @@ -673,7 +673,7 @@ func Test_processHareOutput_UnequalHeight(t *testing.T) { activeSet := types.ToATXIDs(atxes) pList := createProposals(t, tg.db, tg.proposals, layerID, types.Hash32{}, signers, activeSet, nil) ctx := context.Background() - ho := hare3.ConsensusOutput{ + ho := hare4.ConsensusOutput{ Layer: layerID, Proposals: types.ToProposalIDs(pList), } @@ -730,7 +730,7 @@ func Test_processHareOutput_bad_state(t *testing.T) { []types.TransactionID{types.RandomTransactionID()}, 1, ) - ho := hare3.ConsensusOutput{ + ho := hare4.ConsensusOutput{ Layer: layerID, Proposals: types.ToProposalIDs([]*types.Proposal{p}), } @@ -759,7 +759,7 @@ func Test_processHareOutput_bad_state(t *testing.T) { 1, ) ctx := context.Background() - ho := hare3.ConsensusOutput{ + ho := hare4.ConsensusOutput{ Layer: layerID, Proposals: types.ToProposalIDs([]*types.Proposal{p}), } @@ -783,7 +783,7 @@ func Test_processHareOutput_EmptyProposals(t *testing.T) { plist = append(plist, p) } ctx := context.Background() - ho := hare3.ConsensusOutput{ + ho := hare4.ConsensusOutput{ Layer: lid, Proposals: types.ToProposalIDs(plist), } @@ -832,7 +832,7 @@ func Test_processHareOutput_StableBlockID(t *testing.T) { activeSet := types.ToATXIDs(atxes) plist := createProposals(t, tg.db, tg.proposals, layerID, types.Hash32{}, signers, activeSet, txIDs) ctx := context.Background() - ho1 := hare3.ConsensusOutput{ + ho1 := hare4.ConsensusOutput{ Layer: layerID, Proposals: types.ToProposalIDs(plist), } @@ -852,7 +852,7 @@ func Test_processHareOutput_StableBlockID(t *testing.T) { ordered := plist[numProposals/2 : numProposals] ordered = append(ordered, plist[0:numProposals/2]...) require.NotEqual(t, plist, ordered) - ho2 := hare3.ConsensusOutput{ + ho2 := hare4.ConsensusOutput{ Layer: layerID, Proposals: types.ToProposalIDs(ordered), } @@ -881,7 +881,7 @@ func Test_processHareOutput_SameATX(t *testing.T) { createProposal(t, tg.db, tg.proposals, activeSet, layerID, types.Hash32{}, atxID, signers[0], txIDs[0:500], 1), createProposal(t, tg.db, tg.proposals, activeSet, layerID, types.Hash32{}, atxID, signers[0], txIDs[400:], 1), } - ho := hare3.ConsensusOutput{ + ho := hare4.ConsensusOutput{ Layer: layerID, Proposals: types.ToProposalIDs(plist), } @@ -905,7 +905,7 @@ func Test_processHareOutput_EmptyATXID(t *testing.T) { txIDs, 1, ) plist = append(plist, p) - ho := hare3.ConsensusOutput{ + ho := hare4.ConsensusOutput{ Layer: layerID, Proposals: types.ToProposalIDs(plist), } @@ -928,7 +928,7 @@ func Test_processHareOutput_MultipleEligibilities(t *testing.T) { createProposal(t, tg.db, tg.proposals, activeSet, layerID, types.Hash32{}, atxes[2].ID(), signers[2], ids, 5), } ctx := context.Background() - ho := hare3.ConsensusOutput{ + ho := hare4.ConsensusOutput{ Layer: layerID, Proposals: types.ToProposalIDs(plist), } diff --git a/common/types/proposal.go b/common/types/proposal.go index dfb9238a91..806ef4b149 100644 --- a/common/types/proposal.go +++ b/common/types/proposal.go @@ -28,7 +28,17 @@ type ProposalID Hash20 // EmptyProposalID is a canonical empty ProposalID. var EmptyProposalID = ProposalID{} -type CompactProposalID [2]byte +type CompactProposalID [4]byte + +// EncodeScale implements scale codec interface. 
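+// CompactProposalID is a fixed-size array and is encoded as a raw 4-byte
+// array, so a compact ID costs 4 bytes on the wire versus 20 for a full
+// ProposalID. A round-trip sketch (codec helpers as used elsewhere in this
+// repository; vrfSig stands in for an eligibility VRF signature):
+//
+//	var id CompactProposalID
+//	copy(id[:], vrfSig[:4])
+//	buf := codec.MustEncode(&id) // 4 bytes
+//	var out CompactProposalID
+//	codec.MustDecode(buf, &out) // out == id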
+func (id *CompactProposalID) EncodeScale(e *scale.Encoder) (int, error) { + return scale.EncodeByteArray(e, id[:]) +} + +// DecodeScale implements scale codec interface. +func (id *CompactProposalID) DecodeScale(d *scale.Decoder) (int, error) { + return scale.DecodeByteArray(d, id[:]) +} // EncodeScale implements scale codec interface. func (id *ProposalID) EncodeScale(e *scale.Encoder) (int, error) { diff --git a/common/types/testutil.go b/common/types/testutil.go index fae823da49..9018dbabb6 100644 --- a/common/types/testutil.go +++ b/common/types/testutil.go @@ -101,12 +101,16 @@ func RandomTransactionID() TransactionID { // RandomBallot generates a Ballot with random content for testing. func RandomBallot() *Ballot { + var vrf VrfSignature + _, _ = rand.Read(vrf[:]) + return &Ballot{ InnerBallot: InnerBallot{ Layer: LayerID(10), AtxID: RandomATXID(), RefBallot: RandomBallotID(), }, + EligibilityProofs: []VotingEligibility{{Sig: vrf}}, Votes: Votes{ Base: RandomBallotID(), Support: []Vote{{ID: RandomBlockID()}, {ID: RandomBlockID()}}, diff --git a/config/config.go b/config/config.go index 230d9a9a8a..83a3832a49 100644 --- a/config/config.go +++ b/config/config.go @@ -22,6 +22,7 @@ import ( vm "github.com/spacemeshos/go-spacemesh/genvm" "github.com/spacemeshos/go-spacemesh/hare3" "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/miner" "github.com/spacemeshos/go-spacemesh/p2p" "github.com/spacemeshos/go-spacemesh/syncer" @@ -54,6 +55,7 @@ type Config struct { P2P p2p.Config `mapstructure:"p2p"` API grpcserver.Config `mapstructure:"api"` HARE3 hare3.Config `mapstructure:"hare3"` + HARE4 hare4.Config `mapstructure:"hare4"` HareEligibility eligibility.Config `mapstructure:"hare-eligibility"` Certificate blocks.CertConfig `mapstructure:"certificate"` Beacon beacon.Config `mapstructure:"beacon"` @@ -191,6 +193,7 @@ func DefaultConfig() Config { P2P: p2p.DefaultConfig(), API: grpcserver.DefaultConfig(), HARE3: hare3.DefaultConfig(), + HARE4: hare4.DefaultConfig(), HareEligibility: eligibility.DefaultConfig(), Beacon: beacon.DefaultConfig(), TIME: timeConfig.DefaultConfig(), diff --git a/config/mainnet.go b/config/mainnet.go index 40b123edba..cdf5bb8a9d 100644 --- a/config/mainnet.go +++ b/config/mainnet.go @@ -21,6 +21,7 @@ import ( "github.com/spacemeshos/go-spacemesh/fetch" "github.com/spacemeshos/go-spacemesh/hare3" "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/miner" "github.com/spacemeshos/go-spacemesh/p2p" "github.com/spacemeshos/go-spacemesh/syncer" @@ -61,6 +62,7 @@ func MainnetConfig() Config { logging.TrtlLoggerLevel = zapcore.WarnLevel.String() logging.AtxHandlerLevel = zapcore.WarnLevel.String() logging.ProposalListenerLevel = zapcore.WarnLevel.String() + forkLayer := types.LayerID(111_111_111) // TODO THIS NEEDS A NUMBER hare3conf := hare3.DefaultConfig() hare3conf.Committee = 400 hare3conf.Enable = true @@ -69,6 +71,12 @@ func MainnetConfig() Config { Layer: 105_720, // July 15, 2024, 10:00:00 AM UTC Size: 50, } + hare3conf.DisableLayer = forkLayer + + hare4conf := hare4.DefaultConfig() + hare4conf.Committee = 50 + hare4conf.Enable = true + hare4conf.EnableLayer = forkLayer return Config{ BaseConfig: BaseConfig{ DataDirParent: defaultDataDir, @@ -137,6 +145,7 @@ func MainnetConfig() Config { }, }, HARE3: hare3conf, + HARE4: hare4conf, HareEligibility: 
eligibility.Config{ ConfidenceParam: 200, }, diff --git a/config/presets/fastnet.go b/config/presets/fastnet.go index de09ac0d19..aabc1bb236 100644 --- a/config/presets/fastnet.go +++ b/config/presets/fastnet.go @@ -33,13 +33,22 @@ func fastnet() config.Config { conf.ATXGradeDelay = 1 * time.Second conf.HARE3.Enable = true - conf.HARE3.DisableLayer = types.LayerID(math.MaxUint32) + conf.HARE3.DisableLayer = 22 conf.HARE3.Committee = 800 conf.HARE3.Leaders = 10 conf.HARE3.PreroundDelay = 3 * time.Second conf.HARE3.RoundDuration = 700 * time.Millisecond conf.HARE3.IterationsLimit = 2 + conf.HARE4.Enable = true + conf.HARE4.EnableLayer = types.LayerID(22) + conf.HARE4.DisableLayer = types.LayerID(math.MaxUint32) + conf.HARE4.Committee = 800 + conf.HARE4.Leaders = 10 + conf.HARE4.PreroundDelay = 3 * time.Second + conf.HARE4.RoundDuration = 700 * time.Millisecond + conf.HARE4.IterationsLimit = 2 + conf.P2P.MinPeers = 10 conf.Genesis = config.GenesisConfig{ diff --git a/config/presets/testnet.go b/config/presets/testnet.go index 892d924aaf..4b9133cd40 100644 --- a/config/presets/testnet.go +++ b/config/presets/testnet.go @@ -22,6 +22,7 @@ import ( "github.com/spacemeshos/go-spacemesh/fetch" "github.com/spacemeshos/go-spacemesh/hare3" "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/miner" "github.com/spacemeshos/go-spacemesh/p2p" "github.com/spacemeshos/go-spacemesh/syncer" @@ -51,10 +52,16 @@ func testnet() config.Config { } hare3conf := hare3.DefaultConfig() hare3conf.Enable = true - hare3conf.EnableLayer = 7366 + hare3conf.EnableLayer = 0 + hare3conf.DisableLayer = 50 // NOTE(dshulyak) i forgot to set protocol name for testnet when we configured it manually. // we can't do rolling upgrade if protocol name changes, so lets keep it like that temporarily. 
hare3conf.ProtocolName = "" + hare4conf := hare4.DefaultConfig() + hare4conf.Enable = true + hare4conf.EnableLayer = 50 // TODO THIS NEEDS A VALUE + hare4conf.DisableLayer = math.MaxUint32 + defaultdir := filepath.Join(home, "spacemesh-testnet", "/") return config.Config{ Preset: "testnet", @@ -95,6 +102,7 @@ func testnet() config.Config { MinimalActiveSetWeight: []types.EpochMinimalActiveWeight{{Weight: 10_000}}, }, HARE3: hare3conf, + HARE4: hare4conf, HareEligibility: eligibility.Config{ ConfidenceParam: 20, }, diff --git a/go.mod b/go.mod index ab5853e45e..ac50366248 100644 --- a/go.mod +++ b/go.mod @@ -74,7 +74,6 @@ require ( cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect cloud.google.com/go/iam v1.1.8 // indirect - github.com/aead/siphash v1.0.1 // indirect github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect github.com/anacrolix/chansync v0.3.0 // indirect github.com/anacrolix/missinggo v1.2.1 // indirect diff --git a/go.sum b/go.sum index 5a4bd90e5d..2ddced53c9 100644 --- a/go.sum +++ b/go.sum @@ -25,8 +25,6 @@ github.com/ALTree/bigfloat v0.2.0 h1:AwNzawrpFuw55/YDVlcPw0F0cmmXrmngBHhVrvdXPvM github.com/ALTree/bigfloat v0.2.0/go.mod h1:+NaH2gLeY6RPBPPQf4aRotPPStg+eXc8f9ZaE4vRfD4= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w= -github.com/aead/siphash v1.0.1 h1:FwHfE/T45KPKYuuSAKyyvE+oPWcaQ+CUmFW0bPlM+kg= -github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/anacrolix/chansync v0.3.0 h1:lRu9tbeuw3wl+PhMu/r+JJCRu5ArFXIluOgdF0ao6/U= diff --git a/hare3/compat/weakcoin.go b/hare3/compat/weakcoin.go index 79404aceb9..6b8758454a 100644 --- a/hare3/compat/weakcoin.go +++ b/hare3/compat/weakcoin.go @@ -6,14 +6,14 @@ import ( "go.uber.org/zap" "github.com/spacemeshos/go-spacemesh/common/types" - "github.com/spacemeshos/go-spacemesh/hare3" + "github.com/spacemeshos/go-spacemesh/hare4" ) type weakCoin interface { Set(types.LayerID, bool) error } -func ReportWeakcoin(ctx context.Context, logger *zap.Logger, from <-chan hare3.WeakCoinOutput, to weakCoin) { +func ReportWeakcoin(ctx context.Context, logger *zap.Logger, from <-chan hare4.WeakCoinOutput, to weakCoin) { for { select { case <-ctx.Done(): diff --git a/hare3/hare.go b/hare3/hare.go index fcd049a926..8e6ce46a65 100644 --- a/hare3/hare.go +++ b/hare3/hare.go @@ -17,6 +17,7 @@ import ( "github.com/spacemeshos/go-spacemesh/atxsdata" "github.com/spacemeshos/go-spacemesh/codec" "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/layerpatrol" "github.com/spacemeshos/go-spacemesh/log" "github.com/spacemeshos/go-spacemesh/metrics" @@ -154,6 +155,14 @@ func WithTracer(tracer Tracer) Opt { } } +// WithResultsChan overrides the default result channel with a different one. +// This is only needed for the migration period between hare3 and hare4. 
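+// During the migration both protocols can feed one channel so that downstream
+// consumers (e.g. the block generator) keep a single stream regardless of
+// which protocol is active for a layer. A sketch, assuming both packages
+// expose the option as defined in this patch:
+//
+//	results := make(chan hare4.ConsensusOutput, 32)
+//	h3 := hare3.New(..., hare3.WithResultsChan(results))
+//	h4 := hare4.New(..., hare4.WithResultsChan(results))
+//
+// Because channel ownership is shared, hare3's Stop no longer closes it (see
+// below).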
+func WithResultsChan(c chan hare4.ConsensusOutput) Opt { + return func(hr *Hare) { + hr.results = c + } +} + type nodeclock interface { AwaitLayer(types.LayerID) <-chan struct{} CurrentLayer() types.LayerID @@ -176,8 +185,8 @@ func New( hr := &Hare{ ctx: ctx, cancel: cancel, - results: make(chan ConsensusOutput, 32), - coins: make(chan WeakCoinOutput, 32), + results: make(chan hare4.ConsensusOutput, 32), + coins: make(chan hare4.WeakCoinOutput, 32), signers: map[string]*signing.EdSigner{}, sessions: map[types.LayerID]*protocol{}, @@ -211,8 +220,8 @@ type Hare struct { ctx context.Context cancel context.CancelFunc eg errgroup.Group - results chan ConsensusOutput - coins chan WeakCoinOutput + results chan hare4.ConsensusOutput + coins chan hare4.WeakCoinOutput mu sync.Mutex signers map[string]*signing.EdSigner sessions map[types.LayerID]*protocol @@ -242,11 +251,11 @@ func (h *Hare) Register(sig *signing.EdSigner) { h.signers[string(sig.NodeID().Bytes())] = sig } -func (h *Hare) Results() <-chan ConsensusOutput { +func (h *Hare) Results() <-chan hare4.ConsensusOutput { return h.results } -func (h *Hare) Coins() <-chan WeakCoinOutput { +func (h *Hare) Coins() <-chan hare4.WeakCoinOutput { return h.coins } @@ -508,7 +517,7 @@ func (h *Hare) onOutput(session *session, ir IterRound, out output) error { select { case <-h.ctx.Done(): return h.ctx.Err() - case h.coins <- WeakCoinOutput{Layer: session.lid, Coin: *out.coin}: + case h.coins <- hare4.WeakCoinOutput{Layer: session.lid, Coin: *out.coin}: } sessionCoin.Inc() } @@ -516,7 +525,7 @@ func (h *Hare) onOutput(session *session, ir IterRound, out output) error { select { case <-h.ctx.Done(): return h.ctx.Err() - case h.results <- ConsensusOutput{Layer: session.lid, Proposals: out.result}: + case h.results <- hare4.ConsensusOutput{Layer: session.lid, Proposals: out.result}: } sessionResult.Inc() } @@ -618,7 +627,6 @@ func (h *Hare) OnProposal(p *types.Proposal) error { func (h *Hare) Stop() { h.cancel() h.eg.Wait() - close(h.results) close(h.coins) h.log.Info("stopped") } diff --git a/hare4/compat/weakcoin.go b/hare4/compat/weakcoin.go index 79404aceb9..6b8758454a 100644 --- a/hare4/compat/weakcoin.go +++ b/hare4/compat/weakcoin.go @@ -6,14 +6,14 @@ import ( "go.uber.org/zap" "github.com/spacemeshos/go-spacemesh/common/types" - "github.com/spacemeshos/go-spacemesh/hare3" + "github.com/spacemeshos/go-spacemesh/hare4" ) type weakCoin interface { Set(types.LayerID, bool) error } -func ReportWeakcoin(ctx context.Context, logger *zap.Logger, from <-chan hare3.WeakCoinOutput, to weakCoin) { +func ReportWeakcoin(ctx context.Context, logger *zap.Logger, from <-chan hare4.WeakCoinOutput, to weakCoin) { for { select { case <-ctx.Done(): diff --git a/hare4/hare.go b/hare4/hare.go index fcd049a926..9ce3539fe3 100644 --- a/hare4/hare.go +++ b/hare4/hare.go @@ -1,10 +1,13 @@ -package hare3 +package hare4 import ( + "bytes" "context" "errors" "fmt" + "io" "math" + "slices" "sync" "time" @@ -22,6 +25,7 @@ import ( "github.com/spacemeshos/go-spacemesh/metrics" "github.com/spacemeshos/go-spacemesh/p2p" "github.com/spacemeshos/go-spacemesh/p2p/pubsub" + "github.com/spacemeshos/go-spacemesh/p2p/server" "github.com/spacemeshos/go-spacemesh/proposals/store" "github.com/spacemeshos/go-spacemesh/signing" "github.com/spacemeshos/go-spacemesh/sql" @@ -31,34 +35,35 @@ import ( "github.com/spacemeshos/go-spacemesh/system" ) -type CommitteeUpgrade struct { - Layer types.LayerID - Size uint16 -} +const ( + PROTOCOL_NAME = "hare4/full_exchange" 
+ MAX_EXCHANGE_SIZE = 1_000_000 // protect against a malicious allocation of too much space. +) + +var ( + errNoLayerProposals = errors.New("no proposals for layer") + errCannotMatchProposals = errors.New("cannot match proposals to compacted form") + errResponseTooBig = errors.New("response too big") + errCannotFindProposal = errors.New("cannot find proposal") + errNoEligibilityProofs = errors.New("no eligibility proofs") + fetchFullTimeout = 5 * time.Second +) type Config struct { - Enable bool `mapstructure:"enable"` - EnableLayer types.LayerID `mapstructure:"enable-layer"` - DisableLayer types.LayerID `mapstructure:"disable-layer"` - Committee uint16 `mapstructure:"committee"` - CommitteeUpgrade *CommitteeUpgrade - Leaders uint16 `mapstructure:"leaders"` - IterationsLimit uint8 `mapstructure:"iterations-limit"` - PreroundDelay time.Duration `mapstructure:"preround-delay"` - RoundDuration time.Duration `mapstructure:"round-duration"` + Enable bool `mapstructure:"enable"` + EnableLayer types.LayerID `mapstructure:"enable-layer"` + DisableLayer types.LayerID `mapstructure:"disable-layer"` + Committee uint16 `mapstructure:"committee"` + Leaders uint16 `mapstructure:"leaders"` + IterationsLimit uint8 `mapstructure:"iterations-limit"` + PreroundDelay time.Duration `mapstructure:"preround-delay"` + RoundDuration time.Duration `mapstructure:"round-duration"` // LogStats if true will log iteration statistics with INFO level at the start of the next iteration. // This requires additional computation and should be used for debugging only. LogStats bool `mapstructure:"log-stats"` ProtocolName string `mapstructure:"protocolname"` } -func (cfg *Config) CommitteeFor(layer types.LayerID) uint16 { - if cfg.CommitteeUpgrade != nil && layer >= cfg.CommitteeUpgrade.Layer { - return cfg.CommitteeUpgrade.Size - } - return cfg.Committee -} - func (cfg *Config) Validate(zdist time.Duration) error { terminates := cfg.roundStart(IterRound{Iter: cfg.IterationsLimit, Round: hardlock}) if terminates > zdist { @@ -76,10 +81,6 @@ func (cfg *Config) MarshalLogObject(encoder zapcore.ObjectEncoder) error { encoder.AddUint32("enabled layer", cfg.EnableLayer.Uint32()) encoder.AddUint32("disabled layer", cfg.DisableLayer.Uint32()) encoder.AddUint16("committee", cfg.Committee) - if cfg.CommitteeUpgrade != nil { - encoder.AddUint32("committee upgrade layer", cfg.CommitteeUpgrade.Layer.Uint32()) - encoder.AddUint16("committee upgrade size", cfg.CommitteeUpgrade.Size) - } encoder.AddUint16("leaders", cfg.Leaders) encoder.AddUint8("iterations limit", cfg.IterationsLimit) encoder.AddDuration("preround delay", cfg.PreroundDelay) @@ -105,14 +106,13 @@ func DefaultConfig() Config { // this is what the Chernoff bound gives you; the actual value is a bit lower, // so we can probably get away with a smaller committee). For a committee of size 400, // the Chernoff bound gives 2^{-20} probability of a dishonest majority when 1/3 of the population is dishonest. 
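	// (A sketch of where 2^{-20} comes from, assuming the multiplicative
	// Chernoff bound P[X >= (1+d)*mu] <= exp(-d^2*mu/(2+d)): with 1/3 of the
	// population dishonest, a committee of n = 400 has mu = 400/3 expected
	// dishonest seats, and a dishonest majority needs d = 1/2, so the bound
	// is exp(-(1/4)*(400/3)/(5/2)) = exp(-40/3) ~= 1.6e-6 ~= 2^-19.2,
	// i.e. about 2^-20.)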
- Committee: 800, + Committee: 50, Leaders: 5, IterationsLimit: 4, PreroundDelay: 25 * time.Second, RoundDuration: 12 * time.Second, - // can be bumped to 3.1 when oracle upgrades - ProtocolName: "/h/3.0", - DisableLayer: math.MaxUint32, + ProtocolName: "/h/4.0", + DisableLayer: math.MaxUint32, } } @@ -128,6 +128,12 @@ type WeakCoinOutput struct { type Opt func(*Hare) +func WithServer(s streamRequester) Opt { + return func(hr *Hare) { + hr.p2p = s + } +} + func WithWallclock(clock clockwork.Clock) Opt { return func(hr *Hare) { hr.wallclock = clock @@ -154,6 +160,14 @@ func WithTracer(tracer Tracer) Opt { } } +// WithResultsChan overrides the default result channel with a different one. +// This is only needed for the migration period between hare3 and hare4. +func WithResultsChan(c chan ConsensusOutput) Opt { + return func(hr *Hare) { + hr.results = c + } +} + type nodeclock interface { AwaitLayer(types.LayerID) <-chan struct{} CurrentLayer() types.LayerID @@ -166,20 +180,22 @@ func New( db *sql.Database, atxsdata *atxsdata.Data, proposals *store.Store, - verifier *signing.EdVerifier, + verif verifier, oracle oracle, sync system.SyncStateProvider, patrol *layerpatrol.LayerPatrol, + host server.Host, opts ...Opt, ) *Hare { ctx, cancel := context.WithCancel(context.Background()) hr := &Hare{ - ctx: ctx, - cancel: cancel, - results: make(chan ConsensusOutput, 32), - coins: make(chan WeakCoinOutput, 32), - signers: map[string]*signing.EdSigner{}, - sessions: map[types.LayerID]*protocol{}, + ctx: ctx, + cancel: cancel, + results: make(chan ConsensusOutput, 32), + coins: make(chan WeakCoinOutput, 32), + signers: make(map[string]*signing.EdSigner), + sessions: make(map[types.LayerID]*protocol), + messageCache: make(map[types.Hash32]Message), config: DefaultConfig(), log: zap.NewNop(), @@ -190,7 +206,8 @@ func New( db: db, atxsdata: atxsdata, proposals: proposals, - verifier: verifier, + verifier: verif, + compactFn: compactTruncate, oracle: &legacyOracle{ log: zap.NewNop(), oracle: oracle, @@ -203,19 +220,24 @@ func New( for _, opt := range opts { opt(hr) } + + if host != nil { + hr.p2p = server.New(host, PROTOCOL_NAME, hr.handleProposalsStream) + } return hr } type Hare struct { // state - ctx context.Context - cancel context.CancelFunc - eg errgroup.Group - results chan ConsensusOutput - coins chan WeakCoinOutput - mu sync.Mutex - signers map[string]*signing.EdSigner - sessions map[types.LayerID]*protocol + ctx context.Context + cancel context.CancelFunc + eg errgroup.Group + results chan ConsensusOutput + coins chan WeakCoinOutput + mu sync.Mutex + signers map[string]*signing.EdSigner + sessions map[types.LayerID]*protocol + messageCache map[types.Hash32]Message // options config Config @@ -228,10 +250,12 @@ type Hare struct { db *sql.Database atxsdata *atxsdata.Data proposals *store.Store - verifier *signing.EdVerifier + verifier verifier + compactFn func([]byte) []byte oracle *legacyOracle sync system.SyncStateProvider patrol *layerpatrol.LayerPatrol + p2p streamRequester tracer Tracer } @@ -267,8 +291,9 @@ func (h *Hare) Start() { for next := enabled; next < disabled; next++ { select { case <-h.nodeclock.AwaitLayer(next): - h.log.Debug("notified", zap.Uint32("lid", next.Uint32())) + h.log.Debug("notified", zap.Uint32("layer id", next.Uint32())) h.onLayer(next) + h.cleanMessageCache(next - 1) case <-h.ctx.Done(): return nil } @@ -283,6 +308,135 @@ func (h *Hare) Running() int { return len(h.sessions) } +// fetchFull will fetch the full list of proposal IDs from the provided peer. 
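+// It is called when the compact IDs in a preround message cannot be matched
+// against locally known proposals, or when signature verification fails and
+// a prefix collision is suspected. The response is a scale length prefix
+// followed by an encoded CompactIdResponse, capped at MAX_EXCHANGE_SIZE.
+// Caller sketch (this mirrors the fallback in Handler below):
+//
+//	ids, err := h.fetchFull(ctx, peer, msg.ToHash())
+//	if err != nil {
+//		return fmt.Errorf("fetch full: %w", err)
+//	}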
+func (h *Hare) fetchFull(ctx context.Context, peer p2p.Peer, msgId types.Hash32) ( + []types.ProposalID, error, +) { + ctx, cancel := context.WithTimeout(ctx, fetchFullTimeout) + defer cancel() + + requestCompactCounter.Inc() + req := &CompactIdRequest{MsgId: msgId} + reqBytes := codec.MustEncode(req) + resp := &CompactIdResponse{} + cb := func(ctx context.Context, rw io.ReadWriter) error { + respLen, _, err := codec.DecodeLen(rw) + if err != nil { + return fmt.Errorf("decode length: %w", err) + } + if respLen >= MAX_EXCHANGE_SIZE { + return errResponseTooBig + } + buff := make([]byte, respLen) + _, err = io.ReadFull(rw, buff) + if err != nil { + return fmt.Errorf("read response buffer: %w", err) + } + err = codec.Decode(buff, resp) + if err != nil { + return fmt.Errorf("decode response: %w", err) + } + return nil + } + + err := h.p2p.StreamRequest(ctx, peer, reqBytes, cb) + if err != nil { + requestCompactErrorCounter.Inc() + return nil, fmt.Errorf("stream request: %w", err) + } + + h.tracer.OnCompactIdResponse(resp) + + return resp.Ids, nil +} + +func (h *Hare) handleProposalsStream(ctx context.Context, msg []byte, s io.ReadWriter) error { + requestCompactHandlerCounter.Inc() + compactProps := &CompactIdRequest{} + if err := codec.Decode(msg, compactProps); err != nil { + malformedError.Inc() + return fmt.Errorf("%w: decoding error %s", pubsub.ErrValidationReject, err.Error()) + } + h.tracer.OnCompactIdRequest(compactProps) + h.mu.Lock() + m, ok := h.messageCache[compactProps.MsgId] + h.mu.Unlock() + if !ok { + messageCacheMiss.Inc() + return fmt.Errorf("message %s: cache miss", compactProps.MsgId) + } + resp := &CompactIdResponse{Ids: m.Body.Value.Proposals} + respBytes := codec.MustEncode(resp) + if _, err := codec.EncodeLen(s, uint32(len(respBytes))); err != nil { + return fmt.Errorf("encode length: %w", err) + } + + if _, err := s.Write(respBytes); err != nil { + return fmt.Errorf("write response: %w", err) + } + + return nil +} + +// reconstructProposals tries to reconstruct the full list of proposals from a peer based on a delivered +// set of compact IDs. 
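+// Matching is first-fit: the locally known proposals for the layer are
+// compacted with the same compact function, sorted by full proposal ID, and
+// each candidate may be consumed at most once (the taken slice), so two
+// local proposals sharing a prefix are paired deterministically. Any prefix
+// that cannot be matched fails fast with errCannotMatchProposals, and the
+// caller falls back to fetchFull.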
+func (h *Hare) reconstructProposals(ctx context.Context, peer p2p.Peer, msgId types.Hash32, msg *Message) error { + proposals := h.proposals.GetForLayer(msg.Layer) + if len(proposals) == 0 { + return errNoLayerProposals + } + compacted := h.compactProposals(h.compactFn, msg.Layer, proposals) + proposalIds := make([]proposalTuple, len(proposals)) + for i := range proposals { + proposalIds[i] = proposalTuple{id: proposals[i].ID(), compact: compacted[i]} + } + slices.SortFunc(proposalIds, sortProposalsTuple) + + taken := make([]bool, len(proposals)) + findProp := func(id types.CompactProposalID) (bool, types.ProposalID) { + for i := 0; i < len(proposalIds); i++ { + if id != proposalIds[i].compact { + continue + } + if taken[i] { + continue + } + // item is both not taken and equals to looked up ID + taken[i] = true + return true, proposalIds[i].id + } + return false, types.EmptyProposalID + } + + msg.Value.Proposals = make([]types.ProposalID, len(msg.Value.CompactProposals)) + ctr := 0 + + for i, compact := range msg.Value.CompactProposals { + // try to see if we can match it to the proposals we have + // if we do, add the proposal ID to the list of hashes in the proposals on the message + found, id := findProp(compact) + if !found { + // if we can't find it, we can already assume that we cannot match the whole + // set and just fail fast + return errCannotMatchProposals + } + msg.Value.Proposals[i] = id + ctr++ + } + + if ctr != len(msg.Value.CompactProposals) { + // this will force the calling context to do a fetchFull + return errCannotMatchProposals + } + + // sort the found proposals and unset the compact proposals + // field before trying to check the signature + // since it would add unnecessary data to the hasher + slices.SortFunc(msg.Value.Proposals, sortProposalIds) + msg.Value.CompactProposals = []types.CompactProposalID{} + return nil +} + func (h *Hare) Handler(ctx context.Context, peer p2p.Peer, buf []byte) error { msg := &Message{} if err := codec.Decode(buf, msg); err != nil { @@ -301,10 +455,67 @@ func (h *Hare) Handler(ctx context.Context, peer p2p.Peer, buf []byte) error { notRegisteredError.Inc() return fmt.Errorf("layer %d is not registered", msg.Layer) } + + var ( + compacts []types.CompactProposalID + msgId = msg.ToHash() + fetched = false + ) + + if msg.IterRound.Round == preround { + // this will mutate the message to conform to the (hopefully) + // original sent message for signature validation to occur + compacts = msg.Value.CompactProposals + messageCompactsCounter.Add(float64(len(compacts))) + err := h.reconstructProposals(ctx, peer, msgId, msg) + switch { + case errors.Is(err, errCannotMatchProposals): + msg.Value.Proposals, err = h.fetchFull(ctx, peer, msgId) + if err != nil { + return fmt.Errorf("fetch full: %w", err) + } + slices.SortFunc(msg.Value.Proposals, sortProposalIds) + msg.Value.CompactProposals = []types.CompactProposalID{} + fetched = true + case err != nil: + return fmt.Errorf("reconstruct proposals: %w", err) + } + } if !h.verifier.Verify(signing.HARE, msg.Sender, msg.ToMetadata().ToBytes(), msg.Signature) { - signatureError.Inc() - return fmt.Errorf("%w: invalid signature", pubsub.ErrValidationReject) + if msg.IterRound.Round == preround && !fetched { + preroundSigFailCounter.Inc() + // we might have a bad signature because of a local hash collision + // of a proposal that has the same short hash that the node sent us. 
+			// in this case we try to ask for a full exchange of all
+			// proposal IDs and try to validate the signature again
+			var err error
+			msg.Body.Value.Proposals, err = h.fetchFull(ctx, peer, msgId)
+			if err != nil {
+				return fmt.Errorf("signature verify: fetch full: %w", err)
+			}
+			if len(msg.Body.Value.Proposals) != len(compacts) {
+				return fmt.Errorf("signature verify: proposals mismatch: got %d, want %d",
+					len(msg.Body.Value.Proposals), len(compacts))
+			}
+			if !h.verifier.Verify(signing.HARE, msg.Sender, msg.ToMetadata().ToBytes(), msg.Signature) {
+				signatureError.Inc()
+				return fmt.Errorf("%w: signature verify: invalid signature", pubsub.ErrValidationReject)
+			}
+		} else {
+			signatureError.Inc()
+			return fmt.Errorf("%w: invalid signature", pubsub.ErrValidationReject)
+		}
	}
+
+	if msg.IterRound.Round == preround {
+		h.mu.Lock()
+		if _, ok := h.messageCache[msgId]; !ok {
+			newMsg := *msg
+			newMsg.Body.Value.CompactProposals = compacts
+			h.messageCache[msgId] = newMsg
+		}
+		h.mu.Unlock()
+	}
+
	malicious := h.atxsdata.IsMalicious(msg.Sender)
	start := time.Now()
@@ -369,7 +580,7 @@ func (h *Hare) onLayer(layer types.LayerID) {
		beacon:  beacon,
		signers: maps.Values(h.signers),
		vrfs:    make([]*types.HareEligibility, len(h.signers)),
-		proto:   newProtocol(h.config.CommitteeFor(layer)/2 + 1),
+		proto:   newProtocol(h.config.Committee/2 + 1),
	}
	h.sessions[layer] = s.proto
	h.mu.Unlock()
@@ -494,6 +705,23 @@ func (h *Hare) onOutput(session *session, ir IterRound, out output) error {
		msg.Eligibility = *vrf
		msg.Sender = session.signers[i].NodeID()
		msg.Signature = session.signers[i].Sign(signing.HARE, msg.ToMetadata().ToBytes())
+		if ir.Round == preround {
+			var err error
+			msg.Body.Value.CompactProposals, err = h.compactProposalIds(h.compactFn, msg.Layer,
+				out.message.Body.Value.Proposals)
+			if err != nil {
+				h.log.Debug("failed to compact proposals", zap.Error(err))
+				continue
+			}
+			// the hash is computed with only the compact IDs set, which is
+			// what receivers see on arrival; the cached copy keeps the full
+			// proposal list so handleProposalsStream can serve it
+			fullProposals := msg.Body.Value.Proposals
+			msg.Body.Value.Proposals = []types.ProposalID{}
+			id := msg.ToHash()
+			msg.Body.Value.Proposals = fullProposals
+			h.mu.Lock()
+			h.messageCache[id] = msg
+			h.mu.Unlock()
+			msg.Body.Value.Proposals = []types.ProposalID{}
+		}
		if err := h.pubsub.Publish(h.ctx, h.config.ProtocolName, msg.ToBytes()); err != nil {
			h.log.Error("failed to publish", zap.Inline(&msg), zap.Error(err))
		}
@@ -611,14 +839,31 @@ func (h *Hare) IsKnown(layer types.LayerID, proposal types.ProposalID) bool {
	return h.proposals.Get(layer, proposal) != nil
}

+// OnProposal is a hook which gets called when we get a proposal.
func (h *Hare) OnProposal(p *types.Proposal) error {
	return h.proposals.Add(p)
}

+// cleanMessageCache removes cached preround messages
+// once their layers become irrelevant.
+func (h *Hare) cleanMessageCache(l types.LayerID) {
+	var keys []types.Hash32
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	for k, item := range h.messageCache {
+		if item.Layer < l {
+			// mark key for deletion
+			keys = append(keys, k)
+		}
+	}
+	for _, v := range keys {
+		delete(h.messageCache, v)
+	}
+}
+
func (h *Hare) Stop() {
	h.cancel()
	h.eg.Wait()
-	close(h.results)
	close(h.coins)
	h.log.Info("stopped")
}
@@ -630,3 +875,61 @@ type session struct {
	signers []*signing.EdSigner
	vrfs    []*types.HareEligibility
}
+
+type compactFunc func([]byte) []byte
+
+// compactTruncate truncates a given byte slice to a shorter
+// byte slice by reslicing.
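+// The short ID is the first 4 bytes of a proposal's eligibility VRF
+// signature (see compactVrf below), giving 2^32 possible values per layer;
+// occasional collisions are expected and are resolved by the fetchFull
+// fallback in Handler. A sketch:
+//
+//	var vrf types.VrfSignature
+//	var short types.CompactProposalID
+//	copy(short[:], compactTruncate(vrf[:]))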
+func compactTruncate(b []byte) []byte { + return b[:4] +} + +func compactVrf(compacter compactFunc, v types.VrfSignature) (c types.CompactProposalID) { + b := compacter(v[:]) + copy(c[:], b) + return c +} + +func (h *Hare) compactProposals(compacter compactFunc, layer types.LayerID, + proposals []*types.Proposal, +) []types.CompactProposalID { + compactProposals := make([]types.CompactProposalID, len(proposals)) + for i, prop := range proposals { + vrf := prop.EligibilityProofs[0].Sig + compactProposals[i] = compactVrf(compacter, vrf) + } + return compactProposals +} + +func (h *Hare) compactProposalIds(compacter compactFunc, layer types.LayerID, + proposals []types.ProposalID, +) ([]types.CompactProposalID, error) { + compactProposals := make([]types.CompactProposalID, len(proposals)) + for i, prop := range proposals { + fp := h.proposals.Get(layer, prop) + if fp == nil { + return nil, errCannotFindProposal + } + + // we must handle this explicitly or we risk a panic on + // a nil slice access below + if len(fp.EligibilityProofs) == 0 { + return nil, errNoEligibilityProofs + } + compactProposals[i] = compactVrf(compacter, fp.EligibilityProofs[0].Sig) + } + return compactProposals, nil +} + +type proposalTuple struct { + id types.ProposalID + compact types.CompactProposalID +} + +func sortProposalsTuple(i, j proposalTuple) int { + return sortProposalIds(i.id, j.id) +} + +func sortProposalIds(i, j types.ProposalID) int { + return bytes.Compare(i[:], j[:]) +} diff --git a/hare4/hare_test.go b/hare4/hare_test.go index dd8139ab12..b8927cf07f 100644 --- a/hare4/hare_test.go +++ b/hare4/hare_test.go @@ -1,11 +1,14 @@ -package hare3 +package hare4 import ( + "bytes" "context" "fmt" "math/rand" "os" "runtime/pprof" + "slices" + "strconv" "strings" "sync" "testing" @@ -14,17 +17,21 @@ import ( "github.com/jonboulle/clockwork" "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" + "go.uber.org/zap" "go.uber.org/zap/zapcore" "go.uber.org/zap/zaptest" "github.com/spacemeshos/go-spacemesh/atxsdata" "github.com/spacemeshos/go-spacemesh/codec" "github.com/spacemeshos/go-spacemesh/common/types" - "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/hare4/eligibility" + hmock "github.com/spacemeshos/go-spacemesh/hare4/mocks" "github.com/spacemeshos/go-spacemesh/layerpatrol" "github.com/spacemeshos/go-spacemesh/log/logtest" + "github.com/spacemeshos/go-spacemesh/p2p" "github.com/spacemeshos/go-spacemesh/p2p/pubsub" pmocks "github.com/spacemeshos/go-spacemesh/p2p/pubsub/mocks" + "github.com/spacemeshos/go-spacemesh/p2p/server" "github.com/spacemeshos/go-spacemesh/proposals/store" "github.com/spacemeshos/go-spacemesh/signing" "github.com/spacemeshos/go-spacemesh/sql" @@ -37,6 +44,8 @@ import ( const layersPerEpoch = 4 +var wait = 10 * time.Second + func TestMain(m *testing.M) { types.SetLayersPerEpoch(layersPerEpoch) res := m.Run() @@ -121,12 +130,14 @@ type node struct { atxsdata *atxsdata.Data proposals *store.Store - ctrl *gomock.Controller - mpublisher *pmocks.MockPublishSubsciber - msyncer *smocks.MockSyncStateProvider - patrol *layerpatrol.LayerPatrol - tracer *testTracer - hare *Hare + ctrl *gomock.Controller + mpublisher *pmocks.MockPublishSubsciber + msyncer *smocks.MockSyncStateProvider + mverifier *hmock.Mockverifier + mockStreamRequester *hmock.MockstreamRequester + patrol *layerpatrol.LayerPatrol + tracer *testTracer + hare *Hare } func (n *node) withClock() *node { @@ -188,6 +199,11 @@ func 
(n *node) withSyncer() *node { return n } +func (n *node) withVerifier() *node { + n.mverifier = hmock.NewMockverifier(n.ctrl) + return n +} + func (n *node) withOracle() *node { beaconget := smocks.NewMockBeaconGetter(n.ctrl) beaconget.EXPECT().GetBeacon(gomock.Any()).DoAndReturn(func(epoch types.EpochID) (types.Beacon, error) { @@ -209,9 +225,13 @@ func (n *node) withPublisher() *node { return n } +func (n *node) withStreamRequester() *node { + n.mockStreamRequester = hmock.NewMockstreamRequester(n.ctrl) + return n +} + func (n *node) withHare() *node { logger := logtest.New(n.t).Named(fmt.Sprintf("hare=%d", n.i)) - n.nclock = &testNodeClock{ genesis: n.t.start, layerDuration: n.t.layerDuration, @@ -219,20 +239,30 @@ func (n *node) withHare() *node { tracer := newTestTracer(n.t) n.tracer = tracer n.patrol = layerpatrol.New() + var verify verifier + if n.mverifier != nil { + verify = n.mverifier + } else { + verify = signing.NewEdVerifier() + } + z, _ := zap.NewDevelopment() n.hare = New( n.nclock, n.mpublisher, n.db, n.atxsdata, n.proposals, - signing.NewEdVerifier(), + verify, n.oracle, n.msyncer, n.patrol, + nil, WithConfig(n.t.cfg), WithLogger(logger.Zap()), WithWallclock(n.clock), WithTracer(tracer), + WithServer(n.mockStreamRequester), + WithLogger(z), ) n.register(n.signer) return n @@ -259,6 +289,10 @@ func (n *node) storeAtx(atx *types.ActivationTx) error { return nil } +func (n *node) peerId() p2p.Peer { + return p2p.Peer(strconv.Itoa(n.i)) +} + type clusterOpt func(*lockstepCluster) func withUnits(min, max int) clusterOpt { @@ -268,6 +302,18 @@ func withUnits(min, max int) clusterOpt { } } +func withMockVerifier() clusterOpt { + return func(cluster *lockstepCluster) { + cluster.mockVerify = true + } +} + +func withMockCompactFn(f func([]byte) []byte) clusterOpt { + return func(cluster *lockstepCluster) { + cluster.mockCompactFn = f + } +} + func withProposals(fraction float64) clusterOpt { return func(cluster *lockstepCluster) { cluster.proposals.fraction = fraction @@ -284,6 +330,7 @@ func withSigners(n int) clusterOpt { } func newLockstepCluster(t *tester, opts ...clusterOpt) *lockstepCluster { + t.Helper() cluster := &lockstepCluster{t: t} cluster.units.min = 10 cluster.units.max = 10 @@ -302,7 +349,9 @@ type lockstepCluster struct { nodes []*node signers []*node // nodes that active on consensus but don't run hare instance - units struct { + mockVerify bool + mockCompactFn func([]byte) []byte + units struct { min, max int } proposals struct { @@ -340,10 +389,17 @@ func (cl *lockstepCluster) addSigner(n int) *lockstepCluster { func (cl *lockstepCluster) addActive(n int) *lockstepCluster { last := len(cl.nodes) for i := last; i < last+n; i++ { - cl.addNode((&node{t: cl.t, i: i}). + nn := (&node{t: cl.t, i: i}). withController().withSyncer().withPublisher(). withClock().withDb().withSigner().withAtx(cl.units.min, cl.units.max). - withOracle().withHare()) + withStreamRequester().withOracle().withHare() + if cl.mockVerify { + nn = nn.withVerifier() + } + if cl.mockCompactFn != nil { + nn.hare.compactFn = cl.mockCompactFn + } + cl.addNode(nn) } return cl } @@ -354,7 +410,7 @@ func (cl *lockstepCluster) addInactive(n int) *lockstepCluster { cl.addNode((&node{t: cl.t, i: i}). withController().withSyncer().withPublisher(). withClock().withDb().withSigner(). - withOracle().withHare()) + withStreamRequester().withOracle().withHare()) } return cl } @@ -367,7 +423,7 @@ func (cl *lockstepCluster) addEquivocators(n int) *lockstepCluster { reuseSigner(cl.nodes[i-last].signer). 
withController().withSyncer().withPublisher(). withClock().withDb().withAtx(cl.units.min, cl.units.max). - withOracle().withHare()) + withStreamRequester().withOracle().withHare()) } return cl } @@ -395,11 +451,40 @@ func (cl *lockstepCluster) activeSet() types.ATXIDList { return ids } -func (cl *lockstepCluster) genProposals(lid types.LayerID) { +func (cl *lockstepCluster) genProposalNode(lid types.LayerID, node int) { + active := cl.activeSet() + n := cl.nodes[node] + if n.atx == nil { + panic("shouldnt happen") + } + proposal := &types.Proposal{} + proposal.Layer = lid + proposal.EpochData = &types.EpochData{ + Beacon: cl.t.beacon, + ActiveSetHash: active.Hash(), + } + proposal.AtxID = n.atx.ID() + proposal.SmesherID = n.signer.NodeID() + id := types.ProposalID{} + cl.t.rng.Read(id[:]) + bid := types.BallotID{} + cl.t.rng.Read(bid[:]) + proposal.SetID(id) + proposal.Ballot.SetID(bid) + var vrf types.VrfSignature + cl.t.rng.Read(vrf[:]) + proposal.Ballot.EligibilityProofs = append(proposal.Ballot.EligibilityProofs, types.VotingEligibility{Sig: vrf}) + + proposal.SetBeacon(proposal.EpochData.Beacon) + require.NoError(cl.t, ballots.Add(n.db, &proposal.Ballot)) + n.hare.OnProposal(proposal) +} + +func (cl *lockstepCluster) genProposals(lid types.LayerID, skipNodes ...int) { active := cl.activeSet() all := []*types.Proposal{} - for _, n := range append(cl.nodes, cl.signers...) { - if n.atx == nil { + for i, n := range append(cl.nodes, cl.signers...) { + if n.atx == nil || slices.Contains(skipNodes, i) { continue } proposal := &types.Proposal{} @@ -416,6 +501,10 @@ func (cl *lockstepCluster) genProposals(lid types.LayerID) { cl.t.rng.Read(bid[:]) proposal.SetID(id) proposal.Ballot.SetID(bid) + var vrf types.VrfSignature + cl.t.rng.Read(vrf[:]) + proposal.Ballot.EligibilityProofs = append(proposal.Ballot.EligibilityProofs, types.VotingEligibility{Sig: vrf}) + proposal.SetBeacon(proposal.EpochData.Beacon) all = append(all, proposal) } @@ -449,11 +538,24 @@ func (cl *lockstepCluster) setup() { Publish(gomock.Any(), gomock.Any(), gomock.Any()). Do(func(ctx context.Context, _ string, msg []byte) error { for _, other := range cl.nodes { - other.hare.Handler(ctx, "self", msg) + other.hare.Handler(ctx, n.peerId(), msg) } return nil }). AnyTimes() + n.mockStreamRequester.EXPECT().StreamRequest(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Do( + func(ctx context.Context, p p2p.Peer, msg []byte, cb server.StreamRequestCallback, _ ...string) error { + for _, other := range cl.nodes { + if other.peerId() == p { + b := make([]byte, 0, 1024) + buf := bytes.NewBuffer(b) + other.hare.handleProposalsStream(ctx, msg, buf) + cb(ctx, buf) + } + } + return nil + }, + ).AnyTimes() } } @@ -492,12 +594,29 @@ func (cl *lockstepCluster) waitStopped() { } } +// drainInteractiveMessages will make sure that the channels that signal +// that interactive messages came in on the tracer are read from. 
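+// This is needed because testTracer's compactReq/compactResp channels are
+// unbuffered and OnCompactIdRequest/OnCompactIdResponse block until read
+// (failing after a timeout); without a drainer a compact exchange would
+// stall the node under test.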
+func (cl *lockstepCluster) drainInteractiveMessages() { + for _, n := range cl.nodes { + go func() { + for { + select { + case <-n.tracer.compactReq: + case <-n.tracer.compactResp: + } + } + }() + } +} + func newTestTracer(tb testing.TB) *testTracer { return &testTracer{ TB: tb, stopped: make(chan types.LayerID, 100), eligibility: make(chan []*types.HareEligibility), sent: make(chan *Message), + compactReq: make(chan struct{}), + compactResp: make(chan struct{}), } } @@ -506,6 +625,8 @@ type testTracer struct { stopped chan types.LayerID eligibility chan []*types.HareEligibility sent chan *Message + compactReq chan struct{} + compactResp chan struct{} } func waitForChan[T any](t testing.TB, ch <-chan T, timeout time.Duration, failureMsg string) T { @@ -531,15 +652,15 @@ func sendWithTimeout[T any](t testing.TB, value T, ch chan<- T, timeout time.Dur } func (t *testTracer) waitStopped() types.LayerID { - return waitForChan(t.TB, t.stopped, 10*time.Second, "didn't stop") + return waitForChan(t.TB, t.stopped, wait, "didn't stop") } func (t *testTracer) waitEligibility() []*types.HareEligibility { - return waitForChan(t.TB, t.eligibility, 10*time.Second, "no eligibility") + return waitForChan(t.TB, t.eligibility, wait, "no eligibility") } func (t *testTracer) waitSent() *Message { - return waitForChan(t.TB, t.sent, 10*time.Second, "no message") + return waitForChan(t.TB, t.sent, wait, "no message") } func (*testTracer) OnStart(types.LayerID) {} @@ -552,16 +673,25 @@ func (t *testTracer) OnStop(lid types.LayerID) { } func (t *testTracer) OnActive(el []*types.HareEligibility) { - sendWithTimeout(t.TB, el, t.eligibility, 10*time.Second, "eligibility can't be sent") + sendWithTimeout(t.TB, el, t.eligibility, wait, "eligibility can't be sent") } func (t *testTracer) OnMessageSent(m *Message) { - sendWithTimeout(t.TB, m, t.sent, 10*time.Second, "message can't be sent") + sendWithTimeout(t.TB, m, t.sent, wait, "message can't be sent") } func (*testTracer) OnMessageReceived(*Message) {} +func (t *testTracer) OnCompactIdRequest(*CompactIdRequest) { + sendWithTimeout(t.TB, struct{}{}, t.compactReq, wait, "compact req can't be sent") +} + +func (t *testTracer) OnCompactIdResponse(*CompactIdResponse) { + sendWithTimeout(t.TB, struct{}{}, t.compactResp, wait, "compact resp can't be sent") +} + func testHare(t *testing.T, active, inactive, equivocators int, opts ...clusterOpt) { + t.Helper() cfg := DefaultConfig() cfg.LogStats = true tst := &tester{ @@ -581,6 +711,7 @@ func testHare(t *testing.T, active, inactive, equivocators int, opts ...clusterO cluster = cluster.addSigner(cluster.signersCount) cluster.partitionSigners() } + cluster.drainInteractiveMessages() layer := tst.genesis + 1 cluster.setup() @@ -707,6 +838,7 @@ func TestHandler(t *testing.T) { }) t.Run("invalid signature", func(t *testing.T) { msg := &Message{} + msg.Body.IterRound.Round = propose msg.Layer = layer msg.Sender = n.signer.NodeID() msg.Signature = n.signer.Sign(signing.HARE+1, msg.ToMetadata().ToBytes()) @@ -719,6 +851,7 @@ func TestHandler(t *testing.T) { signer, err := signing.NewEdSigner() require.NoError(t, err) msg := &Message{} + msg.Body.IterRound.Round = propose msg.Layer = layer msg.Sender = signer.NodeID() msg.Signature = signer.Sign(signing.HARE, msg.ToMetadata().ToBytes()) @@ -726,19 +859,57 @@ func TestHandler(t *testing.T) { "zero grade") }) t.Run("equivocation", func(t *testing.T) { + b := types.RandomBallot() + b.InnerBallot.Layer = layer + b.Layer = layer + p1 := &types.Proposal{ + InnerProposal: 
types.InnerProposal{ + Ballot: *b, + TxIDs: []types.TransactionID{types.RandomTransactionID(), types.RandomTransactionID()}, + }, + } + b2 := types.RandomBallot() + + b2.InnerBallot.Layer = layer + b.Layer = layer + p2 := &types.Proposal{ + InnerProposal: types.InnerProposal{ + Ballot: *b2, + TxIDs: []types.TransactionID{types.RandomTransactionID(), types.RandomTransactionID()}, + }, + } + + p1.Initialize() + p2.Initialize() + + if err := n.hare.OnProposal(p1); err != nil { + panic(err) + } + + if err := n.hare.OnProposal(p2); err != nil { + panic(err) + } msg1 := &Message{} msg1.Layer = layer - msg1.Value.Proposals = []types.ProposalID{{1}} + msg1.Value.Proposals = []types.ProposalID{p1.ID()} msg1.Eligibility = *elig msg1.Sender = n.signer.NodeID() msg1.Signature = n.signer.Sign(signing.HARE, msg1.ToMetadata().ToBytes()) + msg1.Value.Proposals = nil + msg1.Value.CompactProposals = []types.CompactProposalID{ + compactVrf(compactTruncate, p1.Ballot.EligibilityProofs[0].Sig), + } msg2 := &Message{} msg2.Layer = layer - msg2.Value.Proposals = []types.ProposalID{{2}} + msg2.Value.Proposals = []types.ProposalID{p2.ID()} msg2.Eligibility = *elig msg2.Sender = n.signer.NodeID() msg2.Signature = n.signer.Sign(signing.HARE, msg2.ToMetadata().ToBytes()) + msg2.Value.Proposals = nil + msg2.Value.CompactProposals = []types.CompactProposalID{ + compactVrf(compactTruncate, p2.Ballot.EligibilityProofs[0].Sig), + } require.NoError(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg1))) require.NoError(t, n.hare.Handler(context.Background(), "", codec.MustEncode(msg2))) @@ -901,10 +1072,11 @@ func TestProposals(t *testing.T) { db, atxsdata, proposals, - nil, + signing.NewEdVerifier(), nil, nil, layerpatrol.New(), + nil, WithLogger(zaptest.NewLogger(t)), ) for _, atx := range tc.atxs { @@ -912,7 +1084,9 @@ func TestProposals(t *testing.T) { atxsdata.AddFromAtx(&atx, false) } for _, proposal := range tc.proposals { - proposals.Add(proposal) + if err := proposals.Add(proposal); err != nil { + panic(err) + } } for _, id := range tc.malicious { require.NoError(t, identities.SetMalicious(db, id, []byte("non empty"), time.Time{})) @@ -930,7 +1104,7 @@ func TestProposals(t *testing.T) { func TestHare_AddProposal(t *testing.T) { t.Parallel() proposals := store.New() - hare := New(nil, nil, nil, nil, proposals, nil, nil, nil, nil) + hare := New(nil, nil, nil, nil, proposals, nil, nil, nil, nil, nil) p := gproposal( types.RandomProposalID(), @@ -947,26 +1121,281 @@ func TestHare_AddProposal(t *testing.T) { require.ErrorIs(t, hare.OnProposal(p), store.ErrProposalExists) } -func TestHareConfig_CommitteeUpgrade(t *testing.T) { - t.Parallel() - t.Run("no upgrade", func(t *testing.T) { - cfg := Config{ - Committee: 400, +func TestProposalIDSort(t *testing.T) { + var ( + a = types.ProposalID{0, 3, 2, 3, 5} + b = types.ProposalID{0, 1, 2, 3, 4} + c = types.ProposalID{11, 4, 6, 254, 0} + d = types.ProposalID{0, 1, 2, 3, 5} + ) + srt := []types.ProposalID{c, b, a, d} + slices.SortFunc(srt, sortProposalIds) + require.Equal(t, []types.ProposalID{b, d, a, c}, srt) +} + +// TestHare_ReconstructForward tests that a message +// could be reconstructed on a downstream peer that +// receives a gossipsub message from a forwarding node +// without needing a direct connection to the original sender. 
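+// Only the preround publish is hijacked below: node i delivers its message to
+// one peer directly, and that peer relays it to the third node under the
+// relay's own peer ID. The third node must therefore reconstruct from its own
+// proposal store, or fetchFull from the relay, having never seen the sender.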
+func TestHare_ReconstructForward(t *testing.T) { + cfg := DefaultConfig() + cfg.LogStats = true + tst := &tester{ + TB: t, + rng: rand.New(rand.NewSource(1001)), + start: time.Now(), + cfg: cfg, + layerDuration: 5 * time.Minute, + beacon: types.Beacon{1, 1, 1, 1}, + genesis: types.GetEffectiveGenesis(), + } + cluster := newLockstepCluster(tst). + addActive(3) + if cluster.signersCount > 0 { + cluster = cluster.addSigner(cluster.signersCount) + cluster.partitionSigners() + } + cluster.drainInteractiveMessages() + layer := tst.genesis + 1 + + // cluster setup + active := cluster.activeSet() + for i, n := range cluster.nodes { + require.NoError(cluster.t, beacons.Add(n.db, cluster.t.genesis.GetEpoch()+1, cluster.t.beacon)) + for _, other := range append(cluster.nodes, cluster.signers...) { + if other.atx == nil { + continue + } + require.NoError(cluster.t, n.storeAtx(other.atx)) } - require.Equal(t, cfg.Committee, cfg.CommitteeFor(0)) - require.Equal(t, cfg.Committee, cfg.CommitteeFor(100)) - }) - t.Run("upgrade", func(t *testing.T) { - cfg := Config{ - Committee: 400, - CommitteeUpgrade: &CommitteeUpgrade{ - Layer: 16, - Size: 50, + n.oracle.UpdateActiveSet(cluster.t.genesis.GetEpoch()+1, active) + n.mpublisher.EXPECT(). + Publish(gomock.Any(), gomock.Any(), gomock.Any()). + Do(func(ctx context.Context, proto string, msg []byte) error { + // here we wanna call the handler on the second node + // but then call the handler on the third with the incoming peer id + // of the second node, this way we know the peers could resolve between + // themselves without having the connection to the original sender + // 1st publish call is for the preround, so we will hijack that and + // leave the rest to broadcast + m := &Message{} + codec.MustDecode(msg, m) + if m.Body.IterRound.Round == preround { + other := [2]int{0, 0} + switch i { + case 0: + other[0] = 1 + other[1] = 2 + case 1: + other[0] = 0 + other[1] = 2 + case 2: + other[0] = 1 + other[1] = 0 + default: + panic("bad") + } + if err := cluster.nodes[other[0]].hare. + Handler(ctx, cluster.nodes[i].peerId(), msg); err != nil { + panic(err) + } + if err := cluster.nodes[other[1]].hare. + Handler(ctx, cluster.nodes[other[0]].peerId(), msg); err != nil { + panic(err) + } + return nil + } + + for _, other := range cluster.nodes { + if err := other.hare.Handler(ctx, n.peerId(), msg); err != nil { + panic(err) + } + } + return nil + }). 
+			AnyTimes()
+		n.mockStreamRequester.EXPECT().StreamRequest(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Do(
+			func(ctx context.Context, p p2p.Peer, msg []byte, cb server.StreamRequestCallback, _ ...string) error {
+				for _, other := range cluster.nodes {
+					if other.peerId() == p {
+						b := make([]byte, 0, 1024)
+						buf := bytes.NewBuffer(b)
+						if err := other.hare.handleProposalsStream(ctx, msg, buf); err != nil {
+							return fmt.Errorf("exec handleProposalStream: %w", err)
+						}
+						if err := cb(ctx, buf); err != nil {
+							return fmt.Errorf("exec callback: %w", err)
+						}
+					}
+				}
+				return nil
+			},
+		).AnyTimes()
+	}
+
+	cluster.genProposals(layer, 2)
+	cluster.genProposalNode(layer, 2)
+	cluster.movePreround(layer)
+	for i := 0; i < 2*int(notify); i++ {
+		cluster.moveRound()
+	}
+	var consistent []types.ProposalID
+	cluster.waitStopped()
+	for _, n := range cluster.nodes {
+		select {
+		case coin := <-n.hare.Coins():
+			require.Equal(t, coin.Layer, layer)
+		default:
+			require.FailNow(t, "no coin")
		}
+		select {
+		case rst := <-n.hare.Results():
+			require.Equal(t, rst.Layer, layer)
+			require.NotEmpty(t, rst.Proposals)
+			if consistent == nil {
+				consistent = rst.Proposals
+			} else {
+				require.Equal(t, consistent, rst.Proposals)
+			}
+		default:
+			require.FailNow(t, "no result")
+		}
+		require.Empty(t, n.hare.Running())
+	}
+}
+
+// TestHare_ReconstructAll tests that the nodes go into a
+// full message exchange in the case that a signature fails
+// although all compact hashes and proposals match.
+func TestHare_ReconstructAll(t *testing.T) {
+	cfg := DefaultConfig()
+	cfg.LogStats = true
+	tst := &tester{
+		TB:            t,
+		rng:           rand.New(rand.NewSource(1001)),
+		start:         time.Now(),
+		cfg:           cfg,
+		layerDuration: 5 * time.Minute,
+		beacon:        types.Beacon{1, 1, 1, 1},
+		genesis:       types.GetEffectiveGenesis(),
+	}
+	cluster := newLockstepCluster(tst, withMockVerifier()).
+		addActive(3)
+	if cluster.signersCount > 0 {
+		cluster = cluster.addSigner(cluster.signersCount)
+		cluster.partitionSigners()
+	}
+	layer := tst.genesis + 1
+
+	cluster.drainInteractiveMessages()
+	// cluster setup
+	calls := [3]int{}
+	for i, n := range cluster.nodes {
+		n.mverifier.EXPECT().Verify(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
+			DoAndReturn(func(_ signing.Domain, _ types.NodeID, _ []byte, _ types.EdSignature) bool {
+				calls[i]++
+				// fail verification on each node's first call to force the
+				// full exchange, succeed on all subsequent calls
+				return calls[i] != 1
+			}).AnyTimes()
+	}
+	cluster.setup()
+	cluster.genProposals(layer)
+	cluster.movePreround(layer)
+	for i := 0; i < 2*int(notify); i++ {
+		cluster.moveRound()
+	}
+	var consistent []types.ProposalID
+	cluster.waitStopped()
+	for _, n := range cluster.nodes {
+		select {
+		case coin := <-n.hare.Coins():
+			require.Equal(t, coin.Layer, layer)
+		default:
+			require.FailNow(t, "no coin")
+		}
+		select {
+		case rst := <-n.hare.Results():
+			require.Equal(t, rst.Layer, layer)
+			require.NotEmpty(t, rst.Proposals)
+			if consistent == nil {
+				consistent = rst.Proposals
+			} else {
+				require.Equal(t, consistent, rst.Proposals)
+			}
+		default:
+			t.Fatal("no result")
+		}
+		require.Empty(t, n.hare.Running())
+	}
+}
+
+// TestHare_ReconstructCollision tests that nodes recover when two proposals
+// share the same compact (short ID) prefix: the colliding entries are
+// resolved through the compact-ID exchange and, per the scenario below, a
+// full exchange of all hashes is not triggered.
+func TestHare_ReconstructCollision(t *testing.T) { + cfg := DefaultConfig() + cfg.LogStats = true + tst := &tester{ + TB: t, + rng: rand.New(rand.NewSource(1000)), + start: time.Now(), + cfg: cfg, + layerDuration: 5 * time.Minute, + beacon: types.Beacon{1, 1, 1, 1}, + genesis: types.GetEffectiveGenesis(), + } + + fn := func(_ []byte) []byte { + return []byte{0xab, 0xab, 0xab, 0xab} + } + cluster := newLockstepCluster(tst, withMockCompactFn(fn), withProposals(1)). + addActive(2) + if cluster.signersCount > 0 { + cluster = cluster.addSigner(cluster.signersCount) + cluster.partitionSigners() + } + layer := tst.genesis + 1 + + // scenario: + // node 1 has generated 1 proposal that (mocked) hash into 0xab as prefix - both nodes know the proposal + // node 2 has generated 1 proposal that hash into 0xab (but node 1 doesn't know about it) + // so the two proposals collide and then we check that the nodes actually go into a round of + // exchanging the missing/colliding hashes and then the signature verification (not mocked) + // should pass and that a full exchange of all hashes is not triggered (disambiguates case of + // failed signature vs. hashes colliding - there's a difference in number of single prefixes + // that are sent, but the response should be the same) + + go func() { <-cluster.nodes[1].tracer.compactReq }() // node 2 gets the request + go func() { <-cluster.nodes[0].tracer.compactResp }() // node 1 gets the response + + cluster.setup() + + cluster.genProposals(layer, 1) + cluster.genProposalNode(layer, 1) + cluster.movePreround(layer) + for i := 0; i < 2*int(notify); i++ { + cluster.moveRound() + } + var consistent []types.ProposalID + cluster.waitStopped() + for _, n := range cluster.nodes { + select { + case coin := <-n.hare.Coins(): + require.Equal(t, coin.Layer, layer) + default: + require.FailNow(t, "no coin") + } + select { + case rst := <-n.hare.Results(): + require.Equal(t, rst.Layer, layer) + require.NotEmpty(t, rst.Proposals) + if consistent == nil { + consistent = rst.Proposals + } else { + require.Equal(t, consistent, rst.Proposals) + } + default: + t.Fatal("no result") + } + require.Empty(t, n.hare.Running()) + } } diff --git a/hare4/interface.go b/hare4/interface.go new file mode 100644 index 0000000000..1401065dfa --- /dev/null +++ b/hare4/interface.go @@ -0,0 +1,20 @@ +package hare4 + +import ( + "context" + + "github.com/spacemeshos/go-spacemesh/common/types" + "github.com/spacemeshos/go-spacemesh/p2p" + "github.com/spacemeshos/go-spacemesh/p2p/server" + "github.com/spacemeshos/go-spacemesh/signing" +) + +//go:generate mockgen -typed -package=mocks -destination=./mocks/mocks.go -source=./interface.go + +type streamRequester interface { + StreamRequest(context.Context, p2p.Peer, []byte, server.StreamRequestCallback, ...string) error +} + +type verifier interface { + Verify(signing.Domain, types.NodeID, []byte, types.EdSignature) bool +} diff --git a/hare4/legacy_oracle.go b/hare4/legacy_oracle.go index 07be7f77a4..dbb263bdb4 100644 --- a/hare4/legacy_oracle.go +++ b/hare4/legacy_oracle.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import ( "context" @@ -26,7 +26,7 @@ func (lg *legacyOracle) validate(msg *Message) grade { if msg.Eligibility.Count == 0 { return grade0 } - committee := int(lg.config.CommitteeFor(msg.Layer)) + committee := int(lg.config.Committee) if msg.Round == propose { committee = int(lg.config.Leaders) } @@ -50,7 +50,7 @@ func (lg *legacyOracle) active( ir IterRound, ) *types.HareEligibility { vrf := 
eligibility.GenVRF(context.Background(), signer.VRFSigner(), beacon, layer, ir.Absolute()) - committee := int(lg.config.CommitteeFor(layer)) + committee := int(lg.config.Committee) if ir.Round == propose { committee = int(lg.config.Leaders) } diff --git a/hare4/malfeasance.go b/hare4/malfeasance.go index e0037d7ef3..52a40dfefc 100644 --- a/hare4/malfeasance.go +++ b/hare4/malfeasance.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import ( "context" diff --git a/hare4/malfeasance_test.go b/hare4/malfeasance_test.go index 0f2a0f1491..2bdaa119af 100644 --- a/hare4/malfeasance_test.go +++ b/hare4/malfeasance_test.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import ( "context" diff --git a/hare4/metrics.go b/hare4/metrics.go index e4182c7e49..14f496b938 100644 --- a/hare4/metrics.go +++ b/hare4/metrics.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import ( "github.com/prometheus/client_golang/prometheus" @@ -6,7 +6,7 @@ import ( "github.com/spacemeshos/go-spacemesh/metrics" ) -const namespace = "hare" +const namespace = "hare4" // todo change this back to `hare` var ( processCounter = metrics.NewCounter( @@ -61,6 +61,36 @@ var ( []string{"step"}, prometheus.ExponentialBuckets(0.01, 2, 10), ) - proposalsLatency = protocolLatency.WithLabelValues("proposals") - activeLatency = protocolLatency.WithLabelValues("active") + proposalsLatency = protocolLatency.WithLabelValues("proposals") + activeLatency = protocolLatency.WithLabelValues("active") + requestCompactCounter = prometheus.NewCounter(metrics.NewCounterOpts( + namespace, + "request_compact_count", + "number of times we needed to go into a clarifying round", + )) + requestCompactErrorCounter = prometheus.NewCounter(metrics.NewCounterOpts( + namespace, + "request_compact_error_count", + "number of errors got when requesting compact proposals from peer", + )) + requestCompactHandlerCounter = prometheus.NewCounter(metrics.NewCounterOpts( + namespace, + "request_compact_handler_count", + "number of requests handled on the compact stream handler", + )) + messageCacheMiss = prometheus.NewCounter(metrics.NewCounterOpts( + namespace, + "message_cache_miss", + "number of message cache misses", + )) + messageCompactsCounter = prometheus.NewCounter(metrics.NewCounterOpts( + namespace, + "message_compacts_count", + "number of compact proposals that arrived to be checked in a message", + )) + preroundSigFailCounter = prometheus.NewCounter(metrics.NewCounterOpts( + namespace, + "preround_signature_fail_count", + "counter for signature fails on preround with compact message", + )) ) diff --git a/hare4/mocks/mocks.go b/hare4/mocks/mocks.go new file mode 100644 index 0000000000..f3e813e77b --- /dev/null +++ b/hare4/mocks/mocks.go @@ -0,0 +1,148 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: ./interface.go +// +// Generated by this command: +// +// mockgen -typed -package=mocks -destination=./mocks/mocks.go -source=./interface.go +// + +// Package mocks is a generated GoMock package. +package mocks + +import ( + context "context" + reflect "reflect" + + types "github.com/spacemeshos/go-spacemesh/common/types" + p2p "github.com/spacemeshos/go-spacemesh/p2p" + server "github.com/spacemeshos/go-spacemesh/p2p/server" + signing "github.com/spacemeshos/go-spacemesh/signing" + gomock "go.uber.org/mock/gomock" +) + +// MockstreamRequester is a mock of streamRequester interface. 
+type MockstreamRequester struct { + ctrl *gomock.Controller + recorder *MockstreamRequesterMockRecorder +} + +// MockstreamRequesterMockRecorder is the mock recorder for MockstreamRequester. +type MockstreamRequesterMockRecorder struct { + mock *MockstreamRequester +} + +// NewMockstreamRequester creates a new mock instance. +func NewMockstreamRequester(ctrl *gomock.Controller) *MockstreamRequester { + mock := &MockstreamRequester{ctrl: ctrl} + mock.recorder = &MockstreamRequesterMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockstreamRequester) EXPECT() *MockstreamRequesterMockRecorder { + return m.recorder +} + +// StreamRequest mocks base method. +func (m *MockstreamRequester) StreamRequest(arg0 context.Context, arg1 p2p.Peer, arg2 []byte, arg3 server.StreamRequestCallback, arg4 ...string) error { + m.ctrl.T.Helper() + varargs := []any{arg0, arg1, arg2, arg3} + for _, a := range arg4 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "StreamRequest", varargs...) + ret0, _ := ret[0].(error) + return ret0 +} + +// StreamRequest indicates an expected call of StreamRequest. +func (mr *MockstreamRequesterMockRecorder) StreamRequest(arg0, arg1, arg2, arg3 any, arg4 ...any) *MockstreamRequesterStreamRequestCall { + mr.mock.ctrl.T.Helper() + varargs := append([]any{arg0, arg1, arg2, arg3}, arg4...) + call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "StreamRequest", reflect.TypeOf((*MockstreamRequester)(nil).StreamRequest), varargs...) + return &MockstreamRequesterStreamRequestCall{Call: call} +} + +// MockstreamRequesterStreamRequestCall wrap *gomock.Call +type MockstreamRequesterStreamRequestCall struct { + *gomock.Call +} + +// Return rewrite *gomock.Call.Return +func (c *MockstreamRequesterStreamRequestCall) Return(arg0 error) *MockstreamRequesterStreamRequestCall { + c.Call = c.Call.Return(arg0) + return c +} + +// Do rewrite *gomock.Call.Do +func (c *MockstreamRequesterStreamRequestCall) Do(f func(context.Context, p2p.Peer, []byte, server.StreamRequestCallback, ...string) error) *MockstreamRequesterStreamRequestCall { + c.Call = c.Call.Do(f) + return c +} + +// DoAndReturn rewrite *gomock.Call.DoAndReturn +func (c *MockstreamRequesterStreamRequestCall) DoAndReturn(f func(context.Context, p2p.Peer, []byte, server.StreamRequestCallback, ...string) error) *MockstreamRequesterStreamRequestCall { + c.Call = c.Call.DoAndReturn(f) + return c +} + +// Mockverifier is a mock of verifier interface. +type Mockverifier struct { + ctrl *gomock.Controller + recorder *MockverifierMockRecorder +} + +// MockverifierMockRecorder is the mock recorder for Mockverifier. +type MockverifierMockRecorder struct { + mock *Mockverifier +} + +// NewMockverifier creates a new mock instance. +func NewMockverifier(ctrl *gomock.Controller) *Mockverifier { + mock := &Mockverifier{ctrl: ctrl} + mock.recorder = &MockverifierMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *Mockverifier) EXPECT() *MockverifierMockRecorder { + return m.recorder +} + +// Verify mocks base method. +func (m *Mockverifier) Verify(arg0 signing.Domain, arg1 types.NodeID, arg2 []byte, arg3 types.EdSignature) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Verify", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(bool) + return ret0 +} + +// Verify indicates an expected call of Verify. 
+func (mr *MockverifierMockRecorder) Verify(arg0, arg1, arg2, arg3 any) *MockverifierVerifyCall { + mr.mock.ctrl.T.Helper() + call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Verify", reflect.TypeOf((*Mockverifier)(nil).Verify), arg0, arg1, arg2, arg3) + return &MockverifierVerifyCall{Call: call} +} + +// MockverifierVerifyCall wrap *gomock.Call +type MockverifierVerifyCall struct { + *gomock.Call +} + +// Return rewrite *gomock.Call.Return +func (c *MockverifierVerifyCall) Return(arg0 bool) *MockverifierVerifyCall { + c.Call = c.Call.Return(arg0) + return c +} + +// Do rewrite *gomock.Call.Do +func (c *MockverifierVerifyCall) Do(f func(signing.Domain, types.NodeID, []byte, types.EdSignature) bool) *MockverifierVerifyCall { + c.Call = c.Call.Do(f) + return c +} + +// DoAndReturn rewrite *gomock.Call.DoAndReturn +func (c *MockverifierVerifyCall) DoAndReturn(f func(signing.Domain, types.NodeID, []byte, types.EdSignature) bool) *MockverifierVerifyCall { + c.Call = c.Call.DoAndReturn(f) + return c +} diff --git a/hare4/protocol.go b/hare4/protocol.go index be89eeb7bc..5e196a831d 100644 --- a/hare4/protocol.go +++ b/hare4/protocol.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import ( "bytes" @@ -111,6 +111,7 @@ type protocol struct { } func (p *protocol) OnInitial(proposals []types.ProposalID) { + slices.SortFunc(proposals, sortProposalIds) p.mu.Lock() defer p.mu.Unlock() p.initial = proposals diff --git a/hare4/protocol_test.go b/hare4/protocol_test.go index 611dec4470..c04e69fdcf 100644 --- a/hare4/protocol_test.go +++ b/hare4/protocol_test.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import ( "testing" diff --git a/hare4/tracer.go b/hare4/tracer.go index 861c5f9e9a..0b4a86f114 100644 --- a/hare4/tracer.go +++ b/hare4/tracer.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import "github.com/spacemeshos/go-spacemesh/common/types" @@ -8,6 +8,8 @@ type Tracer interface { OnActive([]*types.HareEligibility) OnMessageSent(*Message) OnMessageReceived(*Message) + OnCompactIdRequest(*CompactIdRequest) + OnCompactIdResponse(*CompactIdResponse) } var _ Tracer = noopTracer{} @@ -23,3 +25,7 @@ func (noopTracer) OnActive([]*types.HareEligibility) {} func (noopTracer) OnMessageSent(*Message) {} func (noopTracer) OnMessageReceived(*Message) {} + +func (noopTracer) OnCompactIdRequest(*CompactIdRequest) {} + +func (noopTracer) OnCompactIdResponse(*CompactIdResponse) {} diff --git a/hare4/types.go b/hare4/types.go index b94800c207..990ccc387b 100644 --- a/hare4/types.go +++ b/hare4/types.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import ( "errors" @@ -92,6 +92,9 @@ type Value struct { Proposals []types.ProposalID `scale:"max=2350"` // Reference is set in messages for commit and notify rounds. Reference *types.Hash32 + // CompactProposals is the array of two-byte SipHash(vrf,proposal_id) and is only used in the preround + // phase of the protocol. + CompactProposals []types.CompactProposalID `scale:"max=2050"` } type Body struct { @@ -170,3 +173,11 @@ func (m *Message) MarshalLogObject(encoder zapcore.ObjectEncoder) error { encoder.AddUint16("vrf_count", m.Eligibility.Count) return nil } + +type CompactIdRequest struct { + MsgId types.Hash32 +} + +type CompactIdResponse struct { + Ids []types.ProposalID `scale:"max=2050"` +} diff --git a/hare4/types_scale.go b/hare4/types_scale.go index 28e8b64035..8d9dda5969 100644 --- a/hare4/types_scale.go +++ b/hare4/types_scale.go @@ -1,7 +1,7 @@ // Code generated by github.com/spacemeshos/go-scale/scalegen. DO NOT EDIT. 
// nolint -package hare3 +package hare4 import ( "github.com/spacemeshos/go-scale" @@ -61,6 +61,13 @@ func (t *Value) EncodeScale(enc *scale.Encoder) (total int, err error) { } total += n } + { + n, err := scale.EncodeStructSliceWithLimit(enc, t.CompactProposals, 2050) + if err != nil { + return total, err + } + total += n + } return total, nil } @@ -81,6 +88,14 @@ func (t *Value) DecodeScale(dec *scale.Decoder) (total int, err error) { total += n t.Reference = field } + { + field, n, err := scale.DecodeStructSliceWithLimit[types.CompactProposalID](dec, 2050) + if err != nil { + return total, err + } + total += n + t.CompactProposals = field + } return total, nil } @@ -198,3 +213,48 @@ func (t *Message) DecodeScale(dec *scale.Decoder) (total int, err error) { } return total, nil } + +func (t *CompactIdRequest) EncodeScale(enc *scale.Encoder) (total int, err error) { + { + n, err := scale.EncodeByteArray(enc, t.MsgId[:]) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *CompactIdRequest) DecodeScale(dec *scale.Decoder) (total int, err error) { + { + n, err := scale.DecodeByteArray(dec, t.MsgId[:]) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *CompactIdResponse) EncodeScale(enc *scale.Encoder) (total int, err error) { + { + n, err := scale.EncodeStructSliceWithLimit(enc, t.Ids, 2050) + if err != nil { + return total, err + } + total += n + } + return total, nil +} + +func (t *CompactIdResponse) DecodeScale(dec *scale.Decoder) (total int, err error) { + { + field, n, err := scale.DecodeStructSliceWithLimit[types.ProposalID](dec, 2050) + if err != nil { + return total, err + } + total += n + t.Ids = field + } + return total, nil +} diff --git a/hare4/types_test.go b/hare4/types_test.go index ded67b1c6a..91cf9ffe4f 100644 --- a/hare4/types_test.go +++ b/hare4/types_test.go @@ -1,4 +1,4 @@ -package hare3 +package hare4 import ( "testing" diff --git a/metrics/common.go b/metrics/common.go index 23898e7b27..46860e9fb3 100644 --- a/metrics/common.go +++ b/metrics/common.go @@ -75,3 +75,11 @@ func ReportMessageLatency(protocol, msgType string, latency time.Duration) { } receivedMessagesLatency.WithLabelValues(protocol, msgType, sign).Observe(seconds) } + +func NewCounterOpts(ns, name, help string) prometheus.CounterOpts { + return prometheus.CounterOpts{ + Namespace: ns, + Name: name, + Help: help, + } +} diff --git a/node/node.go b/node/node.go index 9d7e8ee9b1..6ac7593199 100644 --- a/node/node.go +++ b/node/node.go @@ -54,6 +54,7 @@ import ( "github.com/spacemeshos/go-spacemesh/hare3" "github.com/spacemeshos/go-spacemesh/hare3/compat" "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/hash" "github.com/spacemeshos/go-spacemesh/layerpatrol" "github.com/spacemeshos/go-spacemesh/log" @@ -398,6 +399,8 @@ type App struct { atxsdata *atxsdata.Data clock *timesync.NodeClock hare3 *hare3.Hare + hare4 *hare4.Hare + hareResultsChan chan hare4.ConsensusOutput hOracle *eligibility.Oracle blockGen *blocks.Generator certifier *blocks.Certifier @@ -864,37 +867,80 @@ func (app *App) initServices(ctx context.Context) error { } logger := app.addLogger(HareLogger, lg).Zap() - app.hare3 = hare3.New( - app.clock, - app.host, - app.db, - app.atxsdata, - proposalsStore, - app.edVerifier, - app.hOracle, - newSyncer, - patrol, - hare3.WithLogger(logger), - hare3.WithConfig(app.Config.HARE3), - ) - for _, sig := range 
app.signers { - app.hare3.Register(sig) + // should be removed after hare4 transition is complete + app.hareResultsChan = make(chan hare4.ConsensusOutput, 32) + if app.Config.HARE3.Enable { + app.hare3 = hare3.New( + app.clock, + app.host, + app.db, + app.atxsdata, + proposalsStore, + app.edVerifier, + app.hOracle, + newSyncer, + patrol, + hare3.WithLogger(logger), + hare3.WithConfig(app.Config.HARE3), + hare3.WithResultsChan(app.hareResultsChan), + ) + for _, sig := range app.signers { + app.hare3.Register(sig) + } + app.hare3.Start() + app.eg.Go(func() error { + compat.ReportWeakcoin( + ctx, + logger, + app.hare3.Coins(), + tortoiseWeakCoin{db: app.cachedDB, tortoise: trtl}, + ) + return nil + }) } - app.hare3.Start() - app.eg.Go(func() error { - compat.ReportWeakcoin( - ctx, - logger, - app.hare3.Coins(), - tortoiseWeakCoin{db: app.cachedDB, tortoise: trtl}, + + if app.Config.HARE4.Enable { + app.hare4 = hare4.New( + app.clock, + app.host, + app.db, + app.atxsdata, + proposalsStore, + app.edVerifier, + app.hOracle, + newSyncer, + patrol, + app.host, + hare4.WithLogger(logger), + hare4.WithConfig(app.Config.HARE4), + hare4.WithResultsChan(app.hareResultsChan), ) - return nil - }) + for _, sig := range app.signers { + app.hare4.Register(sig) + } + app.hare4.Start() + app.eg.Go(func() error { + compat.ReportWeakcoin( + ctx, + logger, + app.hare4.Coins(), + tortoiseWeakCoin{db: app.cachedDB, tortoise: trtl}, + ) + return nil + }) + } + + propHare := &proposalConsumerHare{ + clock: app.clock, + hare3: app.hare3, + h3DisableLayer: app.Config.HARE3.DisableLayer, + hare4: app.hare4, + } proposalListener := proposals.NewHandler( app.db, app.atxsdata, - app.hare3, + propHare, app.edVerifier, app.host, fetcherWrapped, @@ -928,7 +974,7 @@ func (app *App) initServices(ctx context.Context) error { OptFilterThreshold: app.Config.OptFilterThreshold, GenBlockInterval: 500 * time.Millisecond, }), - blocks.WithHareOutputChan(app.hare3.Results()), + blocks.WithHareOutputChan(app.hareResultsChan), blocks.WithGeneratorLogger(app.addLogger(BlockGenLogger, lg).Zap()), ) @@ -1827,6 +1873,14 @@ func (app *App) stopServices(ctx context.Context) { app.hare3.Stop() } + if app.hare4 != nil { + app.hare4.Stop() + } + + if app.hareResultsChan != nil { + close(app.hareResultsChan) + } + if app.blockGen != nil { app.blockGen.Stop() } @@ -2241,3 +2295,26 @@ func (w tortoiseWeakCoin) Set(lid types.LayerID, value bool) error { func onMainNet(conf *config.Config) bool { return conf.Genesis.GenesisTime == config.MainnetConfig().Genesis.GenesisTime } + +// proposalConsumerHare is used for the hare3->hare4 migration +// to satisfy the proposals handler dependency on hare. 
+type proposalConsumerHare struct { + hare3 *hare3.Hare + h3DisableLayer types.LayerID + clock *timesync.NodeClock + hare4 *hare4.Hare +} + +func (p *proposalConsumerHare) IsKnown(layer types.LayerID, proposal types.ProposalID) bool { + if p.clock.CurrentLayer() < p.h3DisableLayer { + return p.hare3.IsKnown(layer, proposal) + } + return p.hare4.IsKnown(layer, proposal) +} + +func (p *proposalConsumerHare) OnProposal(proposal *types.Proposal) error { + if p.clock.CurrentLayer() < p.h3DisableLayer { + return p.hare3.OnProposal(proposal) + } + return p.hare4.OnProposal(proposal) +} diff --git a/node/node_test.go b/node/node_test.go index 513ca40c4d..3a8f5a2407 100644 --- a/node/node_test.go +++ b/node/node_test.go @@ -1291,6 +1291,9 @@ func getTestDefaultConfig(tb testing.TB) *config.Config { cfg.HARE3.RoundDuration = 2 cfg.HARE3.PreroundDelay = 1 + cfg.HARE4.RoundDuration = 2 + cfg.HARE4.PreroundDelay = 1 + cfg.LayerAvgSize = 5 cfg.LayersPerEpoch = 3 cfg.TxsPerProposal = 100 diff --git a/p2p/server/server.go b/p2p/server/server.go index 38d52dabbf..a1ead5c8d9 100644 --- a/p2p/server/server.go +++ b/p2p/server/server.go @@ -516,7 +516,7 @@ func ReadResponse(r io.Reader, toCall func(resLen uint32) (int, error)) (int, er n, err := toCall(respLen) nBytes += n if err != nil { - return nBytes, err + return nBytes, fmt.Errorf("callback error: %w", err) } if int(respLen) != n { return nBytes, errors.New("malformed server response") @@ -526,7 +526,7 @@ func ReadResponse(r io.Reader, toCall func(resLen uint32) (int, error)) (int, er nBytes += n switch { case err != nil: - return nBytes, err + return nBytes, fmt.Errorf("decode error: %w", err) case errStr != "": return nBytes, NewServerError(errStr) case respLen == 0: From c6c3f44253f02f34a10471a27a2cc6e5a203eb02 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:34:29 -0600 Subject: [PATCH 03/17] chore: address pr comments --- hare4/hare.go | 11 +++-------- hare4/hare_test.go | 19 +++++++++++-------- hare4/types.go | 2 +- node/node.go | 4 ++-- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/hare4/hare.go b/hare4/hare.go index 9ce3539fe3..af63557aba 100644 --- a/hare4/hare.go +++ b/hare4/hare.go @@ -291,7 +291,7 @@ func (h *Hare) Start() { for next := enabled; next < disabled; next++ { select { case <-h.nodeclock.AwaitLayer(next): - h.log.Debug("notified", zap.Uint32("layer id", next.Uint32())) + h.log.Debug("notified", zap.Uint32("layer", next.Uint32())) h.onLayer(next) h.cleanMessageCache(next - 1) case <-h.ctx.Done(): @@ -327,13 +327,8 @@ func (h *Hare) fetchFull(ctx context.Context, peer p2p.Peer, msgId types.Hash32) if respLen >= MAX_EXCHANGE_SIZE { return errResponseTooBig } - buff := make([]byte, respLen) - _, err = io.ReadFull(rw, buff) - if err != nil { - return fmt.Errorf("read response buffer: %w", err) - } - err = codec.Decode(buff, resp) - if err != nil { + b, err := codec.DecodeFrom(rw, resp) + if err != nil || b != int(respLen) { return fmt.Errorf("decode response: %w", err) } return nil diff --git a/hare4/hare_test.go b/hare4/hare_test.go index b8927cf07f..2ffc3572f2 100644 --- a/hare4/hare_test.go +++ b/hare4/hare_test.go @@ -1289,14 +1289,17 @@ func TestHare_ReconstructAll(t *testing.T) { cluster.drainInteractiveMessages() // cluster setup - calls := [3]int{} - for i, n := range cluster.nodes { - n.mverifier.EXPECT().Verify(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()). 
- DoAndReturn(func(_ signing.Domain, _ types.NodeID, _ []byte, _ types.EdSignature) bool {
- calls[i] = calls[i] + 1
- // return false on the first call, true on all later calls
- return !(calls[i] == 1)
- }).AnyTimes()
+ for _, n := range cluster.nodes {
+ gomock.InOrder(
+ n.mverifier.EXPECT().
+ Verify(signing.PROPOSAL, gomock.Any(), gomock.Any(), gomock.Any()).
+ Return(false).
+ MaxTimes(1),
+ n.mverifier.EXPECT().
+ Verify(signing.PROPOSAL, gomock.Any(), gomock.Any(), gomock.Any()).
+ Return(true).
+ AnyTimes(),
+ )
 }
 cluster.setup()
 cluster.genProposals(layer)
diff --git a/hare4/types.go b/hare4/types.go
index 990ccc387b..1ae1c70c41 100644
--- a/hare4/types.go
+++ b/hare4/types.go
@@ -94,7 +94,7 @@ type Value struct {
 Reference *types.Hash32
 // CompactProposals is the array of two-byte SipHash(vrf,proposal_id) and is only used in the preround
 // phase of the protocol.
- CompactProposals []types.CompactProposalID `scale:"max=2050"`
+ CompactProposals []types.CompactProposalID `scale:"max=2350"`
 }
 
 type Body struct {
diff --git a/node/node.go b/node/node.go
index 6ac7593199..de83e07228 100644
--- a/node/node.go
+++ b/node/node.go
@@ -2306,14 +2306,14 @@ type proposalConsumerHare struct {
 }
 
 func (p *proposalConsumerHare) IsKnown(layer types.LayerID, proposal types.ProposalID) bool {
- if p.clock.CurrentLayer() < p.h3DisableLayer {
+ if layer < p.h3DisableLayer {
 return p.hare3.IsKnown(layer, proposal)
 }
 return p.hare4.IsKnown(layer, proposal)
 }
 
 func (p *proposalConsumerHare) OnProposal(proposal *types.Proposal) error {
- if p.clock.CurrentLayer() < p.h3DisableLayer {
+ if proposal.Layer < p.h3DisableLayer {
 return p.hare3.OnProposal(proposal)
 }
 return p.hare4.OnProposal(proposal)

From 598c46c199a6a3b8a97b8b43e2dd3c78692b8ae0 Mon Sep 17 00:00:00 2001
From: acud <12988138+acud@users.noreply.github.com>
Date: Fri, 19 Jul 2024 14:51:41 -0600
Subject: [PATCH 04/17] chore: housekeeping

---
 node/node.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/node/node.go b/node/node.go
index de83e07228..3c2349d099 100644
--- a/node/node.go
+++ b/node/node.go
@@ -2301,7 +2301,6 @@ func onMainNet(conf *config.Config) bool {
 type proposalConsumerHare struct {
 hare3 *hare3.Hare
 h3DisableLayer types.LayerID
- clock *timesync.NodeClock
 hare4 *hare4.Hare
 }

From d132d19420e4290ef9b02b4aa3d6a4f6c3e3834d Mon Sep 17 00:00:00 2001
From: acud <12988138+acud@users.noreply.github.com>
Date: Fri, 19 Jul 2024 15:03:15 -0600
Subject: [PATCH 05/17] chore: address pr comments

---
 hare4/hare.go | 15 +++------------
 hare4/hare_test.go | 12 ------------
 hare4/protocol.go | 2 +-
 node/node.go | 1 -
 4 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/hare4/hare.go b/hare4/hare.go
index af63557aba..4fb0b5780c 100644
--- a/hare4/hare.go
+++ b/hare4/hare.go
@@ -385,8 +385,7 @@ func (h *Hare) reconstructProposals(ctx context.Context, peer p2p.Peer, msgId ty
 for i := range proposals {
 proposalIds[i] = proposalTuple{id: proposals[i].ID(), compact: compacted[i]}
 }
- slices.SortFunc(proposalIds, sortProposalsTuple)
-
+ slices.SortFunc(proposalIds, func(i, j proposalTuple) int { return bytes.Compare(i.id[:], j.id[:]) })
 taken := make([]bool, len(proposals))
 findProp := func(id types.CompactProposalID) (bool, types.ProposalID) {
 for i := 0; i < len(proposalIds); i++ {
@@ -427,7 +426,7 @@ func (h *Hare) reconstructProposals(ctx context.Context, peer p2p.Peer, msgId ty
 // sort the found proposals and unset the compact proposals
 // field before trying to check the signature
 // since it would add unnecessary data to the hasher
- 
slices.SortFunc(msg.Value.Proposals, sortProposalIds) + slices.SortFunc(msg.Value.Proposals, func(i, j types.ProposalID) int { return bytes.Compare(i[:], j[:]) }) msg.Value.CompactProposals = []types.CompactProposalID{} return nil } @@ -469,7 +468,7 @@ func (h *Hare) Handler(ctx context.Context, peer p2p.Peer, buf []byte) error { if err != nil { return fmt.Errorf("fetch full: %w", err) } - slices.SortFunc(msg.Value.Proposals, sortProposalIds) + slices.SortFunc(msg.Value.Proposals, func(i, j types.ProposalID) int { return bytes.Compare(i[:], j[:]) }) msg.Value.CompactProposals = []types.CompactProposalID{} fetched = true case err != nil: @@ -920,11 +919,3 @@ type proposalTuple struct { id types.ProposalID compact types.CompactProposalID } - -func sortProposalsTuple(i, j proposalTuple) int { - return sortProposalIds(i.id, j.id) -} - -func sortProposalIds(i, j types.ProposalID) int { - return bytes.Compare(i[:], j[:]) -} diff --git a/hare4/hare_test.go b/hare4/hare_test.go index 2ffc3572f2..b6cead08de 100644 --- a/hare4/hare_test.go +++ b/hare4/hare_test.go @@ -1121,18 +1121,6 @@ func TestHare_AddProposal(t *testing.T) { require.ErrorIs(t, hare.OnProposal(p), store.ErrProposalExists) } -func TestProposalIDSort(t *testing.T) { - var ( - a = types.ProposalID{0, 3, 2, 3, 5} - b = types.ProposalID{0, 1, 2, 3, 4} - c = types.ProposalID{11, 4, 6, 254, 0} - d = types.ProposalID{0, 1, 2, 3, 5} - ) - srt := []types.ProposalID{c, b, a, d} - slices.SortFunc(srt, sortProposalIds) - require.Equal(t, []types.ProposalID{b, d, a, c}, srt) -} - // TestHare_ReconstructForward tests that a message // could be reconstructed on a downstream peer that // receives a gossipsub message from a forwarding node diff --git a/hare4/protocol.go b/hare4/protocol.go index 5e196a831d..0046a49534 100644 --- a/hare4/protocol.go +++ b/hare4/protocol.go @@ -111,7 +111,7 @@ type protocol struct { } func (p *protocol) OnInitial(proposals []types.ProposalID) { - slices.SortFunc(proposals, sortProposalIds) + slices.SortFunc(proposals, func(i, j types.ProposalID) int { return bytes.Compare(i[:], j[:]) }) p.mu.Lock() defer p.mu.Unlock() p.initial = proposals diff --git a/node/node.go b/node/node.go index 3c2349d099..600632ca8c 100644 --- a/node/node.go +++ b/node/node.go @@ -931,7 +931,6 @@ func (app *App) initServices(ctx context.Context) error { } propHare := &proposalConsumerHare{ - clock: app.clock, hare3: app.hare3, h3DisableLayer: app.Config.HARE3.DisableLayer, hare4: app.hare4, From 4da1875601f16977e05c4775041bc1d59b4db32c Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Fri, 19 Jul 2024 15:48:23 -0600 Subject: [PATCH 06/17] fix build --- hare4/hare.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hare4/hare.go b/hare4/hare.go index 4fb0b5780c..683cbfe6fa 100644 --- a/hare4/hare.go +++ b/hare4/hare.go @@ -821,8 +821,8 @@ func (h *Hare) selectProposals(session *session) []types.ProposalID { h.log.Warn("proposal has different beacon value", zap.Uint32("lid", session.lid.Uint32()), zap.Stringer("id", p.ID()), - zap.String("proposal_beacon", p.Beacon().ShortString()), - zap.String("epoch_beacon", session.beacon.ShortString()), + zap.Stringer("proposal_beacon", p.Beacon()), + zap.Stringer("epoch_beacon", session.beacon), ) } } From 45745c7be98aee7675ea57b160c2283a63361b77 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Fri, 19 Jul 2024 15:53:07 -0600 Subject: [PATCH 07/17] chore: make gen --- hare4/types_scale.go | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/hare4/types_scale.go b/hare4/types_scale.go index 8d9dda5969..2f06ce6ff9 100644 --- a/hare4/types_scale.go +++ b/hare4/types_scale.go @@ -62,7 +62,7 @@ func (t *Value) EncodeScale(enc *scale.Encoder) (total int, err error) { total += n } { - n, err := scale.EncodeStructSliceWithLimit(enc, t.CompactProposals, 2050) + n, err := scale.EncodeStructSliceWithLimit(enc, t.CompactProposals, 2350) if err != nil { return total, err } @@ -89,7 +89,7 @@ func (t *Value) DecodeScale(dec *scale.Decoder) (total int, err error) { t.Reference = field } { - field, n, err := scale.DecodeStructSliceWithLimit[types.CompactProposalID](dec, 2050) + field, n, err := scale.DecodeStructSliceWithLimit[types.CompactProposalID](dec, 2350) if err != nil { return total, err } From be7806ba618c41f0834e0945260bc0e3bed299ed Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:06:26 -0600 Subject: [PATCH 08/17] chore: disable hare4 --- config/config.go | 2 +- config/mainnet.go | 4 +--- config/presets/fastnet.go | 11 +---------- config/presets/testnet.go | 10 +--------- node/node.go | 1 + 5 files changed, 5 insertions(+), 23 deletions(-) diff --git a/config/config.go b/config/config.go index 83a3832a49..aac864e8da 100644 --- a/config/config.go +++ b/config/config.go @@ -193,7 +193,7 @@ func DefaultConfig() Config { P2P: p2p.DefaultConfig(), API: grpcserver.DefaultConfig(), HARE3: hare3.DefaultConfig(), - HARE4: hare4.DefaultConfig(), + HARE4: hare4.DefaultConfig(), // DEFAULT HARE4 IS DISABLED HareEligibility: eligibility.DefaultConfig(), Beacon: beacon.DefaultConfig(), TIME: timeConfig.DefaultConfig(), diff --git a/config/mainnet.go b/config/mainnet.go index cdf5bb8a9d..fdcc88d9aa 100644 --- a/config/mainnet.go +++ b/config/mainnet.go @@ -74,9 +74,7 @@ func MainnetConfig() Config { hare3conf.DisableLayer = forkLayer hare4conf := hare4.DefaultConfig() - hare4conf.Committee = 50 - hare4conf.Enable = true - hare4conf.EnableLayer = forkLayer + hare4conf.Enable = false return Config{ BaseConfig: BaseConfig{ DataDirParent: defaultDataDir, diff --git a/config/presets/fastnet.go b/config/presets/fastnet.go index aabc1bb236..de09ac0d19 100644 --- a/config/presets/fastnet.go +++ b/config/presets/fastnet.go @@ -33,22 +33,13 @@ func fastnet() config.Config { conf.ATXGradeDelay = 1 * time.Second conf.HARE3.Enable = true - conf.HARE3.DisableLayer = 22 + conf.HARE3.DisableLayer = types.LayerID(math.MaxUint32) conf.HARE3.Committee = 800 conf.HARE3.Leaders = 10 conf.HARE3.PreroundDelay = 3 * time.Second conf.HARE3.RoundDuration = 700 * time.Millisecond conf.HARE3.IterationsLimit = 2 - conf.HARE4.Enable = true - conf.HARE4.EnableLayer = types.LayerID(22) - conf.HARE4.DisableLayer = types.LayerID(math.MaxUint32) - conf.HARE4.Committee = 800 - conf.HARE4.Leaders = 10 - conf.HARE4.PreroundDelay = 3 * time.Second - conf.HARE4.RoundDuration = 700 * time.Millisecond - conf.HARE4.IterationsLimit = 2 - conf.P2P.MinPeers = 10 conf.Genesis = config.GenesisConfig{ diff --git a/config/presets/testnet.go b/config/presets/testnet.go index 4b9133cd40..892d924aaf 100644 --- a/config/presets/testnet.go +++ b/config/presets/testnet.go @@ -22,7 +22,6 @@ import ( "github.com/spacemeshos/go-spacemesh/fetch" "github.com/spacemeshos/go-spacemesh/hare3" "github.com/spacemeshos/go-spacemesh/hare3/eligibility" - "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/miner" "github.com/spacemeshos/go-spacemesh/p2p" 
"github.com/spacemeshos/go-spacemesh/syncer" @@ -52,16 +51,10 @@ func testnet() config.Config { } hare3conf := hare3.DefaultConfig() hare3conf.Enable = true - hare3conf.EnableLayer = 0 - hare3conf.DisableLayer = 50 + hare3conf.EnableLayer = 7366 // NOTE(dshulyak) i forgot to set protocol name for testnet when we configured it manually. // we can't do rolling upgrade if protocol name changes, so lets keep it like that temporarily. hare3conf.ProtocolName = "" - hare4conf := hare4.DefaultConfig() - hare4conf.Enable = true - hare4conf.EnableLayer = 50 // TODO THIS NEEDS A VALUE - hare4conf.DisableLayer = math.MaxUint32 - defaultdir := filepath.Join(home, "spacemesh-testnet", "/") return config.Config{ Preset: "testnet", @@ -102,7 +95,6 @@ func testnet() config.Config { MinimalActiveSetWeight: []types.EpochMinimalActiveWeight{{Weight: 10_000}}, }, HARE3: hare3conf, - HARE4: hare4conf, HareEligibility: eligibility.Config{ ConfidenceParam: 20, }, diff --git a/node/node.go b/node/node.go index 7e06b83613..58e385f1fa 100644 --- a/node/node.go +++ b/node/node.go @@ -900,6 +900,7 @@ func (app *App) initServices(ctx context.Context) error { } if app.Config.HARE4.Enable { + panic("hare4 still not enabled") app.hare4 = hare4.New( app.clock, app.host, From 823a4e505a989c35c68cc025eba454800ffa9112 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Tue, 23 Jul 2024 17:07:04 -0600 Subject: [PATCH 09/17] chore: move panic --- node/node.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/node.go b/node/node.go index 58e385f1fa..e8ee52b6c0 100644 --- a/node/node.go +++ b/node/node.go @@ -900,7 +900,6 @@ func (app *App) initServices(ctx context.Context) error { } if app.Config.HARE4.Enable { - panic("hare4 still not enabled") app.hare4 = hare4.New( app.clock, app.host, @@ -929,6 +928,7 @@ func (app *App) initServices(ctx context.Context) error { ) return nil }) + panic("hare4 still not enabled") } propHare := &proposalConsumerHare{ From 3fcb975a880721b5792619bf25e029f107e5a77a Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:16:04 -0600 Subject: [PATCH 10/17] chore: address pr comment --- hare4/types.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hare4/types.go b/hare4/types.go index 1ae1c70c41..3837fcb973 100644 --- a/hare4/types.go +++ b/hare4/types.go @@ -92,8 +92,8 @@ type Value struct { Proposals []types.ProposalID `scale:"max=2350"` // Reference is set in messages for commit and notify rounds. Reference *types.Hash32 - // CompactProposals is the array of two-byte SipHash(vrf,proposal_id) and is only used in the preround - // phase of the protocol. + // CompactProposals is the array of compacted proposals IDs which are represented as truncated + // eligibility hashes. 
CompactProposals []types.CompactProposalID `scale:"max=2350"` } From 77b570259880dbf9487c8f0c6304c0fad0bef04a Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:55:58 -0600 Subject: [PATCH 11/17] reinstate CommitteeUpgrade --- config/presets/testnet.go | 5 +++++ hare4/hare.go | 35 ++++++++++++++++++++++++++--------- hare4/hare_test.go | 24 ++++++++++++++++++++++++ hare4/legacy_oracle.go | 4 ++-- 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/config/presets/testnet.go b/config/presets/testnet.go index 892d924aaf..669c902feb 100644 --- a/config/presets/testnet.go +++ b/config/presets/testnet.go @@ -22,6 +22,7 @@ import ( "github.com/spacemeshos/go-spacemesh/fetch" "github.com/spacemeshos/go-spacemesh/hare3" "github.com/spacemeshos/go-spacemesh/hare3/eligibility" + "github.com/spacemeshos/go-spacemesh/hare4" "github.com/spacemeshos/go-spacemesh/miner" "github.com/spacemeshos/go-spacemesh/p2p" "github.com/spacemeshos/go-spacemesh/syncer" @@ -55,6 +56,9 @@ func testnet() config.Config { // NOTE(dshulyak) i forgot to set protocol name for testnet when we configured it manually. // we can't do rolling upgrade if protocol name changes, so lets keep it like that temporarily. hare3conf.ProtocolName = "" + + hare4conf := hare4.DefaultConfig() + hare4conf.Enable = false defaultdir := filepath.Join(home, "spacemesh-testnet", "/") return config.Config{ Preset: "testnet", @@ -95,6 +99,7 @@ func testnet() config.Config { MinimalActiveSetWeight: []types.EpochMinimalActiveWeight{{Weight: 10_000}}, }, HARE3: hare3conf, + HARE4: hare4conf, HareEligibility: eligibility.Config{ ConfidenceParam: 20, }, diff --git a/hare4/hare.go b/hare4/hare.go index 683cbfe6fa..cffa2ec366 100644 --- a/hare4/hare.go +++ b/hare4/hare.go @@ -49,21 +49,34 @@ var ( fetchFullTimeout = 5 * time.Second ) +type CommitteeUpgrade struct { + Layer types.LayerID + Size uint16 +} + type Config struct { - Enable bool `mapstructure:"enable"` - EnableLayer types.LayerID `mapstructure:"enable-layer"` - DisableLayer types.LayerID `mapstructure:"disable-layer"` - Committee uint16 `mapstructure:"committee"` - Leaders uint16 `mapstructure:"leaders"` - IterationsLimit uint8 `mapstructure:"iterations-limit"` - PreroundDelay time.Duration `mapstructure:"preround-delay"` - RoundDuration time.Duration `mapstructure:"round-duration"` + Enable bool `mapstructure:"enable"` + EnableLayer types.LayerID `mapstructure:"enable-layer"` + DisableLayer types.LayerID `mapstructure:"disable-layer"` + Committee uint16 `mapstructure:"committee"` + CommitteeUpgrade *CommitteeUpgrade + Leaders uint16 `mapstructure:"leaders"` + IterationsLimit uint8 `mapstructure:"iterations-limit"` + PreroundDelay time.Duration `mapstructure:"preround-delay"` + RoundDuration time.Duration `mapstructure:"round-duration"` // LogStats if true will log iteration statistics with INFO level at the start of the next iteration. // This requires additional computation and should be used for debugging only. 
LogStats bool `mapstructure:"log-stats"` ProtocolName string `mapstructure:"protocolname"` } +func (cfg *Config) CommitteeFor(layer types.LayerID) uint16 { + if cfg.CommitteeUpgrade != nil && layer >= cfg.CommitteeUpgrade.Layer { + return cfg.CommitteeUpgrade.Size + } + return cfg.Committee +} + func (cfg *Config) Validate(zdist time.Duration) error { terminates := cfg.roundStart(IterRound{Iter: cfg.IterationsLimit, Round: hardlock}) if terminates > zdist { @@ -81,6 +94,10 @@ func (cfg *Config) MarshalLogObject(encoder zapcore.ObjectEncoder) error { encoder.AddUint32("enabled layer", cfg.EnableLayer.Uint32()) encoder.AddUint32("disabled layer", cfg.DisableLayer.Uint32()) encoder.AddUint16("committee", cfg.Committee) + if cfg.CommitteeUpgrade != nil { + encoder.AddUint32("committee upgrade layer", cfg.CommitteeUpgrade.Layer.Uint32()) + encoder.AddUint16("committee upgrade size", cfg.CommitteeUpgrade.Size) + } encoder.AddUint16("leaders", cfg.Leaders) encoder.AddUint8("iterations limit", cfg.IterationsLimit) encoder.AddDuration("preround delay", cfg.PreroundDelay) @@ -574,7 +591,7 @@ func (h *Hare) onLayer(layer types.LayerID) { beacon: beacon, signers: maps.Values(h.signers), vrfs: make([]*types.HareEligibility, len(h.signers)), - proto: newProtocol(h.config.Committee/2 + 1), + proto: newProtocol(h.config.CommitteeFor(layer)/2 + 1), } h.sessions[layer] = s.proto h.mu.Unlock() diff --git a/hare4/hare_test.go b/hare4/hare_test.go index b6cead08de..6f5d00147a 100644 --- a/hare4/hare_test.go +++ b/hare4/hare_test.go @@ -1121,6 +1121,30 @@ func TestHare_AddProposal(t *testing.T) { require.ErrorIs(t, hare.OnProposal(p), store.ErrProposalExists) } +func TestHareConfig_CommitteeUpgrade(t *testing.T) { + t.Parallel() + t.Run("no upgrade", func(t *testing.T) { + cfg := Config{ + Committee: 400, + } + require.Equal(t, cfg.Committee, cfg.CommitteeFor(0)) + require.Equal(t, cfg.Committee, cfg.CommitteeFor(100)) + }) + t.Run("upgrade", func(t *testing.T) { + cfg := Config{ + Committee: 400, + CommitteeUpgrade: &CommitteeUpgrade{ + Layer: 16, + Size: 50, + }, + } + require.EqualValues(t, cfg.Committee, cfg.CommitteeFor(0)) + require.EqualValues(t, cfg.Committee, cfg.CommitteeFor(15)) + require.EqualValues(t, 50, cfg.CommitteeFor(16)) + require.EqualValues(t, 50, cfg.CommitteeFor(100)) + }) +} + // TestHare_ReconstructForward tests that a message // could be reconstructed on a downstream peer that // receives a gossipsub message from a forwarding node diff --git a/hare4/legacy_oracle.go b/hare4/legacy_oracle.go index dbb263bdb4..2c8cc026b8 100644 --- a/hare4/legacy_oracle.go +++ b/hare4/legacy_oracle.go @@ -26,7 +26,7 @@ func (lg *legacyOracle) validate(msg *Message) grade { if msg.Eligibility.Count == 0 { return grade0 } - committee := int(lg.config.Committee) + committee := int(lg.config.CommitteeFor(msg.Layer)) if msg.Round == propose { committee = int(lg.config.Leaders) } @@ -50,7 +50,7 @@ func (lg *legacyOracle) active( ir IterRound, ) *types.HareEligibility { vrf := eligibility.GenVRF(context.Background(), signer.VRFSigner(), beacon, layer, ir.Absolute()) - committee := int(lg.config.Committee) + committee := int(lg.config.CommitteeFor(layer)) if ir.Round == propose { committee = int(lg.config.Leaders) } From f75c44c02a98c8d785ce97364207088e85c963c3 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:57:20 -0600 Subject: [PATCH 12/17] test: remove compact function injection --- hare4/hare.go | 35 +++++++++++------------------- hare4/hare_test.go | 54 
++++++++++++++++++++++++++++------------------ 2 files changed, 46 insertions(+), 43 deletions(-) diff --git a/hare4/hare.go b/hare4/hare.go index cffa2ec366..96803f05a5 100644 --- a/hare4/hare.go +++ b/hare4/hare.go @@ -46,6 +46,7 @@ var ( errResponseTooBig = errors.New("response too big") errCannotFindProposal = errors.New("cannot find proposal") errNoEligibilityProofs = errors.New("no eligibility proofs") + errSigTooShort = errors.New("signature too short") fetchFullTimeout = 5 * time.Second ) @@ -224,7 +225,6 @@ func New( atxsdata: atxsdata, proposals: proposals, verifier: verif, - compactFn: compactTruncate, oracle: &legacyOracle{ log: zap.NewNop(), oracle: oracle, @@ -268,7 +268,6 @@ type Hare struct { atxsdata *atxsdata.Data proposals *store.Store verifier verifier - compactFn func([]byte) []byte oracle *legacyOracle sync system.SyncStateProvider patrol *layerpatrol.LayerPatrol @@ -397,7 +396,7 @@ func (h *Hare) reconstructProposals(ctx context.Context, peer p2p.Peer, msgId ty if len(proposals) == 0 { return errNoLayerProposals } - compacted := h.compactProposals(h.compactFn, msg.Layer, proposals) + compacted := h.compactProposals(msg.Layer, proposals) proposalIds := make([]proposalTuple, len(proposals)) for i := range proposals { proposalIds[i] = proposalTuple{id: proposals[i].ID(), compact: compacted[i]} @@ -718,7 +717,7 @@ func (h *Hare) onOutput(session *session, ir IterRound, out output) error { msg.Signature = session.signers[i].Sign(signing.HARE, msg.ToMetadata().ToBytes()) if ir.Round == preround { var err error - msg.Body.Value.CompactProposals, err = h.compactProposalIds(h.compactFn, msg.Layer, + msg.Body.Value.CompactProposals, err = h.compactProposalIds(msg.Layer, out.message.Body.Value.Proposals) if err != nil { h.log.Debug("failed to compact proposals", zap.Error(err)) @@ -887,32 +886,20 @@ type session struct { vrfs []*types.HareEligibility } -type compactFunc func([]byte) []byte - -// compactFunc will truncate a given byte slice to a shorter -// byte slice by reslicing. 
-func compactTruncate(b []byte) []byte { - return b[:4] -} - -func compactVrf(compacter compactFunc, v types.VrfSignature) (c types.CompactProposalID) { - b := compacter(v[:]) - copy(c[:], b) - return c -} - -func (h *Hare) compactProposals(compacter compactFunc, layer types.LayerID, +func (h *Hare) compactProposals(layer types.LayerID, proposals []*types.Proposal, ) []types.CompactProposalID { compactProposals := make([]types.CompactProposalID, len(proposals)) for i, prop := range proposals { vrf := prop.EligibilityProofs[0].Sig - compactProposals[i] = compactVrf(compacter, vrf) + var c types.CompactProposalID + copy(c[:], vrf[:4]) + compactProposals[i] = c } return compactProposals } -func (h *Hare) compactProposalIds(compacter compactFunc, layer types.LayerID, +func (h *Hare) compactProposalIds(layer types.LayerID, proposals []types.ProposalID, ) ([]types.CompactProposalID, error) { compactProposals := make([]types.CompactProposalID, len(proposals)) @@ -927,7 +914,11 @@ func (h *Hare) compactProposalIds(compacter compactFunc, layer types.LayerID, if len(fp.EligibilityProofs) == 0 { return nil, errNoEligibilityProofs } - compactProposals[i] = compactVrf(compacter, fp.EligibilityProofs[0].Sig) + + var c types.CompactProposalID + copy(c[:], fp.EligibilityProofs[0].Sig[:4]) + + compactProposals[i] = c } return compactProposals, nil } diff --git a/hare4/hare_test.go b/hare4/hare_test.go index 6f5d00147a..8b4a87887d 100644 --- a/hare4/hare_test.go +++ b/hare4/hare_test.go @@ -308,9 +308,9 @@ func withMockVerifier() clusterOpt { } } -func withMockCompactFn(f func([]byte) []byte) clusterOpt { +func withCollidingProposals() clusterOpt { return func(cluster *lockstepCluster) { - cluster.mockCompactFn = f + cluster.collidingProposals = true } } @@ -349,9 +349,9 @@ type lockstepCluster struct { nodes []*node signers []*node // nodes that active on consensus but don't run hare instance - mockVerify bool - mockCompactFn func([]byte) []byte - units struct { + mockVerify bool + collidingProposals bool + units struct { min, max int } proposals struct { @@ -396,9 +396,6 @@ func (cl *lockstepCluster) addActive(n int) *lockstepCluster { if cl.mockVerify { nn = nn.withVerifier() } - if cl.mockCompactFn != nil { - nn.hare.compactFn = cl.mockCompactFn - } cl.addNode(nn) } return cl @@ -502,7 +499,11 @@ func (cl *lockstepCluster) genProposals(lid types.LayerID, skipNodes ...int) { proposal.SetID(id) proposal.Ballot.SetID(bid) var vrf types.VrfSignature - cl.t.rng.Read(vrf[:]) + if !cl.collidingProposals { + // if we want non-colliding proposals we copy from the rng + // otherwise it is kept as an array of zeroes + cl.t.rng.Read(vrf[:]) + } proposal.Ballot.EligibilityProofs = append(proposal.Ballot.EligibilityProofs, types.VotingEligibility{Sig: vrf}) proposal.SetBeacon(proposal.EpochData.Beacon) @@ -897,7 +898,7 @@ func TestHandler(t *testing.T) { msg1.Signature = n.signer.Sign(signing.HARE, msg1.ToMetadata().ToBytes()) msg1.Value.Proposals = nil msg1.Value.CompactProposals = []types.CompactProposalID{ - compactVrf(compactTruncate, p1.Ballot.EligibilityProofs[0].Sig), + compactVrf(p1.Ballot.EligibilityProofs[0].Sig), } msg2 := &Message{} @@ -908,7 +909,7 @@ func TestHandler(t *testing.T) { msg2.Signature = n.signer.Sign(signing.HARE, msg2.ToMetadata().ToBytes()) msg2.Value.Proposals = nil msg2.Value.CompactProposals = []types.CompactProposalID{ - compactVrf(compactTruncate, p2.Ballot.EligibilityProofs[0].Sig), + compactVrf(p2.Ballot.EligibilityProofs[0].Sig), } require.NoError(t, 
n.hare.Handler(context.Background(), "", codec.MustEncode(msg1)))
@@ -1184,7 +1185,7 @@ func TestHare_ReconstructForward(t *testing.T) {
 n.mpublisher.EXPECT().
 Publish(gomock.Any(), gomock.Any(), gomock.Any()).
 Do(func(ctx context.Context, proto string, msg []byte) error {
- // here we wanna call the handler on the second node
+ // here we want to call the handler on the second node
 // but then call the handler on the third with the incoming peer id
 // of the second node, this way we know the peers could resolve between
 // themselves without having the connection to the original sender
@@ -1345,7 +1346,7 @@
 }
 
 // TestHare_ReconstructCollision tests that the nodes go into a
-// full message exchange in the case that there's a siphash collision.
+// full message exchange in the case that there's a compacted id collision.
 func TestHare_ReconstructCollision(t *testing.T) {
 cfg := DefaultConfig()
 cfg.LogStats = true
@@ -1359,10 +1360,7 @@ func TestHare_ReconstructCollision(t *testing.T) {
 genesis: types.GetEffectiveGenesis(),
 }
 
- fn := func(_ []byte) []byte {
- return []byte{0xab, 0xab, 0xab, 0xab}
- }
- cluster := newLockstepCluster(tst, withMockCompactFn(fn), withProposals(1)).
+ cluster := newLockstepCluster(tst, withProposals(1), withCollidingProposals()).
 addActive(2)
 if cluster.signersCount > 0 {
 cluster = cluster.addSigner(cluster.signersCount)
@@ -1371,16 +1369,24 @@ func TestHare_ReconstructCollision(t *testing.T) {
 layer := tst.genesis + 1
 
 // scenario:
- // node 1 has generated 1 proposal that (mocked) hashes to the prefix 0xab - both nodes know the proposal
- // node 2 has generated 1 proposal that also hashes to 0xab (but node 1 doesn't know about it)
+ // node 1 has generated 1 proposal that hashes to the prefix 0x00 - both nodes know the proposal
+ // node 2 has generated 1 proposal that also hashes to 0x00 (but node 1 doesn't know about it)
 // so the two proposals collide and then we check that the nodes actually go into a round of
 // exchanging the missing/colliding hashes and then the signature verification (not mocked)
 // should pass and that a full exchange of all hashes is not triggered (disambiguates case of
 // failed signature vs. 
hashes colliding - there's a difference in number of single prefixes // that are sent, but the response should be the same) - go func() { <-cluster.nodes[1].tracer.compactReq }() // node 2 gets the request - go func() { <-cluster.nodes[0].tracer.compactResp }() // node 1 gets the response + var wg sync.WaitGroup + wg.Add(2) + go func() { + <-cluster.nodes[1].tracer.compactReq + wg.Done() + }() // node 2 gets the request + go func() { + <-cluster.nodes[0].tracer.compactResp + wg.Done() + }() // node 1 gets the response cluster.setup() @@ -1411,6 +1417,12 @@ func TestHare_ReconstructCollision(t *testing.T) { default: t.Fatal("no result") } + wg.Wait() require.Empty(t, n.hare.Running()) } } + +func compactVrf(v types.VrfSignature) (c types.CompactProposalID) { + copy(c[:], v[:4]) + return c +} From 39efcec95f01bcdc83be92d861c47c2875f86a88 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Wed, 31 Jul 2024 18:01:07 -0600 Subject: [PATCH 13/17] remove mainnet fork --- config/mainnet.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/config/mainnet.go b/config/mainnet.go index fdcc88d9aa..f2b45a1774 100644 --- a/config/mainnet.go +++ b/config/mainnet.go @@ -62,7 +62,6 @@ func MainnetConfig() Config { logging.TrtlLoggerLevel = zapcore.WarnLevel.String() logging.AtxHandlerLevel = zapcore.WarnLevel.String() logging.ProposalListenerLevel = zapcore.WarnLevel.String() - forkLayer := types.LayerID(111_111_111) // TODO THIS NEEDS A NUMBER hare3conf := hare3.DefaultConfig() hare3conf.Committee = 400 hare3conf.Enable = true @@ -71,7 +70,6 @@ func MainnetConfig() Config { Layer: 105_720, // July 15, 2024, 10:00:00 AM UTC Size: 50, } - hare3conf.DisableLayer = forkLayer hare4conf := hare4.DefaultConfig() hare4conf.Enable = false From 6a0d12073a1a8952aff96e6134077a784b24390a Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Thu, 1 Aug 2024 07:54:34 -0600 Subject: [PATCH 14/17] Update hare4/hare.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bartosz Różański --- hare4/hare.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hare4/hare.go b/hare4/hare.go index 96803f05a5..7773e2f8bc 100644 --- a/hare4/hare.go +++ b/hare4/hare.go @@ -915,10 +915,7 @@ func (h *Hare) compactProposalIds(layer types.LayerID, return nil, errNoEligibilityProofs } - var c types.CompactProposalID - copy(c[:], fp.EligibilityProofs[0].Sig[:4]) - - compactProposals[i] = c + compactProposals[i] = types.CompactProposalID(vrf[:]) } return compactProposals, nil } From 11d82a71416e459b0b1b834b7fe4465901b83121 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Thu, 1 Aug 2024 07:54:58 -0600 Subject: [PATCH 15/17] Update hare4/hare_test.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bartosz Różański --- hare4/hare_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hare4/hare_test.go b/hare4/hare_test.go index 8b4a87887d..70f363a8f6 100644 --- a/hare4/hare_test.go +++ b/hare4/hare_test.go @@ -1423,6 +1423,5 @@ func TestHare_ReconstructCollision(t *testing.T) { } func compactVrf(v types.VrfSignature) (c types.CompactProposalID) { - copy(c[:], v[:4]) - return c + return types.CompactProposalID(v[:]) } From 9b39e4bc74783cc69bf6372a03095c4222f6ec4b Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Thu, 1 Aug 2024 09:44:09 -0600 Subject: [PATCH 16/17] 
fix build --- hare4/hare.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hare4/hare.go b/hare4/hare.go index 7773e2f8bc..0ab72e2a85 100644 --- a/hare4/hare.go +++ b/hare4/hare.go @@ -915,7 +915,7 @@ func (h *Hare) compactProposalIds(layer types.LayerID, return nil, errNoEligibilityProofs } - compactProposals[i] = types.CompactProposalID(vrf[:]) + compactProposals[i] = types.CompactProposalID(fp.EligibilityProofs[0].Sig[:]) } return compactProposals, nil } From 59fe1b1700a96402dc1f04aecb7834c462604163 Mon Sep 17 00:00:00 2001 From: acud <12988138+acud@users.noreply.github.com> Date: Thu, 1 Aug 2024 09:51:29 -0600 Subject: [PATCH 17/17] lint --- hare4/hare.go | 1 - 1 file changed, 1 deletion(-) diff --git a/hare4/hare.go b/hare4/hare.go index 0ab72e2a85..7789c6c723 100644 --- a/hare4/hare.go +++ b/hare4/hare.go @@ -46,7 +46,6 @@ var ( errResponseTooBig = errors.New("response too big") errCannotFindProposal = errors.New("cannot find proposal") errNoEligibilityProofs = errors.New("no eligibility proofs") - errSigTooShort = errors.New("signature too short") fetchFullTimeout = 5 * time.Second )
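
A note for readers of the series: the preround compact-ID mechanism above boils down to truncating a proposal's first eligibility VRF signature (EligibilityProofs[0].Sig) to a two-byte prefix (types.CompactProposalID) and matching prefixes against locally known proposals. The following minimal sketch is illustrative only; the names proposal, compact, and matchCompact are not identifiers from the patches. It shows why a prefix that matches zero or several known proposals, the case exercised by TestHare_ReconstructCollision, has to fall back to the full CompactIdRequest/CompactIdResponse exchange.

package main

import "fmt"

// Illustrative stand-ins for the real types. In the patches the prefix is
// taken from the proposal's first eligibility VRF signature, and
// CompactProposalID is a two-byte array.
type (
	proposalID [20]byte
	vrfSig     [80]byte
	compactID  [2]byte
)

type proposal struct {
	id  proposalID
	vrf vrfSig
}

// compact truncates the eligibility VRF to its two-byte prefix.
func compact(p proposal) (c compactID) {
	copy(c[:], p.vrf[:])
	return c
}

// matchCompact resolves a compact ID against locally known proposals. ok is
// false when the prefix matches zero or several proposals; in the protocol
// that forces a stream request to the peer for the full proposal IDs.
func matchCompact(known []proposal, c compactID) (id proposalID, ok bool) {
	n := 0
	for _, p := range known {
		if compact(p) == c {
			id = p.id
			n++
		}
	}
	return id, n == 1
}

func main() {
	a := proposal{id: proposalID{1}, vrf: vrfSig{0xab, 0xab}}
	b := proposal{id: proposalID{2}, vrf: vrfSig{0xab, 0xab}} // colliding prefix
	if _, ok := matchCompact([]proposal{a, b}, compact(a)); !ok {
		fmt.Println("ambiguous prefix: fall back to a full ID exchange")
	}
}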
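Similarly, the CommitteeUpgrade logic reinstated in PATCH 11 is a single layer comparison. The sketch below restates Config.CommitteeFor together with the values used in TestHareConfig_CommitteeUpgrade; the local names (layerID, committeeUpgrade) are illustrative, not the patch's types.

package main

import "fmt"

type layerID uint32

// committeeUpgrade mirrors hare4.CommitteeUpgrade: from Layer onwards the
// committee uses Size instead of the base size.
type committeeUpgrade struct {
	layer layerID
	size  uint16
}

type config struct {
	committee uint16
	upgrade   *committeeUpgrade // nil when no upgrade is scheduled
}

// committeeFor restates Config.CommitteeFor from the series.
func (c config) committeeFor(l layerID) uint16 {
	if c.upgrade != nil && l >= c.upgrade.layer {
		return c.upgrade.size
	}
	return c.committee
}

func main() {
	cfg := config{committee: 400, upgrade: &committeeUpgrade{layer: 16, size: 50}}
	fmt.Println(cfg.committeeFor(15), cfg.committeeFor(16)) // prints: 400 50
}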