Skip to content

Commit

Permalink
Execute tortoise beacon round only if node is synced at the start of …
Browse files Browse the repository at this point in the history
…the epoch (#2715)

## Motivation

Tortoise beacon round should be skipped if node is not synced at the start of it.

Tortoise beacon checks that the miner is eligible to send any message during the round (proposal, initial vote, following vote) using miner weights from the previous epoch. If a node doesn't have ATXs at the start of the epoch, it is almost guaranteed to produce an invalid beacon.  

Probably related to #2699. Will close it after verifying.

## Changes
- skip tortoise beacon round if we are not synced at the start of it

## Test Plan
It addresses a race condition that only appears when a new node is added to the network. If this works, the latenodes test should be stabilized even further.

## TODO
- [x] Explain motivation or link existing issue(s)
- [ ] Test changes and document test plan
- [ ] Update documentation as needed

## DevOps Notes
<!-- Please uncheck these items as applicable to make DevOps aware of changes that may affect releases -->
- [x] This PR does not require configuration changes (e.g., environment variables, GitHub secrets, VM resources)
- [x] This PR does not affect public APIs
- [x] This PR does not rely on a new version of external services (PoET, elasticsearch, etc.)
- [x] This PR does not make changes to log messages (which monitoring infrastructure may rely on)


Co-authored-by: Nikita Kryuchkov <[email protected]>
  • Loading branch information
dshulyak and nkryuchkov committed Aug 26, 2021
1 parent 4b927f0 commit 7253950
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 13 deletions.
5 changes: 1 addition & 4 deletions blocks/blockeligibilityvalidator.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,7 @@ func (v BlockEligibilityValidator) BlockSignedAndEligible(block *types.Block) (b
numberOfEligibleBlocks, totalWeight)
}

epochBeacon, err := v.beaconProvider.GetBeacon(epochNumber)
if err != nil {
return false, fmt.Errorf("get beacon for epoch %v: %w", epochNumber, err)
}
epochBeacon := block.EligibilityProof.TortoiseBeacon

message, err := serializeVRFMessage(epochBeacon, epochNumber, counter)
if err != nil {
Expand Down
5 changes: 3 additions & 2 deletions blocks/blockoracle.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,9 @@ func (bo *Oracle) calcEligibilityProofs(epochNumber types.EpochID) (map[types.La

eligibleLayer := calcEligibleLayer(epochNumber, bo.layersPerEpoch, vrfSig)
eligibilityProofs[eligibleLayer] = append(eligibilityProofs[eligibleLayer], types.BlockEligibilityProof{
J: counter,
Sig: vrfSig,
J: counter,
Sig: vrfSig,
TortoiseBeacon: epochBeacon,
})
}

Expand Down
3 changes: 2 additions & 1 deletion cmd/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,8 @@ func (app *App) initServices(ctx context.Context,
AlwaysListen: app.Config.AlwaysListen,
}
syncer := syncer.NewSyncer(ctx, syncerConf, clock, msh, layerFetch, app.addLogger(SyncLogger, lg))

// TODO(dshulyak) this needs to be improved, but dependency graph is a bit complicated
tBeacon.SetSyncState(syncer)
blockOracle := blocks.NewMinerBlockOracle(layerSize, layersPerEpoch, atxDB, tBeacon, vrfSigner, nodeID, syncer.ListenToGossip, app.addLogger(BlockOracle, lg))

// TODO: we should probably decouple the apptest and the node (and duplicate as necessary) (#1926)
Expand Down
9 changes: 7 additions & 2 deletions common/types/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ type BlockEligibilityProof struct {

// Sig is the VRF signature from which the block's LayerID is derived.
Sig []byte

// TortoiseBeacon is the tortoise beacon value for this block.
TortoiseBeacon []byte
}

// BlockHeader includes all of a block's fields, except the list of transaction IDs, activation transaction IDs and the
Expand Down Expand Up @@ -437,9 +440,11 @@ func NewExistingBlock(layerIndex LayerID, data []byte, txs []TransactionID) *Blo
MiniBlock: MiniBlock{
BlockHeader: BlockHeader{
LayerIndex: layerIndex,
Data: data},
Data: data,
},
TxIDs: txs,
}}
},
}
b.Signature = signing.NewEdSigner().Sign(b.Bytes())
b.Initialize()
return &b
Expand Down
6 changes: 6 additions & 0 deletions hare/eligibility/beacon.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ func NewBeacon(beaconGetter blocks.BeaconGetter, confidenceParam uint32, logger
// Note: Value is concurrency-safe but not concurrency-optimized
// TODO: does this ever return an error? If not, remove it
func (b *Beacon) Value(ctx context.Context, epochID types.EpochID) (uint32, error) {
// TODO(nkryuchkov): remove when beacon sync is done
beaconSyncEnabled := false
if !beaconSyncEnabled {
return uint32(epochID), nil
}

// check cache
if val, ok := b.cache.Get(epochID); ok {
return val.(uint32), nil
Expand Down
3 changes: 3 additions & 0 deletions hare/eligibility/beacon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ func (mbp mockBeaconProvider) GetBeacon(types.EpochID) ([]byte, error) {
return mbp.value, nil
}

// TODO(nkryuchkov): enable when beacon sync is finished
func TestBeacon_Value(t *testing.T) {
t.Skip()

r := require.New(t)

b := NewBeacon(nil, 0, logtest.New(t))
Expand Down
8 changes: 5 additions & 3 deletions hare/eligibility/oracle.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ import (
"github.com/spacemeshos/go-spacemesh/log"
)

const vrfMsgCacheSize = 20 // numRounds per layer is <= 2. numConcurrentLayers<=10 (typically <=2) so numRounds*numConcurrentLayers <= 2*10 = 20 is a good upper bound
const activesCacheSize = 5 // we don't expect to handle more than two layers concurrently
const maxSupportedN = 1073741824 // higher values result in an overflow
// Sizing limits for the eligibility oracle.
const (
	// vrfMsgCacheSize is the VRF message cache capacity. numRounds per layer
	// is <= 2 and numConcurrentLayers <= 10 (typically <= 2), so
	// numRounds*numConcurrentLayers <= 2*10 = 20 is a good upper bound.
	vrfMsgCacheSize = 20

	// activesCacheSize is the actives cache capacity; we don't expect to
	// handle more than two layers concurrently.
	activesCacheSize = 5

	// maxSupportedN is 2^30 (1073741824); higher values result in an overflow.
	maxSupportedN = 1 << 30
)

type valueProvider interface {
Value(context.Context, types.EpochID) (uint32, error)
Expand Down
23 changes: 22 additions & 1 deletion tortoisebeacon/tortoise_beacon.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ type layerClock interface {
LayerToTime(id types.LayerID) time.Time
}

// SyncState is the interface used to check the state of the sync.
type SyncState interface {
// IsSynced reports whether the node is currently synced.
IsSynced(context.Context) bool
}

// New returns a new TortoiseBeacon.
func New(
conf Config,
Expand Down Expand Up @@ -120,6 +125,7 @@ type TortoiseBeacon struct {
layerDuration time.Duration
nodeID types.NodeID

sync SyncState
net broadcaster
atxDB activationDB
tortoiseBeaconDB tortoiseBeaconDB
Expand Down Expand Up @@ -156,13 +162,24 @@ type TortoiseBeacon struct {
proposalChans map[types.EpochID]chan *proposalMessageWithReceiptData
}

// SetSyncState sets the sync state provider stored in tb.sync.
// It must be executed only once: if a provider is already set, the call is
// reported through tb.Log.Panic.
func (tb *TortoiseBeacon) SetSyncState(sync SyncState) {
// A non-nil tb.sync means a provider was installed by an earlier call.
if tb.sync != nil {
tb.Log.Panic("sync state provider can be updated only once")
}
tb.sync = sync
}

// Start starts listening for layers and outputs.
func (tb *TortoiseBeacon) Start(ctx context.Context) error {
if !atomic.CompareAndSwapUint64(&tb.closed, 0, 1) {
tb.Log.Warning("attempt to start tortoise beacon more than once")
return nil
}
tb.Log.Info("Starting %v with the following config: %+v", protoName, tb.config)
tb.Log.Info("starting %v with the following config: %+v", protoName, tb.config)
if tb.sync == nil {
tb.Log.Panic("update sync state provider can't be nil")
}

ctx, cancel := context.WithCancel(ctx)
tb.tg = taskgroup.New(taskgroup.WithContext(ctx))
Expand Down Expand Up @@ -359,6 +376,10 @@ func (tb *TortoiseBeacon) handleEpoch(ctx context.Context, epoch types.EpochID)

return
}
if !tb.sync.IsSynced(ctx) {
tb.Log.With().Info("tortoise beacon protocol is skipped while node is not synced", epoch)
return
}

tb.Log.With().Info("Handling epoch",
log.Uint32("epoch_id", uint32(epoch)))
Expand Down
7 changes: 7 additions & 0 deletions tortoisebeacon/tortoise_beacon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ func (*validatorMock) ValidatePost([]byte, *types.Post, *types.PostMetadata, uin
return nil
}

// testSyncState is a test stub passed to SetSyncState; its boolean value is
// the sync status it reports.
type testSyncState bool

// IsSynced returns the stub's underlying boolean value; the context is ignored.
func (ss testSyncState) IsSynced(context.Context) bool {
return bool(ss)
}

func TestTortoiseBeacon(t *testing.T) {
t.Parallel()

Expand Down Expand Up @@ -75,6 +81,7 @@ func TestTortoiseBeacon(t *testing.T) {

tb := New(conf, ld, minerID, n1, mockDB, nil, edSgn, signing.NewEDVerifier(), vrfSigner, signing.VRFVerifier{}, mwc, clock, logger)
requirer.NotNil(tb)
tb.SetSyncState(testSyncState(true))

err = tb.Start(context.TODO())
requirer.NoError(err)
Expand Down

0 comments on commit 7253950

Please sign in to comment.