From d3de17e9834737277d47ebf10678ba8a23be0648 Mon Sep 17 00:00:00 2001 From: Dmitry Shulyak Date: Mon, 18 Sep 2023 10:48:22 +0000 Subject: [PATCH] tortoise: remove several bottlenecks during recovery (#5010) related: https://github.com/spacemeshos/go-spacemesh/issues/3006 - tally votes once per window, or after all ballots were loaded - load ballots without joining the identities table. That information is already available in tortoise - set tortoise log level to warn on mainnet. It is not pleasant that info is printed every time the node is restarted --- config/mainnet.go | 7 +++++-- node/node.go | 2 ++ sql/ballots/ballots.go | 25 +++++++++++++++++++++++++ sql/ballots/ballots_test.go | 7 +++++++ tortoise/algorithm.go | 3 +++ tortoise/full.go | 3 ++- tortoise/model/core.go | 2 +- tortoise/recover.go | 17 +++++++++-------- tortoise/recover_test.go | 2 +- tortoise/tortoise_test.go | 4 ++-- 10 files changed, 57 insertions(+), 15 deletions(-) diff --git a/config/mainnet.go b/config/mainnet.go index 3d2c5fe6d2..d8ca3930d9 100644 --- a/config/mainnet.go +++ b/config/mainnet.go @@ -8,6 +8,8 @@ import ( "runtime" "time" + "go.uber.org/zap/zapcore" + "github.com/spacemeshos/go-spacemesh/activation" "github.com/spacemeshos/go-spacemesh/api/grpcserver" "github.com/spacemeshos/go-spacemesh/beacon" @@ -36,7 +38,8 @@ func MainnetConfig() Config { if smeshing.ProvingOpts.Threads < 1 { smeshing.ProvingOpts.Threads = 1 } - + logging := DefaultLoggingConfig() + logging.TrtlLoggerLevel = zapcore.WarnLevel.String() return Config{ BaseConfig: BaseConfig{ DataDirParent: defaultDataDir, @@ -131,7 +134,7 @@ func MainnetConfig() Config { TIME: timeConfig.DefaultConfig(), SMESHING: smeshing, FETCH: fetch.DefaultConfig(), - LOGGING: DefaultLoggingConfig(), + LOGGING: logging, Sync: syncer.Config{ Interval: time.Minute, EpochEndFraction: 0.8, diff --git a/node/node.go b/node/node.go index 9f38672fcf..c00aaa34bc 100644 --- a/node/node.go +++ b/node/node.go @@ -619,6 +619,7 @@ func (app *App) 
initServices(ctx context.Context) error { app.log.With().Info("tortoise will trace execution") trtlopts = append(trtlopts, tortoise.WithTracer()) } + start := time.Now() trtl, err := tortoise.Recover( app.cachedDB, app.clock.CurrentLayer(), beaconProtocol, trtlopts..., @@ -626,6 +627,7 @@ func (app *App) initServices(ctx context.Context) error { if err != nil { return fmt.Errorf("can't recover tortoise state: %w", err) } + app.log.With().Info("tortoise initialized", log.Duration("duration", time.Since(start))) app.eg.Go(func() error { for rst := range beaconProtocol.Results() { events.EmitBeacon(rst.Epoch, rst.Beacon) diff --git a/sql/ballots/ballots.go b/sql/ballots/ballots.go index 827a29df95..d543b2ced2 100644 --- a/sql/ballots/ballots.go +++ b/sql/ballots/ballots.go @@ -118,6 +118,31 @@ func Layer(db sql.Executor, lid types.LayerID) (rst []*types.Ballot, err error) return rst, err } +// LayerNoMalicious returns the full ballots stored for the layer without joining the identities table, so the malicious flag is not populated on them; a ballot that fails to decode is reported as an error instead of a partial result. +func LayerNoMalicious(db sql.Executor, lid types.LayerID) (rst []*types.Ballot, err error) { + var derr error + if _, err = db.Exec(`select id, ballot from ballots where layer = ?1;`, + func(stmt *sql.Statement) { + stmt.BindInt64(1, int64(lid)) + }, func(stmt *sql.Statement) bool { + id := types.BallotID{} + stmt.ColumnBytes(0, id[:]) + var ballot types.Ballot + _, derr = codec.DecodeFrom(stmt.ColumnReader(1), &ballot) + if derr != nil { + return false + } + ballot.SetID(id) + rst = append(rst, &ballot) + return true + }); err != nil { + return nil, fmt.Errorf("selecting %d: %w", lid, err) + } else if derr != nil { + return nil, fmt.Errorf("decoding %d: %w", lid, derr) + } + return rst, err +} + // IDsInLayer returns ballots ids in the layer. 
func IDsInLayer(db sql.Executor, lid types.LayerID) (rst []types.BallotID, err error) { if _, err := db.Exec("select id from ballots where layer = ?1;", func(stmt *sql.Statement) { diff --git a/sql/ballots/ballots_test.go b/sql/ballots/ballots_test.go index 88dd3b8b77..7c2a0b6277 100644 --- a/sql/ballots/ballots_test.go +++ b/sql/ballots/ballots_test.go @@ -53,6 +53,13 @@ func TestLayer(t *testing.T) { for _, ballot := range rst { require.True(t, ballot.IsMalicious()) } + + rst, err = LayerNoMalicious(db, start) + require.NoError(t, err) + require.Len(t, rst, len(ballots)) + for _, ballot := range rst { + require.False(t, ballot.IsMalicious()) + } } func TestAdd(t *testing.T) { diff --git a/tortoise/algorithm.go b/tortoise/algorithm.go index 11e5a41d49..77069b4bce 100644 --- a/tortoise/algorithm.go +++ b/tortoise/algorithm.go @@ -96,6 +96,9 @@ func New(opts ...Opt) (*Tortoise, error) { zap.Uint32("zdist", t.cfg.Zdist), ) } + if t.cfg.WindowSize == 0 { + t.logger.Panic("tortoise-window-size should not be zero") + } t.trtl = newTurtle(t.logger, t.cfg) if t.tracer != nil { t.tracer.On(&ConfigTrace{ diff --git a/tortoise/full.go b/tortoise/full.go index 9b68108d55..ade4f40cf6 100644 --- a/tortoise/full.go +++ b/tortoise/full.go @@ -46,9 +46,10 @@ func (f *full) countBallot(logger *zap.Logger, ballot *ballotInfo) { continue } layer := f.layer(lvote.lid) + height := ballot.reference.height empty := true for _, block := range layer.blocks { - if block.height > ballot.reference.height { + if block.height > height { continue } vote := lvote.getVote(block) diff --git a/tortoise/model/core.go b/tortoise/model/core.go index 83d8427e44..b57af73012 100644 --- a/tortoise/model/core.go +++ b/tortoise/model/core.go @@ -129,7 +129,7 @@ func (c *core) OnMessage(m Messenger, event Message) { m.Send(MessageBallot{Ballot: ballot}) case MessageLayerEnd: if ev.LayerID.After(types.GetEffectiveGenesis()) { - tortoise.RecoverLayer(context.Background(), c.tortoise, c.cdb, c.beacons, 
ev.LayerID, ev.LayerID) + tortoise.RecoverLayer(context.Background(), c.tortoise, c.cdb, c.beacons, ev.LayerID, ev.LayerID, ev.LayerID) m.Notify(EventVerified{ID: c.id, Verified: c.tortoise.LatestComplete(), Layer: ev.LayerID}) } diff --git a/tortoise/recover.go b/tortoise/recover.go index 555580ef7d..b5eb505a00 100644 --- a/tortoise/recover.go +++ b/tortoise/recover.go @@ -24,7 +24,7 @@ func Recover(db *datastore.CachedDB, latest types.LayerID, beacon system.BeaconG return nil, err } - layer, err := ballots.LatestLayer(db) + last, err := ballots.LatestLayer(db) if err != nil { return nil, fmt.Errorf("failed to load latest known layer: %w", err) } @@ -49,15 +49,15 @@ func Recover(db *datastore.CachedDB, latest types.LayerID, beacon system.BeaconG return nil, fmt.Errorf("failed to load latest epoch: %w", err) } epoch++ // recoverEpoch expects target epoch, rather than publish - if layer.GetEpoch() != epoch { - for eid := layer.GetEpoch(); eid <= epoch; eid++ { + if last.GetEpoch() != epoch { + for eid := last.GetEpoch(); eid <= epoch; eid++ { if err := recoverEpoch(eid, trtl, db, beacon); err != nil { return nil, err } } } - for lid := types.GetEffectiveGenesis().Add(1); !lid.After(layer); lid = lid.Add(1) { - if err := RecoverLayer(context.Background(), trtl, db, beacon, lid, min(layer, latest)); err != nil { + for lid := types.GetEffectiveGenesis().Add(1); !lid.After(last); lid = lid.Add(1) { + if err := RecoverLayer(context.Background(), trtl, db, beacon, lid, last, min(last, latest)); err != nil { return nil, fmt.Errorf("failed to load tortoise state at layer %d: %w", lid, err) } } @@ -78,7 +78,7 @@ func recoverEpoch(epoch types.EpochID, trtl *Tortoise, db *datastore.CachedDB, b return nil } -func RecoverLayer(ctx context.Context, trtl *Tortoise, db *datastore.CachedDB, beacon system.BeaconGetter, lid, current types.LayerID) error { +func RecoverLayer(ctx context.Context, trtl *Tortoise, db *datastore.CachedDB, beacon system.BeaconGetter, lid, last, current 
types.LayerID) error { if lid.FirstInEpoch() { if err := recoverEpoch(lid.GetEpoch(), trtl, db, beacon); err != nil { return err @@ -106,7 +106,8 @@ func RecoverLayer(ctx context.Context, trtl *Tortoise, db *datastore.CachedDB, b trtl.OnHareOutput(lid, hare) } } - ballotsrst, err := ballots.Layer(db, lid) + // NOTE(dshulyak) we loaded information about malicious identities earlier. + ballotsrst, err := ballots.LayerNoMalicious(db, lid) if err != nil { return err } @@ -127,7 +128,7 @@ func RecoverLayer(ctx context.Context, trtl *Tortoise, db *datastore.CachedDB, b if err == nil { trtl.OnWeakCoin(lid, coin) } - if lid <= current { + if lid <= current && (lid%types.LayerID(trtl.cfg.WindowSize) == 0 || lid == last) { trtl.TallyVotes(ctx, lid) opinion, err := layers.GetAggregatedHash(db, lid-1) diff --git a/tortoise/recover_test.go b/tortoise/recover_test.go index c3befcc724..2c825ed653 100644 --- a/tortoise/recover_test.go +++ b/tortoise/recover_test.go @@ -30,7 +30,7 @@ func (a *recoveryAdapter) TallyVotes(ctx context.Context, current types.LayerID) a.prev = genesis } for lid := a.prev; lid <= current; lid++ { - require.NoError(a, RecoverLayer(ctx, a.Tortoise, a.db, a.beacon, lid, current)) + require.NoError(a, RecoverLayer(ctx, a.Tortoise, a.db, a.beacon, lid, current, current)) a.prev = lid } } diff --git a/tortoise/tortoise_test.go b/tortoise/tortoise_test.go index b1022084b9..b3adfe2be5 100644 --- a/tortoise/tortoise_test.go +++ b/tortoise/tortoise_test.go @@ -25,7 +25,6 @@ import ( "github.com/spacemeshos/go-spacemesh/sql/ballots" "github.com/spacemeshos/go-spacemesh/sql/blocks" "github.com/spacemeshos/go-spacemesh/sql/certificates" - "github.com/spacemeshos/go-spacemesh/sql/identities" "github.com/spacemeshos/go-spacemesh/sql/layers" "github.com/spacemeshos/go-spacemesh/tortoise/opinionhash" "github.com/spacemeshos/go-spacemesh/tortoise/sim" @@ -1094,6 +1093,7 @@ func TestBaseBallotPrioritization(t *testing.T) { sim.WithSequence(5), }, expected: genesis.Add(5), 
+ window: 1, }, { desc: "BadBlocksIgnored", @@ -1885,7 +1885,7 @@ func TestMaliciousBallotsAreIgnored(t *testing.T) { blts, err := ballots.Layer(s.GetState(0).DB, last) require.NoError(t, err) for _, ballot := range blts { - require.NoError(t, identities.SetMalicious(s.GetState(0).DB, ballot.SmesherID, []byte("proof"), time.Now())) + tortoise.OnMalfeasance(ballot.SmesherID) } tortoise.TallyVotes(ctx, s.Next())