From be50af6c143b4696543e686de04d75a6b43278cb Mon Sep 17 00:00:00 2001 From: Matthias Fasching <5011972+fasmat@users.noreply.github.com> Date: Mon, 19 Aug 2024 18:36:14 +0000 Subject: [PATCH 1/2] Update time measurement of metrics for proposal builder in 1:n (#6268) ## Motivation This updates how timing metrics for the proposal builder are calculated. --- miner/metrics.go | 26 ++++++++++++++------------ miner/proposal_builder.go | 21 ++++++++++++++------- miner/proposal_builder_test.go | 2 +- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/miner/metrics.go b/miner/metrics.go index 1ec5b4ba16..113334a3ba 100644 --- a/miner/metrics.go +++ b/miner/metrics.go @@ -18,24 +18,26 @@ var proposalBuild = metrics.NewHistogramWithBuckets( ).WithLabelValues() type latencyTracker struct { - start time.Time - data time.Time - tortoise time.Time - txs time.Time - hash time.Time - publish time.Time + start time.Time + end time.Time + + data time.Duration + tortoise time.Duration + hash time.Duration + txs time.Duration + publish time.Duration } func (lt *latencyTracker) total() time.Duration { - return lt.publish.Sub(lt.start) + return lt.end.Sub(lt.start) } func (lt *latencyTracker) MarshalLogObject(encoder zapcore.ObjectEncoder) error { - encoder.AddDuration("data", lt.data.Sub(lt.start)) - encoder.AddDuration("tortoise", lt.tortoise.Sub(lt.data)) - encoder.AddDuration("hash", lt.hash.Sub(lt.tortoise)) - encoder.AddDuration("txs", lt.txs.Sub(lt.hash)) - encoder.AddDuration("publish", lt.publish.Sub(lt.hash)) + encoder.AddDuration("data", lt.data) + encoder.AddDuration("tortoise", lt.tortoise) + encoder.AddDuration("hash", lt.hash) + encoder.AddDuration("txs", lt.txs) + encoder.AddDuration("publish", lt.publish) total := lt.total() encoder.AddDuration("total", total) // arbitrary threshold that we want to highlight as a problem diff --git a/miner/proposal_builder.go b/miner/proposal_builder.go index 96c0ee46e4..fe58751353 100644 --- a/miner/proposal_builder.go +++ b/miner/proposal_builder.go @@ -558,7 +558,9 @@ func (pb *ProposalBuilder) initSignerData( } func (pb *ProposalBuilder) build(ctx context.Context, lid types.LayerID) error { - start := time.Now() + for _, ss := range pb.signers.signers { + ss.latency.start = time.Now() + } if err := pb.initSharedData(ctx, lid); err != nil { return err } @@ -571,8 +573,8 @@ func (pb *ProposalBuilder) build(ctx context.Context, lid types.LayerID) error { var eg errgroup.Group eg.SetLimit(pb.cfg.workersLimit) for _, ss := range signers { - ss.latency.start = start eg.Go(func() error { + start := time.Now() if err := pb.initSignerData(ctx, ss, lid); err != nil { if errors.Is(err, errAtxNotAvailable) { ss.log.Debug("smesher doesn't have atx that targets this epoch", @@ -591,7 +593,7 @@ func (pb *ProposalBuilder) build(ctx context.Context, lid types.LayerID) error { ) } ss.session.prev = lid - ss.latency.data = time.Now() + ss.latency.data = time.Since(start) return nil }) } @@ -599,6 +601,7 @@ func (pb *ProposalBuilder) build(ctx context.Context, lid types.LayerID) error { return err } + start := time.Now() any := false for _, ss := range signers { if n := len(ss.session.eligibilities.proofs[lid]); n == 0 { @@ -629,14 +632,16 @@ func (pb *ProposalBuilder) build(ctx context.Context, lid types.LayerID) error { return fmt.Errorf("encode votes: %w", err) } for _, ss := range signers { - ss.latency.tortoise = time.Now() + ss.latency.tortoise = time.Since(start) } + start = time.Now() meshHash := pb.decideMeshHash(ctx, lid) for _, ss := range signers { - ss.latency.hash = time.Now() + ss.latency.hash = time.Since(start) } + start = time.Now() for _, ss := range signers { proofs := ss.session.eligibilities.proofs[lid] if len(proofs) == 0 { @@ -654,7 +659,7 @@ func (pb *ProposalBuilder) build(ctx context.Context, lid types.LayerID) error { ) txs := pb.conState.SelectProposalTXs(lid, len(proofs)) - ss.latency.txs = time.Now() + ss.latency.txs = time.Since(start) // needs to be saved before publishing, as we will query it in handler if ss.session.ref == types.EmptyBallotID { @@ -667,6 +672,7 @@ func (pb *ProposalBuilder) build(ctx context.Context, lid types.LayerID) error { } eg.Go(func() error { + start := time.Now() proposal := createProposal( &ss.session, pb.shared.beacon, @@ -686,7 +692,8 @@ func (pb *ProposalBuilder) build(ctx context.Context, lid types.LayerID) error { zap.Error(err), ) } else { - ss.latency.publish = time.Now() + ss.latency.publish = time.Since(start) + ss.latency.end = time.Now() ss.log.Info("proposal created", log.ZContext(ctx), zap.Inline(proposal), diff --git a/miner/proposal_builder_test.go b/miner/proposal_builder_test.go index 9b5ce40a79..7cfd0c7528 100644 --- a/miner/proposal_builder_test.go +++ b/miner/proposal_builder_test.go @@ -891,7 +891,7 @@ func TestMarshalLog(t *testing.T) { require.NoError(t, session.MarshalLogObject(encoder)) }) t.Run("latency", func(t *testing.T) { - latency := &latencyTracker{start: time.Unix(0, 0), publish: time.Unix(1000, 0)} + latency := &latencyTracker{start: time.Unix(0, 0), end: time.Unix(1000, 0)} require.NoError(t, latency.MarshalLogObject(encoder)) }) } From 9a5d30c33bf39892f9462c326430d29940a73507 Mon Sep 17 00:00:00 2001 From: Matthias <5011972+fasmat@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:40:11 +0000 Subject: [PATCH 2/2] Update CHANGELOG --- CHANGELOG.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01e71e78db..a2cd6ed973 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,15 @@ See [RELEASE](./RELEASE.md) for workflow instructions. ### Improvements -* [#6269](https://github.com/spacemeshos/go-spacemesh/pull/6269) Use sub query instead of left join to speed up. - transaction list query +* [#6269](https://github.com/spacemeshos/go-spacemesh/pull/6269) Use sub query instead of left join to speed up + transaction list query. + +* [#6278](https://github.com/spacemeshos/go-spacemesh/pull/6278) Reduce logging levels of some messages to reduce noise. + +* [#6279](https://github.com/spacemeshos/go-spacemesh/pull/6279) Speed up initialization of tortoise by reducing the + number of queries to the DB. + +* [#6268](https://github.com/spacemeshos/go-spacemesh/pull/6268) Fix incorrect timing collection for proposal builder. * [#6278](https://github.com/spacemeshos/go-spacemesh/pull/6278) Reduce logging levels of some messages to reduce noise.