Skip to content

Commit

Permalink
ledger: fix possible dbRound sync issue in trackers and the registry (#…
Browse files Browse the repository at this point in the history
…3910)

The trackerRegistry.dbRound value and cached dbRound values stored in trackers
might go out of sync. Although they are updated under the same lock,
produceCommittingTask might use outdated dbRound and give it to trackers
with the updated state.
The fix is simple: to have dbRound usage and produceCommittingTask invocation
under the same lock.
  • Loading branch information
algorandskiy authored Apr 26, 2022
1 parent e58954d commit bd1129e
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 8 deletions.
9 changes: 7 additions & 2 deletions ledger/applications_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,18 @@ import (
"github.com/algorand/go-algorand/test/partitiontest"
)

// commitRound schedules a commit for known offset and dbRound
// and waits for completion
func commitRound(offset uint64, dbRound basics.Round, l *Ledger) {
commitRoundLookback(dbRound+basics.Round(offset), l)
}

func commitRoundLookback(lookback basics.Round, l *Ledger) {
l.trackers.mu.Lock()
l.trackers.lastFlushTime = time.Time{}
l.trackers.mu.Unlock()

l.trackers.scheduleCommit(l.Latest(), l.Latest()-(dbRound+basics.Round(offset)))
l.trackers.scheduleCommit(l.Latest(), l.Latest()-lookback)
// wait for the operation to complete. Once it does complete, the tr.lastFlushTime is going to be updated, so we can
// use that as an indicator.
for {
Expand All @@ -49,7 +55,6 @@ func commitRound(offset uint64, dbRound basics.Round, l *Ledger) {
break
}
time.Sleep(time.Millisecond)

}
}

Expand Down
9 changes: 3 additions & 6 deletions ledger/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,16 +322,15 @@ func (tr *trackerRegistry) committedUpTo(rnd basics.Round) basics.Round {
}

func (tr *trackerRegistry) scheduleCommit(blockqRound, maxLookback basics.Round) {
tr.mu.RLock()
dbRound := tr.dbRound
tr.mu.RUnlock()

dcc := &deferredCommitContext{
deferredCommitRange: deferredCommitRange{
lookback: maxLookback,
},
}
cdr := &dcc.deferredCommitRange

tr.mu.RLock()
dbRound := tr.dbRound
for _, lt := range tr.trackers {
base := cdr.oldBase
offset := cdr.offset
Expand All @@ -351,8 +350,6 @@ func (tr *trackerRegistry) scheduleCommit(blockqRound, maxLookback basics.Round)
} else {
dcc = nil
}

tr.mu.RLock()
// If we recently flushed, wait to aggregate some more blocks.
// ( unless we're creating a catchpoint, in which case we want to flush it right away
// so that all the instances of the catchpoint would contain exactly the same data )
Expand Down
173 changes: 173 additions & 0 deletions ledger/tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,32 @@ package ledger

import (
"bytes"
"context"
"database/sql"
"sync"
"testing"

"github.com/stretchr/testify/require"

"github.com/algorand/go-algorand/agreement"
"github.com/algorand/go-algorand/config"
"github.com/algorand/go-algorand/crypto"
"github.com/algorand/go-algorand/data/basics"
"github.com/algorand/go-algorand/data/bookkeeping"
"github.com/algorand/go-algorand/ledger/ledgercore"
ledgertesting "github.com/algorand/go-algorand/ledger/testing"
"github.com/algorand/go-algorand/logging"
"github.com/algorand/go-algorand/protocol"
"github.com/algorand/go-algorand/test/partitiontest"
)

// commitRoundNext schedules a commit with as many rounds as possible
func commitRoundNext(l *Ledger) {
// maxAcctLookback := l.trackers.cfg.MaxAcctLookback
maxAcctLookback := 320
commitRoundLookback(basics.Round(maxAcctLookback), l)
}

// TestTrackerScheduleCommit checks catchpointTracker.produceCommittingTask does not increase commit offset relative
// to the value set by accountUpdates
func TestTrackerScheduleCommit(t *testing.T) {
Expand Down Expand Up @@ -123,3 +136,163 @@ func TestTrackerScheduleCommit(t *testing.T) {
dc := <-ml.trackers.deferredCommits
a.Equal(expectedOffset, dc.offset)
}

type producePrepareBlockingTracker struct {
produceReleaseLock chan struct{}
prepareCommitEntryLock chan struct{}
prepareCommitReleaseLock chan struct{}
cancelTasks bool
}

// loadFromDisk is not implemented in the blockingTracker.
func (bt *producePrepareBlockingTracker) loadFromDisk(ledgerForTracker, basics.Round) error {
return nil
}

// newBlock is not implemented in the blockingTracker.
func (bt *producePrepareBlockingTracker) newBlock(blk bookkeeping.Block, delta ledgercore.StateDelta) {
}

// committedUpTo in the blockingTracker just stores the committed round.
func (bt *producePrepareBlockingTracker) committedUpTo(committedRnd basics.Round) (minRound, lookback basics.Round) {
return 0, basics.Round(0)
}

func (bt *producePrepareBlockingTracker) produceCommittingTask(committedRound basics.Round, dbRound basics.Round, dcr *deferredCommitRange) *deferredCommitRange {
if bt.cancelTasks {
return nil
}

<-bt.produceReleaseLock
return dcr
}

// prepareCommit, is not used by the blockingTracker
func (bt *producePrepareBlockingTracker) prepareCommit(*deferredCommitContext) error {
bt.prepareCommitEntryLock <- struct{}{}
<-bt.prepareCommitReleaseLock
return nil
}

// commitRound is not used by the blockingTracker
func (bt *producePrepareBlockingTracker) commitRound(context.Context, *sql.Tx, *deferredCommitContext) error {
return nil
}

func (bt *producePrepareBlockingTracker) postCommit(ctx context.Context, dcc *deferredCommitContext) {
}

// postCommitUnlocked implements entry/exit blockers, designed for testing.
func (bt *producePrepareBlockingTracker) postCommitUnlocked(ctx context.Context, dcc *deferredCommitContext) {
}

// handleUnorderedCommit is not used by the blockingTracker
func (bt *producePrepareBlockingTracker) handleUnorderedCommit(*deferredCommitContext) {
}

// close is not used by the blockingTracker
func (bt *producePrepareBlockingTracker) close() {
}

func (bt *producePrepareBlockingTracker) reset() {
bt.prepareCommitEntryLock = make(chan struct{})
bt.prepareCommitReleaseLock = make(chan struct{})
bt.prepareCommitReleaseLock = make(chan struct{})
bt.cancelTasks = false
}

// TestTrackerDbRoundDataRace checks for dbRound data race
// when commit scheduling relies on dbRound from the tracker registry but tracker's deltas
// are used in calculations
// 1. Add say 128 + MaxAcctLookback (MaxLookback) blocks and commit
// 2. Add 2*MaxLookback blocks without committing
// 3. Set a block in prepareCommit, and initiate the commit
// 4. Set a block in produceCommittingTask, add a new block and resume the commit
// 5. Resume produceCommittingTask
// 6. The data race and panic happens in block queue syncher thread
func TestTrackerDbRoundDataRace(t *testing.T) {
partitiontest.PartitionTest(t)

t.Skip("For manual run when touching ledger locking")

a := require.New(t)

genesisInitState, _ := ledgertesting.GenerateInitState(t, protocol.ConsensusCurrentVersion, 1)
const inMem = true
log := logging.TestingLog(t)
log.SetLevel(logging.Warn)
cfg := config.GetDefaultLocal()
ledger, err := OpenLedger(log, t.Name(), inMem, genesisInitState, cfg)
a.NoError(err, "could not open ledger")
defer ledger.Close()

stallingTracker := &producePrepareBlockingTracker{
// produceEntryLock: make(chan struct{}, 10),
produceReleaseLock: make(chan struct{}),
prepareCommitEntryLock: make(chan struct{}, 10),
prepareCommitReleaseLock: make(chan struct{}),
}
ledger.trackerMu.Lock()
ledger.trackers.mu.Lock()
ledger.trackers.trackers = append([]ledgerTracker{stallingTracker}, ledger.trackers.trackers...)
ledger.trackers.mu.Unlock()
ledger.trackerMu.Unlock()

close(stallingTracker.produceReleaseLock)
close(stallingTracker.prepareCommitReleaseLock)

targetRound := basics.Round(128) * 5
blk := genesisInitState.Block
for i := basics.Round(0); i < targetRound-1; i++ {
blk.BlockHeader.Round++
blk.BlockHeader.TimeStamp += int64(crypto.RandUint64() % 100 * 1000)
err := ledger.AddBlock(blk, agreement.Certificate{})
a.NoError(err)
}
blk.BlockHeader.Round++
blk.BlockHeader.TimeStamp += int64(crypto.RandUint64() % 100 * 1000)
err = ledger.AddBlock(blk, agreement.Certificate{})
a.NoError(err)
commitRoundNext(ledger)
ledger.trackers.waitAccountsWriting()
lookback := 320
// lookback := cfg.MaxAcctLookback
a.Equal(targetRound-basics.Round(lookback), ledger.trackers.dbRound)

// build up some non-committed queue
stallingTracker.cancelTasks = true
for i := targetRound; i < 2*targetRound; i++ {
blk.BlockHeader.Round++
blk.BlockHeader.TimeStamp += int64(crypto.RandUint64() % 100 * 1000)
err := ledger.AddBlock(blk, agreement.Certificate{})
a.NoError(err)
}
ledger.WaitForCommit(2*targetRound - 1)

stallingTracker.reset()
var wg sync.WaitGroup
wg.Add(1)
go func() {
commitRoundNext(ledger)
wg.Done()
}()

<-stallingTracker.prepareCommitEntryLock
stallingTracker.produceReleaseLock = make(chan struct{})

blk.BlockHeader.Round++
blk.BlockHeader.TimeStamp += int64(crypto.RandUint64() % 100 * 1000)
err = ledger.AddBlock(blk, agreement.Certificate{})
a.NoError(err)
// the notifyCommit -> committedUpTo -> scheduleCommit chain
// is called right after the cond var, so wait until that moment
ledger.WaitForCommit(2 * targetRound)

// let the commit to complete
close(stallingTracker.prepareCommitReleaseLock)
wg.Wait()

// unblock the notifyCommit (scheduleCommit) goroutine
stallingTracker.cancelTasks = true
close(stallingTracker.produceReleaseLock)
}

0 comments on commit bd1129e

Please sign in to comment.