Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ledger: fix possible dbRound unsynchronization for trackers and registry #3910

Merged
merged 4 commits into from
Apr 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ledger/applications_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,18 @@ import (
"github.com/algorand/go-algorand/test/partitiontest"
)

// commitRound schedules a commit for known offset and dbRound
// and waits for completion
func commitRound(offset uint64, dbRound basics.Round, l *Ledger) {
commitRoundLookback(dbRound+basics.Round(offset), l)
}

func commitRoundLookback(lookback basics.Round, l *Ledger) {
l.trackers.mu.Lock()
l.trackers.lastFlushTime = time.Time{}
l.trackers.mu.Unlock()

l.trackers.scheduleCommit(l.Latest(), l.Latest()-(dbRound+basics.Round(offset)))
l.trackers.scheduleCommit(l.Latest(), l.Latest()-lookback)
// wait for the operation to complete. Once it does complete, the tr.lastFlushTime is going to be updated, so we can
// use that as an indicator.
for {
Expand All @@ -49,7 +55,6 @@ func commitRound(offset uint64, dbRound basics.Round, l *Ledger) {
break
}
time.Sleep(time.Millisecond)

}
}

Expand Down
9 changes: 3 additions & 6 deletions ledger/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,16 +322,15 @@ func (tr *trackerRegistry) committedUpTo(rnd basics.Round) basics.Round {
}

func (tr *trackerRegistry) scheduleCommit(blockqRound, maxLookback basics.Round) {
tr.mu.RLock()
dbRound := tr.dbRound
tr.mu.RUnlock()

dcc := &deferredCommitContext{
deferredCommitRange: deferredCommitRange{
lookback: maxLookback,
},
}
cdr := &dcc.deferredCommitRange

tr.mu.RLock()
dbRound := tr.dbRound
for _, lt := range tr.trackers {
base := cdr.oldBase
offset := cdr.offset
Expand All @@ -351,8 +350,6 @@ func (tr *trackerRegistry) scheduleCommit(blockqRound, maxLookback basics.Round)
} else {
dcc = nil
}

tr.mu.RLock()
// If we recently flushed, wait to aggregate some more blocks.
// ( unless we're creating a catchpoint, in which case we want to flush it right away
// so that all the instances of the catchpoint would contain exactly the same data )
Expand Down
173 changes: 173 additions & 0 deletions ledger/tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,32 @@ package ledger

import (
"bytes"
"context"
"database/sql"
"sync"
"testing"

"github.com/stretchr/testify/require"

"github.com/algorand/go-algorand/agreement"
"github.com/algorand/go-algorand/config"
"github.com/algorand/go-algorand/crypto"
"github.com/algorand/go-algorand/data/basics"
"github.com/algorand/go-algorand/data/bookkeeping"
"github.com/algorand/go-algorand/ledger/ledgercore"
ledgertesting "github.com/algorand/go-algorand/ledger/testing"
"github.com/algorand/go-algorand/logging"
"github.com/algorand/go-algorand/protocol"
"github.com/algorand/go-algorand/test/partitiontest"
)

// commitRoundNext schedules a commit with as many rounds as possible
func commitRoundNext(l *Ledger) {
// maxAcctLookback := l.trackers.cfg.MaxAcctLookback
maxAcctLookback := 320
commitRoundLookback(basics.Round(maxAcctLookback), l)
}

// TestTrackerScheduleCommit checks catchpointTracker.produceCommittingTask does not increase commit offset relative
// to the value set by accountUpdates
func TestTrackerScheduleCommit(t *testing.T) {
Expand Down Expand Up @@ -123,3 +136,163 @@ func TestTrackerScheduleCommit(t *testing.T) {
dc := <-ml.trackers.deferredCommits
a.Equal(expectedOffset, dc.offset)
}

type producePrepareBlockingTracker struct {
produceReleaseLock chan struct{}
prepareCommitEntryLock chan struct{}
prepareCommitReleaseLock chan struct{}
cancelTasks bool
}

// loadFromDisk is not implemented in the blockingTracker.
func (bt *producePrepareBlockingTracker) loadFromDisk(ledgerForTracker, basics.Round) error {
return nil
}

// newBlock is not implemented in the blockingTracker.
func (bt *producePrepareBlockingTracker) newBlock(blk bookkeeping.Block, delta ledgercore.StateDelta) {
}

// committedUpTo in the blockingTracker just stores the committed round.
func (bt *producePrepareBlockingTracker) committedUpTo(committedRnd basics.Round) (minRound, lookback basics.Round) {
return 0, basics.Round(0)
}

func (bt *producePrepareBlockingTracker) produceCommittingTask(committedRound basics.Round, dbRound basics.Round, dcr *deferredCommitRange) *deferredCommitRange {
if bt.cancelTasks {
return nil
}

<-bt.produceReleaseLock
return dcr
}

// prepareCommit, is not used by the blockingTracker
func (bt *producePrepareBlockingTracker) prepareCommit(*deferredCommitContext) error {
bt.prepareCommitEntryLock <- struct{}{}
<-bt.prepareCommitReleaseLock
return nil
}

// commitRound is not used by the blockingTracker
func (bt *producePrepareBlockingTracker) commitRound(context.Context, *sql.Tx, *deferredCommitContext) error {
return nil
}

func (bt *producePrepareBlockingTracker) postCommit(ctx context.Context, dcc *deferredCommitContext) {
}

// postCommitUnlocked implements entry/exit blockers, designed for testing.
func (bt *producePrepareBlockingTracker) postCommitUnlocked(ctx context.Context, dcc *deferredCommitContext) {
}

// handleUnorderedCommit is not used by the blockingTracker
func (bt *producePrepareBlockingTracker) handleUnorderedCommit(*deferredCommitContext) {
}

// close is not used by the blockingTracker
func (bt *producePrepareBlockingTracker) close() {
}

func (bt *producePrepareBlockingTracker) reset() {
bt.prepareCommitEntryLock = make(chan struct{})
bt.prepareCommitReleaseLock = make(chan struct{})
bt.prepareCommitReleaseLock = make(chan struct{})
bt.cancelTasks = false
}

// TestTrackerDbRoundDataRace checks for dbRound data race
// when commit scheduling relies on dbRound from the tracker registry but tracker's deltas
// are used in calculations
// 1. Add say 128 + MaxAcctLookback (MaxLookback) blocks and commit
// 2. Add 2*MaxLookback blocks without committing
// 3. Set a block in prepareCommit, and initiate the commit
// 4. Set a block in produceCommittingTask, add a new block and resume the commit
// 5. Resume produceCommittingTask
// 6. The data race and panic happens in block queue syncher thread
func TestTrackerDbRoundDataRace(t *testing.T) {
partitiontest.PartitionTest(t)

t.Skip("For manual run when touching ledger locking")

a := require.New(t)

genesisInitState, _ := ledgertesting.GenerateInitState(t, protocol.ConsensusCurrentVersion, 1)
const inMem = true
log := logging.TestingLog(t)
log.SetLevel(logging.Warn)
cfg := config.GetDefaultLocal()
ledger, err := OpenLedger(log, t.Name(), inMem, genesisInitState, cfg)
a.NoError(err, "could not open ledger")
defer ledger.Close()

stallingTracker := &producePrepareBlockingTracker{
// produceEntryLock: make(chan struct{}, 10),
produceReleaseLock: make(chan struct{}),
prepareCommitEntryLock: make(chan struct{}, 10),
prepareCommitReleaseLock: make(chan struct{}),
}
ledger.trackerMu.Lock()
ledger.trackers.mu.Lock()
ledger.trackers.trackers = append([]ledgerTracker{stallingTracker}, ledger.trackers.trackers...)
ledger.trackers.mu.Unlock()
ledger.trackerMu.Unlock()

close(stallingTracker.produceReleaseLock)
close(stallingTracker.prepareCommitReleaseLock)

targetRound := basics.Round(128) * 5
blk := genesisInitState.Block
for i := basics.Round(0); i < targetRound-1; i++ {
blk.BlockHeader.Round++
blk.BlockHeader.TimeStamp += int64(crypto.RandUint64() % 100 * 1000)
err := ledger.AddBlock(blk, agreement.Certificate{})
a.NoError(err)
}
blk.BlockHeader.Round++
blk.BlockHeader.TimeStamp += int64(crypto.RandUint64() % 100 * 1000)
err = ledger.AddBlock(blk, agreement.Certificate{})
a.NoError(err)
commitRoundNext(ledger)
ledger.trackers.waitAccountsWriting()
lookback := 320
// lookback := cfg.MaxAcctLookback
a.Equal(targetRound-basics.Round(lookback), ledger.trackers.dbRound)

// build up some non-committed queue
stallingTracker.cancelTasks = true
for i := targetRound; i < 2*targetRound; i++ {
blk.BlockHeader.Round++
blk.BlockHeader.TimeStamp += int64(crypto.RandUint64() % 100 * 1000)
err := ledger.AddBlock(blk, agreement.Certificate{})
a.NoError(err)
}
ledger.WaitForCommit(2*targetRound - 1)

stallingTracker.reset()
var wg sync.WaitGroup
wg.Add(1)
go func() {
commitRoundNext(ledger)
wg.Done()
}()

<-stallingTracker.prepareCommitEntryLock
stallingTracker.produceReleaseLock = make(chan struct{})

blk.BlockHeader.Round++
blk.BlockHeader.TimeStamp += int64(crypto.RandUint64() % 100 * 1000)
err = ledger.AddBlock(blk, agreement.Certificate{})
a.NoError(err)
// the notifyCommit -> committedUpTo -> scheduleCommit chain
// is called right after the cond var, so wait until that moment
ledger.WaitForCommit(2 * targetRound)

// let the commit to complete
close(stallingTracker.prepareCommitReleaseLock)
wg.Wait()

// unblock the notifyCommit (scheduleCommit) goroutine
stallingTracker.cancelTasks = true
close(stallingTracker.produceReleaseLock)
}