-
Notifications
You must be signed in to change notification settings - Fork 3.4k
/
driver.go
951 lines (827 loc) · 31.4 KB
/
driver.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
package batcher
import (
"context"
"errors"
"fmt"
"io"
"math/big"
_ "net/http/pprof"
"sync"
"time"
"golang.org/x/sync/errgroup"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethereum/go-ethereum/core"
"github.com/ethereum/go-ethereum/core/txpool"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rpc"
altda "github.com/ethereum-optimism/optimism/op-alt-da"
"github.com/ethereum-optimism/optimism/op-batcher/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-service/dial"
"github.com/ethereum-optimism/optimism/op-service/eth"
"github.com/ethereum-optimism/optimism/op-service/txmgr"
)
// Package-level sentinel error, helper value and RPC method name used by the batch submitter.
var (
// ErrBatcherNotRunning is returned by StopBatchSubmitting when the batcher is not running.
ErrBatcherNotRunning = errors.New("batcher is not running")
// emptyTxData is a txData holding exactly one frame with empty data. It is passed to
// blobTxCandidate by cancelBlockingTx to build a blob cancellation transaction.
emptyTxData = txData{
frames: []frameData{
{
data: []byte{},
},
},
}
// SetMaxDASizeMethod is the RPC method name the throttling loop calls to set the
// maximum DA transaction and block sizes.
SetMaxDASizeMethod = "miner_setMaxDASize"
)
// txRef is the reference attached to every transaction handed to the txmgr queue.
// It comes back with the transaction's receipt so the result can be matched to its origin.
type txRef struct {
// id is the ID of the txData the transaction carries (see sendTx).
id txID
// isCancel marks a cancellation transaction sent to unblock the txpool.
isCancel bool
// isBlob is copied from txData.asBlob when the transaction is queued.
isBlob bool
}
// String renders the reference for logging, using the full string form of the tx ID.
// Cancellation transactions are rendered as a fixed label instead (see string).
func (r txRef) String() string {
	full := func(id txID) string {
		return id.String()
	}
	return r.string(full)
}
// TerminalString renders the reference for terminal output, using the terminal string
// form of the tx ID. Cancellation transactions are rendered as a fixed label instead.
func (r txRef) TerminalString() string {
	terminal := func(id txID) string {
		return id.TerminalString()
	}
	return r.string(terminal)
}
// string is the shared implementation behind String and TerminalString.
// Regular transactions are rendered by applying txIDStringer to the tx ID;
// cancellation transactions are rendered as "blob-cancellation" or
// "calldata-cancellation" depending on isBlob.
func (r txRef) string(txIDStringer func(txID) string) string {
	switch {
	case !r.isCancel:
		return txIDStringer(r.id)
	case r.isBlob:
		return "blob-cancellation"
	default:
		return "calldata-cancellation"
	}
}
// L1Client is the subset of L1 RPC functionality the batcher depends on.
type L1Client interface {
// HeaderByNumber fetches an L1 header. l1Tip calls it with a nil number to obtain
// the latest L1 block.
HeaderByNumber(ctx context.Context, number *big.Int) (*types.Header, error)
// NonceAt is not called directly in this file; the client is handed to
// eth.CheckRecentTxs in waitNodeSync, which presumably uses it — TODO confirm.
NonceAt(ctx context.Context, account common.Address, blockNumber *big.Int) (uint64, error)
}
// L2Client is the subset of L2 execution-client functionality needed to fetch L2 blocks.
// NOTE(review): this file obtains its L2 client through EndpointProvider.EthClient rather
// than through this interface; presumably it is used by tests or other files — confirm.
type L2Client interface {
// BlockByNumber fetches the L2 block with the given number.
BlockByNumber(ctx context.Context, number *big.Int) (*types.Block, error)
}
// RollupClient is the subset of rollup-node functionality needed to query sync status.
type RollupClient interface {
// SyncStatus returns the rollup node's current sync status.
SyncStatus(ctx context.Context) (*eth.SyncStatus, error)
}
// DriverSetup is the collection of input/output interfaces and configuration that the driver operates on.
type DriverSetup struct {
// Log is the logger used by all driver loops.
Log log.Logger
// Metr records batcher metrics (loaded L2 blocks, latest L1 block, blob usage, ...).
Metr metrics.Metricer
// RollupConfig supplies, among others, the L2 genesis time and the batch inbox address.
RollupConfig *rollup.Config
// Config holds the batcher settings (poll/throttle intervals, network timeout, ...).
Config BatcherConfig
// Txmgr sends the batcher transactions to L1.
Txmgr txmgr.TxManager
// L1Client is used to query the L1 tip and recent batcher transactions.
L1Client L1Client
// EndpointProvider hands out the L2 execution and rollup-node clients.
EndpointProvider dial.L2EndpointProvider
// ChannelConfig is passed to the channel manager on construction.
ChannelConfig ChannelConfigProvider
// AltDA is the client txdata is posted to when Config.UseAltDA is set.
AltDA *altda.DAClient
// ChannelOutFactory, if non-nil, overrides the channel manager's channel-out factory.
ChannelOutFactory ChannelOutFactory
}
// BatchSubmitter encapsulates a service responsible for submitting L2 tx
// batches to L1 for availability.
type BatchSubmitter struct {
DriverSetup
// wg tracks the main, receipts-processing and (if enabled) throttling loops.
wg sync.WaitGroup
// shutdownCtx is cancelled first by StopBatchSubmitting to request a graceful stop.
shutdownCtx context.Context
cancelShutdownCtx context.CancelFunc
// killCtx is cancelled once the loops have exited, or earlier if the context passed
// to StopBatchSubmitting is done. It is the context the txmgr queue is created with.
killCtx context.Context
cancelKillCtx context.CancelFunc
l2BlockAdded chan struct{} // notifies the throttling loop whenever an l2 block is added
// mutex serializes StartBatchSubmitting/StopBatchSubmitting and guards running.
mutex sync.Mutex
running bool
txpoolMutex sync.Mutex // guards txpoolState and txpoolBlockedBlob
txpoolState TxPoolState
// txpoolBlockedBlob records the isBlob flag of the transaction that hit
// txpool.ErrAlreadyReserved; it selects the type of the cancellation transaction.
txpoolBlockedBlob bool
// lastStoredBlock is the last block loaded into `state`. If it is empty it should be set to the l2 safe head.
lastStoredBlock eth.BlockID
// lastL1Tip is the most recent L1 tip reported to the metrics (see recordL1Tip).
lastL1Tip eth.L1BlockRef
// state is the channel manager holding the loaded L2 blocks and pending channels.
state *channelManager
}
// NewBatchSubmitter builds a BatchSubmitter from a preconfigured DriverSetup.
// A fresh channel manager is created from the setup; if the setup carries a
// ChannelOutFactory it is installed on that channel manager.
func NewBatchSubmitter(setup DriverSetup) *BatchSubmitter {
	bs := &BatchSubmitter{
		DriverSetup: setup,
		state:       NewChannelManager(setup.Log, setup.Metr, setup.ChannelConfig, setup.RollupConfig),
	}
	if factory := setup.ChannelOutFactory; factory != nil {
		bs.state.SetChannelOutFactory(factory)
	}
	return bs
}
// StartBatchSubmitting starts the batcher's background loops.
//
// It resets the channel manager state, waits for the L2 genesis time and, if
// Config.WaitNodeSync is set, for the rollup node to sync. It then launches the
// receipts-processing loop, the main loop and (unless Config.ThrottleInterval is 0)
// the DA throttling loop.
//
// It returns an error if the batcher is already running or if one of the waits
// fails. On such a failure the batcher is left in the stopped state: both lifetime
// contexts are cancelled and the running flag is reset, so a later call to
// StartBatchSubmitting can succeed.
func (l *BatchSubmitter) StartBatchSubmitting() error {
	l.Log.Info("Starting Batch Submitter")

	l.mutex.Lock()
	defer l.mutex.Unlock()

	if l.running {
		return errors.New("batcher is already running")
	}
	l.running = true

	l.shutdownCtx, l.cancelShutdownCtx = context.WithCancel(context.Background())
	l.killCtx, l.cancelKillCtx = context.WithCancel(context.Background())

	// abortStart undoes the bookkeeping above when start-up fails before any loop
	// has been launched. Without it the batcher would be reported as running with
	// no loop alive, and the two contexts would never be released.
	abortStart := func() {
		l.cancelKillCtx()
		l.cancelShutdownCtx()
		l.running = false
	}

	l.clearState(l.shutdownCtx)
	l.lastStoredBlock = eth.BlockID{}

	if err := l.waitForL2Genesis(); err != nil {
		abortStart()
		return fmt.Errorf("error waiting for L2 genesis: %w", err)
	}

	if l.Config.WaitNodeSync {
		if err := l.waitNodeSync(); err != nil {
			abortStart()
			return fmt.Errorf("error waiting for node sync: %w", err)
		}
	}

	receiptsCh := make(chan txmgr.TxReceipt[txRef])
	// The receipts and throttling loops get their own contexts; the main loop
	// cancels them when it exits.
	receiptsLoopCtx, cancelReceiptsLoopCtx := context.WithCancel(context.Background())
	throttlingLoopCtx, cancelThrottlingLoopCtx := context.WithCancel(context.Background())

	// DA throttling loop should always be started except for testing (indicated by ThrottleInterval == 0)
	if l.Config.ThrottleInterval > 0 {
		l.wg.Add(1)
		go l.throttlingLoop(throttlingLoopCtx)
	} else {
		l.Log.Warn("Throttling loop is DISABLED due to 0 throttle-interval. This should not be disabled in prod.")
	}

	l.wg.Add(2)
	go l.processReceiptsLoop(receiptsLoopCtx, receiptsCh)                                     // receives from receiptsCh
	go l.mainLoop(l.shutdownCtx, receiptsCh, cancelReceiptsLoopCtx, cancelThrottlingLoopCtx) // sends on receiptsCh

	l.Log.Info("Batch Submitter started")
	return nil
}
// waitForL2Genesis blocks until the L2 genesis time from the rollup config has been
// reached, logging the remaining time every 30 seconds. It returns immediately if
// the genesis time is already in the past, and returns an error if the shutdown
// context is cancelled while waiting.
func (l *BatchSubmitter) waitForL2Genesis() error {
	genesisTime := time.Unix(int64(l.RollupConfig.Genesis.L2Time), 0)
	if time.Now().After(genesisTime) {
		return nil
	}

	l.Log.Info("Waiting for L2 genesis", "genesisTime", genesisTime)

	// Periodic progress log while waiting.
	progress := time.NewTicker(30 * time.Second)
	defer progress.Stop()

	reached := time.After(time.Until(genesisTime))

	for {
		select {
		case <-l.shutdownCtx.Done():
			return errors.New("batcher stopped")
		case <-reached:
			l.Log.Info("L2 genesis time reached")
			return nil
		case <-progress.C:
			left := time.Until(genesisTime)
			l.Log.Info("Waiting for L2 genesis", "remainingTime", left.Round(time.Second))
		}
	}
}
// StopBatchSubmittingIfRunning stops the batcher like StopBatchSubmitting, but
// treats "not running" as success: ErrBatcherNotRunning is translated to nil.
// Any other error is returned unchanged.
func (l *BatchSubmitter) StopBatchSubmittingIfRunning(ctx context.Context) error {
	if err := l.StopBatchSubmitting(ctx); !errors.Is(err, ErrBatcherNotRunning) {
		return err
	}
	return nil
}
// StopBatchSubmitting requests a graceful stop of the batcher loops and waits for
// them to exit. If ctx becomes done before they have exited, the kill context is
// cancelled to force the shutdown. It returns ErrBatcherNotRunning if the batcher
// is not running.
func (l *BatchSubmitter) StopBatchSubmitting(ctx context.Context) error {
	l.Log.Info("Stopping Batch Submitter")

	l.mutex.Lock()
	defer l.mutex.Unlock()

	if !l.running {
		return ErrBatcherNotRunning
	}
	l.running = false

	// Watcher goroutine: force-kill as soon as the caller's ctx is done. The wrapped
	// context is cancelled on return, so the watcher always terminates.
	forceKill := l.cancelKillCtx
	watchCtx, stopWatching := context.WithCancel(ctx)
	defer stopWatching()
	go func() {
		<-watchCtx.Done()
		forceKill()
	}()

	// Graceful path: ask the loops to stop, wait for them, then release the kill context.
	l.cancelShutdownCtx()
	l.wg.Wait()
	l.cancelKillCtx()

	l.Log.Info("Batch Submitter stopped")
	return nil
}
// loadBlocksIntoState loads every L2 block after the last stored block, up to and
// including the unsafe head reported in syncStatus, into the channel manager.
//
// The range is obtained from calculateL2BlockRangeToStore; an empty or inverted
// range is reported as an error. After each successfully loaded block,
// lastStoredBlock is advanced. When loading a block reports ErrReorg,
// lastStoredBlock is reset to the zero value, but the channel manager state is not
// cleared here, so the caller decides how to proceed. On success the last loaded
// block is reported to the metrics.
func (l *BatchSubmitter) loadBlocksIntoState(syncStatus eth.SyncStatus, ctx context.Context) error {
	start, end, err := l.calculateL2BlockRangeToStore(syncStatus)
	if err != nil {
		l.Log.Warn("Error calculating L2 block range", "err", err)
		return err
	}
	if start.Number >= end.Number {
		return errors.New("start number is >= end number")
	}

	// The range is (start, end]: start itself is already stored.
	var latestBlock *types.Block
	for num := start.Number + 1; num < end.Number+1; num++ {
		block, err := l.loadBlockIntoState(ctx, num)
		switch {
		case errors.Is(err, ErrReorg):
			l.Log.Warn("Found L2 reorg", "block_number", num)
			l.lastStoredBlock = eth.BlockID{}
			return err
		case err != nil:
			l.Log.Warn("Failed to load block into state", "err", err)
			return err
		}
		l.lastStoredBlock = eth.ToBlockID(block)
		latestBlock = block
	}

	l2ref, err := derive.L2BlockToBlockRef(l.RollupConfig, latestBlock)
	if err != nil {
		l.Log.Warn("Invalid L2 block loaded into state", "err", err)
		return err
	}

	l.Metr.RecordL2BlocksLoaded(l2ref)
	return nil
}
// loadBlockIntoState fetches the L2 block with the given number and adds it to the
// channel manager. On success it also pokes the throttling loop (without blocking)
// and returns the loaded block. Errors from obtaining the client, fetching the
// block or adding it to the state are returned wrapped.
func (l *BatchSubmitter) loadBlockIntoState(ctx context.Context, blockNumber uint64) (*types.Block, error) {
	client, err := l.EndpointProvider.EthClient(ctx)
	if err != nil {
		return nil, fmt.Errorf("getting L2 client: %w", err)
	}

	fetchCtx, cancelFetch := context.WithTimeout(ctx, l.Config.NetworkTimeout)
	defer cancelFetch()

	number := new(big.Int).SetUint64(blockNumber)
	block, err := client.BlockByNumber(fetchCtx, number)
	if err != nil {
		return nil, fmt.Errorf("getting L2 block: %w", err)
	}

	if err = l.state.AddL2Block(block); err != nil {
		return nil, fmt.Errorf("adding L2 block to state: %w", err)
	}

	// notify the throttling loop it may be time to initiate throttling without blocking
	select {
	case l.l2BlockAdded <- struct{}{}:
	default:
	}

	l.Log.Info("Added L2 block to local state", "block", eth.ToBlockID(block), "tx_count", len(block.Transactions()), "time", block.Time())
	return block, nil
}
// getSyncStatus queries the rollup node for its sync status, retrying while the
// returned status has an empty L1 head. Between retries it waits on a timer whose
// period starts at one second and doubles up to 30 seconds. A failing query is not
// retried: its error is returned wrapped. If ctx is done while backing off, ctx.Err()
// is returned.
func (l *BatchSubmitter) getSyncStatus(ctx context.Context) (*eth.SyncStatus, error) {
	rollupClient, err := l.EndpointProvider.RollupClient(ctx)
	if err != nil {
		return nil, fmt.Errorf("getting rollup client: %w", err)
	}

	const maxBackoff = 30 * time.Second
	backoff := time.Second

	// query performs a single sync status request bounded by the network timeout.
	query := func() (*eth.SyncStatus, error) {
		cCtx, cancel := context.WithTimeout(ctx, l.Config.NetworkTimeout)
		defer cancel()
		return rollupClient.SyncStatus(cCtx)
	}

	timer := time.NewTimer(backoff)
	defer timer.Stop()

	for {
		status, err := query()
		if err != nil {
			return nil, fmt.Errorf("failed to get sync status: %w", err)
		}

		// A non-empty L1 head means the status is usable.
		if status.HeadL1 != (eth.L1BlockRef{}) {
			return status, nil
		}

		l.Log.Info("Received empty sync status, backing off", "backoff", backoff)
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-timer.C:
			// Re-arm the timer with the next (capped) backoff period.
			backoff = min(backoff*2, maxBackoff)
			timer.Reset(backoff)
		}
	}
}
// calculateL2BlockRangeToStore determines the range (start,end] of L2 blocks that
// should be loaded into the local state: start is the last stored block and end is
// the unsafe head from syncStatus.
//
// As a side effect it sets l.lastStoredBlock to the safe head when it is still
// unset (start-up) or when it lags behind the safe head. An error is returned for
// an empty sync status or when the safe head is not behind the unsafe head.
func (l *BatchSubmitter) calculateL2BlockRangeToStore(syncStatus eth.SyncStatus) (eth.BlockID, eth.BlockID, error) {
	var none eth.BlockID

	if syncStatus.HeadL1 == (eth.L1BlockRef{}) {
		return none, none, errors.New("empty sync status")
	}

	safe, unsafe := syncStatus.SafeL2, syncStatus.UnsafeL2

	// Lagging implies that the op-node processed some batches that were submitted
	// prior to the current instance of the batcher being alive.
	switch {
	case l.lastStoredBlock == (eth.BlockID{}):
		l.Log.Info("Starting batch-submitter work at safe-head", "safe", safe)
		l.lastStoredBlock = safe.ID()
	case l.lastStoredBlock.Number < safe.Number:
		l.Log.Warn("Last submitted block lagged behind L2 safe head: batch submission will continue from the safe head now", "last", l.lastStoredBlock, "safe", safe)
		l.lastStoredBlock = safe.ID()
	}

	// Check if we should even attempt to load any blocks. TODO: May not need this check
	if safe.Number >= unsafe.Number {
		return none, none, fmt.Errorf("L2 safe head(%d) ahead of L2 unsafe head(%d)", safe.Number, unsafe.Number)
	}

	return l.lastStoredBlock, unsafe.ID(), nil
}
// The following things occur:
// New L2 block (reorg or not)
// L1 transaction is confirmed
//
// What the batcher does:
// Ensure that channels are created & submitted as frames for an L2 range
//
// Error conditions:
// Submitted batch, but it is not valid
// Missed L2 block somehow.

// TxPoolState is the batcher's view of whether the L1 txpool currently accepts its
// transactions. The possible values and transitions are listed with the constants below.
type TxPoolState int
const (
// Txpool states. Possible state transitions:
// TxpoolGood -> TxpoolBlocked:
// happens when ErrAlreadyReserved is ever returned by the TxMgr.
// TxpoolBlocked -> TxpoolCancelPending:
// happens once the send loop detects the txpool is blocked, and results in attempting to
// send a cancellation transaction.
// TxpoolCancelPending -> TxpoolGood:
// happens once the cancel transaction completes, whether successfully or in error.
TxpoolGood TxPoolState = iota
TxpoolBlocked
TxpoolCancelPending
)
// setTxPoolState atomically replaces both the txpool state and the blocked-blob
// flag with the supplied values, holding txpoolMutex for the duration of the update.
func (l *BatchSubmitter) setTxPoolState(txPoolState TxPoolState, txPoolBlockedBlob bool) {
	l.txpoolMutex.Lock()
	defer l.txpoolMutex.Unlock()

	l.txpoolState, l.txpoolBlockedBlob = txPoolState, txPoolBlockedBlob
}
// mainLoop runs until ctx is done. On every poll tick it:
//   - checks the txpool state (sending a cancellation tx if it is blocked),
//   - polls the sequencer for its sync status,
//   - prunes the channel manager state (i.e. safe blocks and channels),
//   - loads unsafe blocks from the sequencer,
//   - drives the creation of channels and frames and sends transactions to the DA layer.
//
// When it returns it closes the l2BlockAdded channel, cancels the throttling and
// receipts loops, and marks itself done on the wait group, in that order.
func (l *BatchSubmitter) mainLoop(ctx context.Context, receiptsCh chan txmgr.TxReceipt[txRef], receiptsLoopCancel, throttlingLoopCancel context.CancelFunc) {
	defer l.wg.Done()
	defer receiptsLoopCancel()
	defer throttlingLoopCancel()

	queue := txmgr.NewQueue[txRef](l.killCtx, l.Txmgr, l.Config.MaxPendingTransactions)

	// errgroup with limit of 0 means no goroutine is able to run concurrently,
	// so we only set the limit if it is greater than 0.
	var daGroup errgroup.Group
	if limit := l.Config.MaxConcurrentDARequests; limit > 0 {
		daGroup.SetLimit(int(limit))
	}

	l.txpoolMutex.Lock()
	l.txpoolState = TxpoolGood
	l.txpoolMutex.Unlock()

	l.l2BlockAdded = make(chan struct{})
	defer close(l.l2BlockAdded)

	// pollOnce performs the work of a single tick; it returns early whenever the
	// remaining steps must be skipped until the next tick.
	pollOnce := func() {
		if !l.checkTxpool(queue, receiptsCh) {
			return
		}

		syncStatus, err := l.getSyncStatus(l.shutdownCtx)
		if err != nil {
			l.Log.Warn("could not get sync status", "err", err)
			return
		}

		l.state.pruneSafeBlocks(syncStatus.SafeL2)
		l.state.pruneChannels(syncStatus.SafeL2)

		if err := l.state.CheckExpectedProgress(*syncStatus); err != nil {
			l.Log.Warn("error checking expected progress, clearing state and waiting for node sync", "err", err)
			l.waitNodeSyncAndClearState()
			return
		}

		// Only a reorg aborts the tick; other load errors still fall through to publishing.
		if err := l.loadBlocksIntoState(*syncStatus, l.shutdownCtx); errors.Is(err, ErrReorg) {
			l.Log.Warn("error loading blocks, clearing state and waiting for node sync", "err", err)
			l.waitNodeSyncAndClearState()
			return
		}

		l.publishStateToL1(queue, receiptsCh, &daGroup, l.Config.PollInterval)
	}

	ticker := time.NewTicker(l.Config.PollInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			l.Log.Warn("main loop returning")
			return
		case <-ticker.C:
			pollOnce()
		}
	}
}
// processReceiptsLoop handles transaction receipts from the DA layer until ctx is
// done. For every receipt it first updates the txpool state machine and then
// records the transaction outcome via handleReceipt:
//
//   - a receipt failing with txpool.ErrAlreadyReserved while the state is TxpoolGood
//     moves the state to TxpoolBlocked and remembers whether the tx was a blob tx;
//   - a receipt of a cancellation tx while the state is TxpoolCancelPending moves
//     the state back to TxpoolGood.
//
// The state is inspected and updated while holding txpoolMutex, because
// checkTxpool reads and writes the same fields from the main loop goroutine.
func (l *BatchSubmitter) processReceiptsLoop(ctx context.Context, receiptsCh chan txmgr.TxReceipt[txRef]) {
	defer l.wg.Done()
	l.Log.Info("Starting receipts processing loop")
	for {
		select {
		case r := <-receiptsCh:
			// Decide and apply the transition atomically; log after releasing the lock.
			l.txpoolMutex.Lock()
			becameBlocked := errors.Is(r.Err, txpool.ErrAlreadyReserved) && l.txpoolState == TxpoolGood
			becameGood := !becameBlocked && r.ID.isCancel && l.txpoolState == TxpoolCancelPending
			switch {
			case becameBlocked:
				l.txpoolState = TxpoolBlocked
				l.txpoolBlockedBlob = r.ID.isBlob
			case becameGood:
				// Set state to TxpoolGood even if the cancellation transaction ended in error
				// since the stuck transaction could have cleared while we were waiting.
				// The blocked-blob flag is left untouched.
				l.txpoolState = TxpoolGood
			}
			l.txpoolMutex.Unlock()

			switch {
			case becameBlocked:
				l.Log.Warn("incompatible tx in txpool", "id", r.ID, "is_blob", r.ID.isBlob)
			case becameGood:
				l.Log.Info("txpool may no longer be blocked", "err", r.Err)
			}

			l.Log.Info("Handling receipt", "id", r.ID)
			l.handleReceipt(r)
		case <-ctx.Done():
			l.Log.Info("Receipt processing loop done")
			return
		}
	}
}
// throttlingLoop monitors the backlog in bytes we need to make available, and appropriately enables or disables
// throttling of incoming data to prevent the backlog from growing too large. By looping & calling the miner API setter
// continuously, we ensure the engine currently in use is always going to be reset to the proper throttling settings
// even in the event of sequencer failover.
//
// The parameters are pushed on every throttle tick and whenever an L2 block is added.
// The loop exits when ctx is done. It also exits when the SetMaxDASize RPC reports a
// generic RPC error (method unavailable or broken); in that case it triggers a
// shutdown of the whole batcher.
func (l *BatchSubmitter) throttlingLoop(ctx context.Context) {
	defer l.wg.Done()
	l.Log.Info("Starting DA throttling loop")
	ticker := time.NewTicker(l.Config.ThrottleInterval)
	defer ticker.Stop()

	// updateParams pushes the current throttling parameters to the sequencer's
	// execution engine. It reports whether the RPC is unusable, in which case the
	// batcher must be shut down. All other failures are logged and retried on the
	// next trigger.
	updateParams := func() bool {
		callCtx, cancel := context.WithTimeout(l.shutdownCtx, l.Config.NetworkTimeout)
		defer cancel()
		cl, err := l.EndpointProvider.EthClient(callCtx)
		if err != nil {
			l.Log.Error("Can't reach sequencer execution RPC", "err", err)
			return false
		}

		// Without throttling no tx size limit is set and the block size limit is the
		// "always" value; over the threshold the throttle limits apply.
		pendingBytes := l.state.PendingDABytes()
		maxTxSize := uint64(0)
		maxBlockSize := l.Config.ThrottleAlwaysBlockSize
		if pendingBytes > int64(l.Config.ThrottleThreshold) {
			l.Log.Warn("Pending bytes over limit, throttling DA", "bytes", pendingBytes, "limit", l.Config.ThrottleThreshold)
			maxTxSize = l.Config.ThrottleTxSize
			// Use the throttle block size when no "always" limit is configured, or when
			// the throttle limit is set and stricter than the "always" limit.
			if maxBlockSize == 0 || (l.Config.ThrottleBlockSize != 0 && l.Config.ThrottleBlockSize < maxBlockSize) {
				maxBlockSize = l.Config.ThrottleBlockSize
			}
		}

		var (
			success bool
			rpcErr  rpc.Error
		)
		err = cl.Client().CallContext(
			callCtx, &success, SetMaxDASizeMethod, hexutil.Uint64(maxTxSize), hexutil.Uint64(maxBlockSize),
		)
		if errors.As(err, &rpcErr) && eth.ErrorCode(rpcErr.ErrorCode()).IsGenericRPCError() {
			l.Log.Error("SetMaxDASize rpc unavailable or broken, shutting down. Either enable it or disable throttling.", "err", err)
			return true
		}
		if err != nil {
			l.Log.Error("SetMaxDASize rpc failed, retrying.", "err", err)
			return false
		}
		if !success {
			l.Log.Error("Result of SetMaxDASize was false, retrying.")
		}
		return false
	}

	for {
		select {
		case <-l.l2BlockAdded:
		case <-ticker.C:
		case <-ctx.Done():
			l.Log.Info("DA throttling loop done")
			return
		}

		if rpcUnusable := updateParams(); rpcUnusable {
			// StopBatchSubmitting waits on l.wg, which this goroutine is part of, so it
			// must not be called synchronously from here: it would wait for itself
			// forever while holding l.mutex. Run it in the background and exit this loop.
			go func() {
				// We'd probably hit this error right after startup, so a short shutdown duration should suffice.
				stopCtx, cancelStop := context.WithTimeout(context.Background(), 10*time.Second)
				defer cancelStop()
				// The only error returned is ErrBatcherNotRunning, which means someone
				// else already stopped the batcher; there is nothing left to do then.
				_ = l.StopBatchSubmitting(stopCtx)
			}()
			l.Log.Info("DA throttling loop done")
			return
		}
	}
}
// waitNodeSyncAndClearState waits for the rollup node to sync, so that any in-flight
// transactions are ingested by the node before blocks are loaded again, and then
// clears the channel manager state. A failure to wait is only logged; the state is
// cleared regardless.
func (l *BatchSubmitter) waitNodeSyncAndClearState() {
	if err := l.waitNodeSync(); err != nil {
		l.Log.Warn("error waiting for node sync", "err", err)
	}
	l.clearState(l.shutdownCtx)
}
// waitNodeSync waits until the rollup node has synced up to a target L1 block.
// The target is the current L1 tip, unless Config.CheckRecentTxsDepth is non-zero:
// then recent L1 blocks are checked for transactions sent by the batcher and the
// block number returned by that check is used instead.
func (l *BatchSubmitter) waitNodeSync() error {
	lifetimeCtx := l.shutdownCtx

	rollupClient, err := l.EndpointProvider.RollupClient(lifetimeCtx)
	if err != nil {
		return fmt.Errorf("failed to get rollup client: %w", err)
	}

	// One network timeout bounds both L1 queries below.
	queryCtx, cancelQuery := context.WithTimeout(lifetimeCtx, l.Config.NetworkTimeout)
	defer cancelQuery()

	tip, err := l.l1Tip(queryCtx)
	if err != nil {
		return fmt.Errorf("failed to retrieve l1 tip: %w", err)
	}

	target := tip.Number
	if depth := l.Config.CheckRecentTxsDepth; depth != 0 {
		l.Log.Info("Checking for recently submitted batcher transactions on L1")
		recentBlock, found, err := eth.CheckRecentTxs(queryCtx, l.L1Client, depth, l.Txmgr.From())
		if err != nil {
			return fmt.Errorf("failed checking recent batcher txs: %w", err)
		}
		l.Log.Info("Checked for recently submitted batcher transactions on L1",
			"l1_head", tip, "l1_recent", recentBlock, "found", found)
		target = recentBlock
	}

	return dial.WaitRollupSync(l.shutdownCtx, l.Log, rollupClient, target, 12*time.Second)
}
// publishStateToL1 queues up pending TxData for publishing to L1, one transaction
// at a time. It returns when there is no more data to queue, when queueing fails,
// when the txmgr is closed, when the txpool state is not good, or once maxDuration
// has been exceeded even if more data remains.
func (l *BatchSubmitter) publishStateToL1(queue *txmgr.Queue[txRef], receiptsCh chan txmgr.TxReceipt[txRef], daGroup *errgroup.Group, maxDuration time.Duration) {
	began := time.Now()
	for {
		switch {
		case l.Txmgr.IsClosed():
			// if the txmgr is closed, we stop the transaction sending
			l.Log.Info("Txmgr is closed, aborting state publishing")
			return
		case !l.checkTxpool(queue, receiptsCh):
			l.Log.Info("txpool state is not good, aborting state publishing")
			return
		}

		if err := l.publishTxToL1(l.killCtx, queue, receiptsCh, daGroup); err != nil {
			// io.EOF just means there is nothing left to publish.
			if err != io.EOF {
				l.Log.Error("Error publishing tx to l1", "err", err)
			}
			return
		}

		if elapsed := time.Since(began); elapsed > maxDuration {
			l.Log.Warn("Aborting state publishing, max duration exceeded")
			return
		}
	}
}
// clearState clears the channel manager, seeding it with the safe L1 origin
// reported by the rollup node. If the origin cannot be fetched it retries every
// five seconds. If ctx is done before a fetch succeeds, the state is cleared with
// an empty origin instead.
func (l *BatchSubmitter) clearState(ctx context.Context) {
	l.Log.Info("Clearing state")
	defer l.Log.Info("State cleared")

	// tryClear makes one attempt and reports whether the state was cleared.
	tryClear := func() bool {
		origin, err := l.safeL1Origin(ctx)
		if err != nil {
			l.Log.Warn("Failed to query L1 safe origin, will retry", "err", err)
			return false
		}
		l.Log.Info("Clearing state with safe L1 origin", "origin", origin)
		l.state.Clear(origin)
		return true
	}

	// First attempt happens immediately; only on failure do we enter the retry loop.
	if tryClear() {
		return
	}

	retry := time.NewTicker(5 * time.Second)
	defer retry.Stop()

	for {
		select {
		case <-ctx.Done():
			l.Log.Warn("Clearing state cancelled")
			l.state.Clear(eth.BlockID{})
			return
		case <-retry.C:
			if tryClear() {
				return
			}
		}
	}
}
// publishTxToL1 submits a single state tx to the L1. It queries and records the
// current L1 tip, pulls the next transaction data from the channel manager and
// hands it to sendTransaction. io.EOF is returned unchanged when no transaction
// data is available.
func (l *BatchSubmitter) publishTxToL1(ctx context.Context, queue *txmgr.Queue[txRef], receiptsCh chan txmgr.TxReceipt[txRef], daGroup *errgroup.Group) error {
	tip, err := l.l1Tip(ctx)
	if err != nil {
		l.Log.Error("Failed to query L1 tip", "err", err)
		return err
	}
	l.recordL1Tip(tip)

	// Collect next transaction data. This pulls data out of the channel, so we need to make sure
	// to put it back if ever da or txmgr requests fail, by calling l.recordFailedDARequest/recordFailedTx.
	txdata, err := l.state.TxData(tip.ID())
	switch {
	case err == io.EOF:
		l.Log.Trace("No transaction data available")
		return err
	case err != nil:
		l.Log.Error("Unable to get tx data", "err", err)
		return err
	}

	if sendErr := l.sendTransaction(txdata, queue, receiptsCh, daGroup); sendErr != nil {
		return fmt.Errorf("BatchSubmitter.sendTransaction failed: %w", sendErr)
	}
	return nil
}
// safeL1Origin returns the L1 origin of the current safe L2 head as reported by
// the rollup node. If that origin has block number 0, the L1 genesis block from
// the rollup config is returned instead. Errors from obtaining the rollup client
// or the sync status are logged on the batcher's own logger and returned wrapped.
func (l *BatchSubmitter) safeL1Origin(ctx context.Context) (eth.BlockID, error) {
	c, err := l.EndpointProvider.RollupClient(ctx)
	if err != nil {
		l.Log.Error("Failed to get rollup client", "err", err)
		return eth.BlockID{}, fmt.Errorf("safe l1 origin: error getting rollup client: %w", err)
	}

	cCtx, cancel := context.WithTimeout(ctx, l.Config.NetworkTimeout)
	defer cancel()

	status, err := c.SyncStatus(cCtx)
	if err != nil {
		l.Log.Error("Failed to get sync status", "err", err)
		return eth.BlockID{}, fmt.Errorf("safe l1 origin: error getting sync status: %w", err)
	}

	// If the safe L2 block origin is 0, we are at the genesis block and should use the L1 origin from the rollup config.
	if status.SafeL2.L1Origin.Number == 0 {
		return l.RollupConfig.Genesis.L1, nil
	}

	return status.SafeL2.L1Origin, nil
}
// cancelBlockingTx sends an empty transaction of the opposite type to cancel out
// the incompatible transaction stuck in the txpool: an empty calldata tx if a blob
// tx is blocked, an empty blob tx otherwise. In the future we might send an actual
// batch transaction instead of an empty one to avoid wasting the tx fee.
// It panics if the empty blob candidate cannot be built.
func (l *BatchSubmitter) cancelBlockingTx(queue *txmgr.Queue[txRef], receiptsCh chan txmgr.TxReceipt[txRef], isBlockedBlob bool) {
	var candidate *txmgr.TxCandidate
	if isBlockedBlob {
		candidate = l.calldataTxCandidate([]byte{})
	} else {
		blobCandidate, err := l.blobTxCandidate(emptyTxData)
		if err != nil {
			panic(err) // this error should not happen
		}
		candidate = blobCandidate
	}

	l.Log.Warn("sending a cancellation transaction to unblock txpool", "blocked_blob", isBlockedBlob)
	l.sendTx(txData{}, true, candidate, queue, receiptsCh)
}
// publishToAltDAAndL1 posts the txdata to the DA Provider and then sends the
// resulting commitment to L1 as a calldata transaction. The post happens in a
// goroutine of daGroup; if the group is at its limit, or the post fails, the
// txdata is returned to the channel manager for a later retry.
func (l *BatchSubmitter) publishToAltDAAndL1(txdata txData, queue *txmgr.Queue[txRef], receiptsCh chan txmgr.TxReceipt[txRef], daGroup *errgroup.Group) {
	// sanity checks
	if nf := len(txdata.frames); nf != 1 {
		l.Log.Crit("Unexpected number of frames in calldata tx", "num_frames", nf)
	}
	if txdata.asBlob {
		l.Log.Crit("Unexpected blob txdata with AltDA enabled")
	}

	// postAndSend always returns nil: failures are handled by requeueing the txdata.
	postAndSend := func() error {
		// TODO: probably shouldn't be using the global shutdownCtx here, see https://go.dev/blog/context-and-structs
		// but sendTransaction receives l.killCtx as an argument, which currently is only canceled after waiting for the main loop
		// to exit, which would wait on this DA call to finish, which would take a long time.
		// So we prefer to mimic the behavior of txmgr and cancel all pending DA/txmgr requests when the batcher is stopped.
		comm, err := l.AltDA.SetInput(l.shutdownCtx, txdata.CallData())
		if err != nil {
			l.Log.Error("Failed to post input to Alt DA", "error", err)
			// requeue frame if we fail to post to the DA Provider so it can be retried
			// note: this assumes that the da server caches requests, otherwise it might lead to resubmissions of the blobs
			l.recordFailedDARequest(txdata.ID(), err)
			return nil
		}
		l.Log.Info("Set altda input", "commitment", comm, "tx", txdata.ID())
		l.sendTx(txdata, false, l.calldataTxCandidate(comm.TxData()), queue, receiptsCh)
		return nil
	}

	// when posting txdata to an external DA Provider, we use a goroutine to avoid blocking the main loop
	// since it may take a while for the request to return.
	if daGroup.TryGo(postAndSend) {
		return
	}

	// We couldn't start the goroutine because the errgroup.Group limit
	// is already reached. Since we can't send the txdata, we have to
	// return it for later processing. We use nil error to skip error logging.
	l.recordFailedDARequest(txdata.ID(), nil)
}
// sendTransaction creates & queues for sending a transaction to the batch inbox
// address with the given `txData`. With Alt DA enabled the txdata is posted to the
// DA Provider and replaced by its commitment; otherwise a blob or calldata
// transaction is built directly. The call will block if the txmgr queue is at its
// MaxPendingTransactions limit. The only error returned is a failure to build a
// blob transaction candidate.
func (l *BatchSubmitter) sendTransaction(txdata txData, queue *txmgr.Queue[txRef], receiptsCh chan txmgr.TxReceipt[txRef], daGroup *errgroup.Group) error {
	// if Alt DA is enabled we post the txdata to the DA Provider and replace it with the commitment.
	if l.Config.UseAltDA {
		l.publishToAltDAAndL1(txdata, queue, receiptsCh, daGroup)
		// we return nil to allow publishStateToL1 to keep processing the next txdata
		return nil
	}

	if !txdata.asBlob {
		// sanity check
		if nf := len(txdata.frames); nf != 1 {
			l.Log.Crit("Unexpected number of frames in calldata tx", "num_frames", nf)
		}
		l.sendTx(txdata, false, l.calldataTxCandidate(txdata.CallData()), queue, receiptsCh)
		return nil
	}

	candidate, err := l.blobTxCandidate(txdata)
	if err != nil {
		// We could potentially fall through and try a calldata tx instead, but this would
		// likely result in the chain spending more in gas fees than it is tuned for, so best
		// to just fail. We do not expect this error to trigger unless there is a serious bug
		// or configuration issue.
		return fmt.Errorf("could not create blob tx candidate: %w", err)
	}
	l.sendTx(txdata, false, candidate, queue, receiptsCh)
	return nil
}
// sendTx hands the transaction candidate to the txmgr queue, tagged with a txRef
// built from txdata and isCancel. Before sending, the candidate's gas limit is set
// to the intrinsic gas of its data; if that cannot be computed the gas limit is left
// untouched. It will block if the txmgr queue has reached its MaxPendingTransactions limit.
func (l *BatchSubmitter) sendTx(txdata txData, isCancel bool, candidate *txmgr.TxCandidate, queue *txmgr.Queue[txRef], receiptsCh chan txmgr.TxReceipt[txRef]) {
	if gas, err := core.IntrinsicGas(candidate.TxData, nil, false, true, true, false); err == nil {
		candidate.GasLimit = gas
	} else {
		// we log instead of return an error here because txmgr can do its own gas estimation
		l.Log.Error("Failed to calculate intrinsic gas", "err", err)
	}

	ref := txRef{
		id:       txdata.ID(),
		isCancel: isCancel,
		isBlob:   txdata.asBlob,
	}
	queue.Send(ref, *candidate, receiptsCh)
}
// blobTxCandidate builds a blob transaction candidate addressed to the batch inbox
// from the given tx data. It logs the total size, the size of the last frame and
// the number of blobs, and records the last frame's size as blob-used-bytes metric.
// An error is returned if the blobs cannot be generated.
func (l *BatchSubmitter) blobTxCandidate(data txData) (*txmgr.TxCandidate, error) {
	blobs, err := data.Blobs()
	if err != nil {
		return nil, fmt.Errorf("generating blobs for tx data: %w", err)
	}

	totalSize := data.Len()
	lastFrame := data.frames[len(data.frames)-1]
	lastFrameSize := len(lastFrame.data)

	l.Log.Info("Building Blob transaction candidate",
		"size", totalSize, "last_size", lastFrameSize, "num_blobs", len(blobs))
	l.Metr.RecordBlobUsedBytes(lastFrameSize)

	candidate := &txmgr.TxCandidate{
		To:    &l.RollupConfig.BatchInboxAddress,
		Blobs: blobs,
	}
	return candidate, nil
}
// calldataTxCandidate builds a calldata transaction candidate addressed to the
// batch inbox, carrying data as the transaction payload.
func (l *BatchSubmitter) calldataTxCandidate(data []byte) *txmgr.TxCandidate {
	l.Log.Info("Building Calldata transaction candidate", "size", len(data))

	candidate := new(txmgr.TxCandidate)
	candidate.To = &l.RollupConfig.BatchInboxAddress
	candidate.TxData = data
	return candidate
}
// handleReceipt records the outcome of a transaction: a receipt carrying an error
// is recorded as a failed tx, any other receipt as a confirmed tx.
func (l *BatchSubmitter) handleReceipt(r txmgr.TxReceipt[txRef]) {
	if r.Err == nil {
		l.recordConfirmedTx(r.ID.id, r.Receipt)
		return
	}
	l.recordFailedTx(r.ID.id, r.Err)
}
// recordL1Tip remembers l1tip as the latest L1 tip and reports it to the metrics,
// but only when it differs from the previously recorded tip.
func (l *BatchSubmitter) recordL1Tip(l1tip eth.L1BlockRef) {
	if l.lastL1Tip != l1tip {
		l.lastL1Tip = l1tip
		l.Metr.RecordLatestL1Block(l1tip)
	}
}
// recordFailedDARequest marks the tx data with the given id as failed in the
// channel manager. A non-nil err is logged as a warning first; a nil err skips
// the log.
func (l *BatchSubmitter) recordFailedDARequest(id txID, err error) {
	if failed := err != nil; failed {
		fields := logFields(id, err)
		l.Log.Warn("DA request failed", fields...)
	}
	l.state.TxFailed(id)
}
// recordFailedTx logs that the transaction with the given id failed to send and
// marks it as failed in the channel manager.
func (l *BatchSubmitter) recordFailedTx(id txID, err error) {
	fields := logFields(id, err)
	l.Log.Warn("Transaction failed to send", fields...)
	l.state.TxFailed(id)
}
// recordConfirmedTx logs the confirmation of the transaction with the given id and
// marks it as confirmed in the channel manager, together with the ID of the L1
// block taken from the receipt.
func (l *BatchSubmitter) recordConfirmedTx(id txID, receipt *types.Receipt) {
	fields := logFields(id, receipt)
	l.Log.Info("Transaction confirmed", fields...)
	l.state.TxConfirmed(id, eth.ReceiptBlockID(receipt))
}
// l1Tip fetches the latest L1 header and returns it as a L1BlockRef. The passed
// context is assumed to be a lifetime context, so it is internally wrapped with
// the network timeout.
func (l *BatchSubmitter) l1Tip(ctx context.Context) (eth.L1BlockRef, error) {
	timeoutCtx, release := context.WithTimeout(ctx, l.Config.NetworkTimeout)
	defer release()

	// A nil block number requests the latest header.
	header, err := l.L1Client.HeaderByNumber(timeoutCtx, nil)
	if err != nil {
		return eth.L1BlockRef{}, fmt.Errorf("getting latest L1 block: %w", err)
	}

	info := eth.HeaderBlockInfo(header)
	return eth.InfoToL1BlockRef(info), nil
}
// checkTxpool reports whether the txpool state is TxpoolGood. If the state is
// TxpoolBlocked it is moved to TxpoolCancelPending and a cancellation transaction
// is sent (outside the lock); false is returned in that case.
func (l *BatchSubmitter) checkTxpool(queue *txmgr.Queue[txRef], receiptsCh chan txmgr.TxReceipt[txRef]) bool {
	// Snapshot the state and perform the Blocked -> CancelPending transition under the lock.
	l.txpoolMutex.Lock()
	state := l.txpoolState
	blockedBlob := l.txpoolBlockedBlob
	if state == TxpoolBlocked {
		// txpoolState is set to Blocked only if Send() is returning
		// ErrAlreadyReserved. In this case, the TxMgr nonce should be reset to nil,
		// allowing us to send a cancellation transaction.
		l.txpoolState = TxpoolCancelPending
	}
	l.txpoolMutex.Unlock()

	if state == TxpoolBlocked {
		l.cancelBlockingTx(queue, receiptsCh, blockedBlob)
		return false
	}
	return state == TxpoolGood
}
// logFields converts its arguments into key/value pairs for structured logging:
//   - a txID becomes "tx_id" with its string form,
//   - a *types.Receipt becomes "tx" with the tx hash and "block" with the receipt's block ID,
//   - an error becomes "err",
//   - anything else becomes an "ERROR" entry naming the unsupported type.
//
// The result is nil when called without arguments.
func logFields(xs ...any) (fs []any) {
	for i := range xs {
		switch v := xs[i].(type) {
		case txID:
			fs = append(fs, "tx_id", v.String())
		case *types.Receipt:
			fs = append(fs, "tx", v.TxHash, "block", eth.ReceiptBlockID(v))
		case error:
			fs = append(fs, "err", v)
		default:
			fs = append(fs, "ERROR", fmt.Sprintf("logFields: unknown type: %T", xs[i]))
		}
	}
	return fs
}