From 4637ae47ab995fa108f4375e44a6f7aca62254a3 Mon Sep 17 00:00:00 2001
From: crazycs520
Date: Thu, 5 Sep 2024 17:22:39 +0800
Subject: [PATCH 01/47] avoid goroutine when there is only 1 cop task

Signed-off-by: crazycs520
---
 pkg/executor/test/executor/executor_test.go | 9 +++
 pkg/store/copr/coprocessor.go | 68 +++++++++++++++++++--
 2 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/pkg/executor/test/executor/executor_test.go b/pkg/executor/test/executor/executor_test.go
index 7fd91f5840812..0a1715c504eac 100644
--- a/pkg/executor/test/executor/executor_test.go
+++ b/pkg/executor/test/executor/executor_test.go
@@ -3080,3 +3080,12 @@ func TestQueryWithKill(t *testing.T) {
 	}()
 	wg.Wait()
 }
+
+func TestIndexReaderDebug(t *testing.T) {
+	store := testkit.CreateMockStore(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int key, b int, c int, index idx(b));")
+	tk.MustExec("insert into t values (1,1,1), (2,2,2), (3,3,3)")
+	tk.MustQuery("select a from t use index (idx) where b > 1 and b < 10").Check(testkit.Rows("2", "3"))
+}
diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go
index 16a75bf3f82a4..67f22566800aa 100644
--- a/pkg/store/copr/coprocessor.go
+++ b/pkg/store/copr/coprocessor.go
@@ -656,6 +656,7 @@ type copIterator struct {
 	req *kv.Request
 	concurrency int
 	smallTaskConcurrency int
+	noConcurrent bool
 	finishCh chan struct{}

 	// If keepOrder, results are stored in copTask.respChan, read them out one by one.
@@ -831,6 +832,10 @@ func (worker *copIteratorWorker) run(ctx context.Context) {

 // open starts workers and sender goroutines.
 func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableCollectExecutionInfo bool) {
+	if (it.concurrency + it.smallTaskConcurrency) <= 1 {
+		it.noConcurrent = true
+		return
+	}
 	taskCh := make(chan *copTask, 1)
 	smallTaskCh := make(chan *copTask, 1)
 	it.unconsumedStats = &unconsumedCopRuntimeStats{}
@@ -1050,7 +1055,13 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) {
 	})
 	// If data order matters, response should be returned in the same order as copTask slice.
 	// Otherwise all responses are returned from a single channel.
- if it.respChan != nil { + + if it.noConcurrent { + resp = it.sendReq(ctx) + if resp == nil { + return nil, nil + } + } else if it.respChan != nil { // Get next fetched resp from chan resp, ok, closed = it.recvFromRespCh(ctx, it.respChan) if !ok || closed { @@ -1099,6 +1110,55 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { return resp, nil } +func (it *copIterator) sendReq(ctx context.Context) *copResponse { + if len(it.tasks) == 0 { + return nil + } + worker := &copIteratorWorker{ + //taskCh: make(<-chan *copTask, 1), + wg: &it.wg, + store: it.store, + req: it.req, + //respChan: it.respChan, + finishCh: it.finishCh, + vars: it.vars, + kvclient: txnsnapshot.NewClientHelper(it.store.store, &it.resolvedLocks, &it.committedLocks, false), + memTracker: it.memTracker, + replicaReadSeed: it.replicaReadSeed, + //enableCollectExecutionInfo: enableCollectExecutionInfo, + enableCollectExecutionInfo: true, + pagingTaskIdx: &it.pagingTaskIdx, + storeBatchedNum: &it.storeBatchedNum, + storeBatchedFallbackNum: &it.storeBatchedFallbackNum, + //unconsumedStats: it.unconsumedStats, + } + + backoffermap := make(map[uint64]*Backoffer) + for len(it.tasks) > 0 { + curTask := it.tasks[0] + respCh := make(chan *copResponse, 2) + curTask.respChan = respCh + bo := chooseBackoffer(ctx, backoffermap, curTask, worker) + tasks, err := worker.handleTaskOnce(bo, curTask, respCh) + if err != nil { + resp := &copResponse{err: errors.Trace(err)} + return resp + } + if len(tasks) > 0 { + it.tasks = append(tasks, it.tasks[1:]...) + } else { + it.tasks = it.tasks[1:] + } + select { + case resp := <-respCh: + return resp + default: + continue + } + } + return nil +} + // HasUnconsumedCopRuntimeStats indicate whether has unconsumed CopRuntimeStats. type HasUnconsumedCopRuntimeStats interface { // CollectUnconsumedCopRuntimeStats returns unconsumed CopRuntimeStats. @@ -1311,7 +1371,7 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch remains, err = worker.handleCopPagingResult(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, ch, costTime) } else { // Handles the response for non-paging copTask. - remains, err = worker.handleCopResponse(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, ch, nil, costTime) + remains, err = worker.handleCopResponse(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, ch, costTime) } if req.ReadType != "" { for _, remain := range remains { @@ -1381,7 +1441,7 @@ func appendScanDetail(logStr string, columnFamily string, scanInfo *kvrpcpb.Scan } func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, ch chan<- *copResponse, costTime time.Duration) ([]*copTask, error) { - remainedTasks, err := worker.handleCopResponse(bo, rpcCtx, resp, cacheKey, cacheValue, task, ch, nil, costTime) + remainedTasks, err := worker.handleCopResponse(bo, rpcCtx, resp, cacheKey, cacheValue, task, ch, costTime) if err != nil || len(remainedTasks) != 0 { // If there is region error or lock error, keep the paging size and retry. for _, remainedTask := range remainedTasks { @@ -1411,7 +1471,7 @@ func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *ti // returns more tasks when that happens, or handles the response if no error. // if we're handling coprocessor paging response, lastRange is the range of last // successful response, otherwise it's nil. 
-func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, ch chan<- *copResponse, lastRange *coprocessor.KeyRange, costTime time.Duration) ([]*copTask, error) { +func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, ch chan<- *copResponse, costTime time.Duration) ([]*copTask, error) { if ver := resp.pbResp.GetLatestBucketsVersion(); task.bucketsVer < ver { worker.store.GetRegionCache().UpdateBucketsIfNeeded(task.region, ver) } From 9708f3960511c711ba108778c824205206643602 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 01:21:48 +0800 Subject: [PATCH 02/47] tiny refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 78 +++++++++++++++++------------------ 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 67f22566800aa..f72f4ebf43a59 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -656,7 +656,7 @@ type copIterator struct { req *kv.Request concurrency int smallTaskConcurrency int - noConcurrent bool + liteReqSender bool finishCh chan struct{} // If keepOrder, results are stored in copTask.respChan, read them out one by one. @@ -833,7 +833,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableCollectExecutionInfo bool) { if (it.concurrency + it.smallTaskConcurrency) <= 1 { - it.noConcurrent = true + it.liteReqSender = true return } taskCh := make(chan *copTask, 1) @@ -848,23 +848,7 @@ func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableC } else { ch = smallTaskCh } - worker := &copIteratorWorker{ - taskCh: ch, - wg: &it.wg, - store: it.store, - req: it.req, - respChan: it.respChan, - finishCh: it.finishCh, - vars: it.vars, - kvclient: txnsnapshot.NewClientHelper(it.store.store, &it.resolvedLocks, &it.committedLocks, false), - memTracker: it.memTracker, - replicaReadSeed: it.replicaReadSeed, - enableCollectExecutionInfo: enableCollectExecutionInfo, - pagingTaskIdx: &it.pagingTaskIdx, - storeBatchedNum: &it.storeBatchedNum, - storeBatchedFallbackNum: &it.storeBatchedFallbackNum, - unconsumedStats: it.unconsumedStats, - } + worker := newCopIteratorWorker(it, ch, enableCollectExecutionInfo) go worker.run(ctx) } taskSender := &copIteratorTaskSender{ @@ -886,6 +870,26 @@ func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableC go taskSender.run(it.req.ConnID) } +func newCopIteratorWorker(it *copIterator, taskCh <-chan *copTask, enableCollectExecutionInfo bool) *copIteratorWorker { + return &copIteratorWorker{ + taskCh: taskCh, + wg: &it.wg, + store: it.store, + req: it.req, + respChan: it.respChan, + finishCh: it.finishCh, + vars: it.vars, + kvclient: txnsnapshot.NewClientHelper(it.store.store, &it.resolvedLocks, &it.committedLocks, false), + memTracker: it.memTracker, + replicaReadSeed: it.replicaReadSeed, + enableCollectExecutionInfo: enableCollectExecutionInfo, + pagingTaskIdx: &it.pagingTaskIdx, + storeBatchedNum: &it.storeBatchedNum, + storeBatchedFallbackNum: &it.storeBatchedFallbackNum, + unconsumedStats: it.unconsumedStats, + } +} + func (sender *copIteratorTaskSender) run(connID uint64) { // Send tasks to feed the worker goroutines. 
for _, t := range sender.tasks { @@ -1056,7 +1060,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { // If data order matters, response should be returned in the same order as copTask slice. // Otherwise all responses are returned from a single channel. - if it.noConcurrent { + if it.liteReqSender { resp = it.sendReq(ctx) if resp == nil { return nil, nil @@ -1110,28 +1114,20 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { return resp, nil } -func (it *copIterator) sendReq(ctx context.Context) *copResponse { +func (it *copIterator) sendReq(ctx context.Context) (resp *copResponse) { if len(it.tasks) == 0 { return nil } - worker := &copIteratorWorker{ - //taskCh: make(<-chan *copTask, 1), - wg: &it.wg, - store: it.store, - req: it.req, - //respChan: it.respChan, - finishCh: it.finishCh, - vars: it.vars, - kvclient: txnsnapshot.NewClientHelper(it.store.store, &it.resolvedLocks, &it.committedLocks, false), - memTracker: it.memTracker, - replicaReadSeed: it.replicaReadSeed, - //enableCollectExecutionInfo: enableCollectExecutionInfo, - enableCollectExecutionInfo: true, - pagingTaskIdx: &it.pagingTaskIdx, - storeBatchedNum: &it.storeBatchedNum, - storeBatchedFallbackNum: &it.storeBatchedFallbackNum, - //unconsumedStats: it.unconsumedStats, - } + worker := newCopIteratorWorker(it, nil, true) + defer func() { + r := recover() + if r != nil { + logutil.BgLogger().Error("copIteratorWork meet panic", + zap.Any("r", r), + zap.Stack("stack trace")) + resp = &copResponse{err: util2.GetRecoverError(r)} + } + }() backoffermap := make(map[uint64]*Backoffer) for len(it.tasks) > 0 { @@ -1141,7 +1137,7 @@ func (it *copIterator) sendReq(ctx context.Context) *copResponse { bo := chooseBackoffer(ctx, backoffermap, curTask, worker) tasks, err := worker.handleTaskOnce(bo, curTask, respCh) if err != nil { - resp := &copResponse{err: errors.Trace(err)} + resp = &copResponse{err: errors.Trace(err)} return resp } if len(tasks) > 0 { @@ -1150,7 +1146,7 @@ func (it *copIterator) sendReq(ctx context.Context) *copResponse { it.tasks = it.tasks[1:] } select { - case resp := <-respCh: + case resp = <-respCh: return resp default: continue From 208d2f610b1c65ff32a48c73bc2573d894b0385b Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 01:34:29 +0800 Subject: [PATCH 03/47] remove debug log Signed-off-by: crazycs520 --- pkg/executor/test/executor/executor_test.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pkg/executor/test/executor/executor_test.go b/pkg/executor/test/executor/executor_test.go index a14f4e86ad6e6..b9efca75dd47d 100644 --- a/pkg/executor/test/executor/executor_test.go +++ b/pkg/executor/test/executor/executor_test.go @@ -3082,12 +3082,3 @@ func TestQueryWithKill(t *testing.T) { }() wg.Wait() } - -func TestIndexReaderDebug(t *testing.T) { - store := testkit.CreateMockStore(t) - tk := testkit.NewTestKit(t, store) - tk.MustExec("use test") - tk.MustExec("create table t(a int key, b int, c int, index idx(b));") - tk.MustExec("insert into t values (1,1,1), (2,2,2), (3,3,3)") - tk.MustQuery("select a from t use index (idx) where b > 1 and b < 10").Check(testkit.Rows("2", "3")) -} From e2b5988ad69e88a3735fed838a3426517d29af95 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 10:59:38 +0800 Subject: [PATCH 04/47] fix bug Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 
b2a8f65aa9bd3..56a6f18e78d49 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -195,6 +195,7 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars rpcCancel: tikv.NewRPCanceller(), buildTaskElapsed: *buildOpt.elapsed, runawayChecker: req.RunawayChecker, + unconsumedStats: &unconsumedCopRuntimeStats{}, } // Pipelined-dml can flush locks when it is still reading. // The coprocessor of the txn should not be blocked by itself. @@ -850,7 +851,6 @@ func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableC return } taskCh := make(chan *copTask, 1) - it.unconsumedStats = &unconsumedCopRuntimeStats{} it.wg.Add(it.concurrency + it.smallTaskConcurrency) var smallTaskCh chan *copTask if it.smallTaskConcurrency > 0 { From 3ab1142a0c70211291bcb993683f5bb39fb22cca Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 14:24:48 +0800 Subject: [PATCH 05/47] fix test Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 56a6f18e78d49..4215ea765c38f 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -846,6 +846,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableCollectExecutionInfo bool) { + it.actionOnExceed.setEnabled(enabledRateLimitAction) if (it.concurrency + it.smallTaskConcurrency) <= 1 { it.liteReqSender = true return @@ -874,7 +875,6 @@ func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableC sendRate: it.sendRate, } taskSender.respChan = it.respChan - it.actionOnExceed.setEnabled(enabledRateLimitAction) failpoint.Inject("ticase-4171", func(val failpoint.Value) { if val.(bool) { it.memTracker.Consume(10 * MockResponseSizeForTest) From ffdb1159377bd18aaf0294636eb622d64099bee2 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 14:55:28 +0800 Subject: [PATCH 06/47] init Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index ca9866073a371..22a68fff97e90 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -95,8 +95,6 @@ func (c *CopClient) Send(ctx context.Context, req *kv.Request, variables any, op ctx = context.WithValue(ctx, tikv.TxnStartKey(), req.StartTs) ctx = context.WithValue(ctx, util.RequestSourceKey, req.RequestSource) ctx = interceptor.WithRPCInterceptor(ctx, interceptor.GetRPCInterceptorFromCtx(ctx)) - enabledRateLimitAction := option.EnabledRateLimitAction - sessionMemTracker := option.SessionMemTracker it, errRes := c.BuildCopIterator(ctx, req, vars, option) if errRes != nil { return errRes @@ -105,10 +103,7 @@ func (c *CopClient) Send(ctx context.Context, req *kv.Request, variables any, op if ctx.Value(util.RUDetailsCtxKey) == nil { ctx = context.WithValue(ctx, util.RUDetailsCtxKey, util.NewRUDetails()) } - if sessionMemTracker != nil && enabledRateLimitAction { - sessionMemTracker.FallbackOldAndSetNewAction(it.actionOnExceed) - } - it.open(ctx, enabledRateLimitAction, option.EnableCollectExecutionInfo) + it.open(ctx, option.EnableCollectExecutionInfo) return it } @@ -246,6 +241,10 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars it.sendRate 
= util.NewRateLimit(it.concurrency + it.smallTaskConcurrency) } it.actionOnExceed = newRateLimitAction(uint(it.sendRate.GetCapacity())) + if option.SessionMemTracker != nil && option.EnabledRateLimitAction { + option.SessionMemTracker.FallbackOldAndSetNewAction(it.actionOnExceed) + } + it.actionOnExceed.setEnabled(option.EnabledRateLimitAction) return it, nil } @@ -843,7 +842,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { } // open starts workers and sender goroutines. -func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableCollectExecutionInfo bool) { +func (it *copIterator) open(ctx context.Context, enableCollectExecutionInfo bool) { taskCh := make(chan *copTask, 1) it.unconsumedStats = &unconsumedCopRuntimeStats{} it.wg.Add(it.concurrency + it.smallTaskConcurrency) @@ -885,7 +884,6 @@ func (it *copIterator) open(ctx context.Context, enabledRateLimitAction, enableC sendRate: it.sendRate, } taskSender.respChan = it.respChan - it.actionOnExceed.setEnabled(enabledRateLimitAction) failpoint.Inject("ticase-4171", func(val failpoint.Value) { if val.(bool) { it.memTracker.Consume(10 * MockResponseSizeForTest) From 90b1266340723b37766a789084f9b35def810506 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 14:59:23 +0800 Subject: [PATCH 07/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 22a68fff97e90..6dbb281d1f911 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -190,6 +190,7 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars rpcCancel: tikv.NewRPCanceller(), buildTaskElapsed: *buildOpt.elapsed, runawayChecker: req.RunawayChecker, + unconsumedStats: &unconsumedCopRuntimeStats{}, } // Pipelined-dml can flush locks when it is still reading. // The coprocessor of the txn should not be blocked by itself. @@ -844,7 +845,6 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context, enableCollectExecutionInfo bool) { taskCh := make(chan *copTask, 1) - it.unconsumedStats = &unconsumedCopRuntimeStats{} it.wg.Add(it.concurrency + it.smallTaskConcurrency) var smallTaskCh chan *copTask if it.smallTaskConcurrency > 0 { From 7493230601ad8addf6f5888246f428bfd81136d5 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 16:12:22 +0800 Subject: [PATCH 08/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 72 +++++++++++++++++------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 6dbb281d1f911..11cfadeac5ca5 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -190,7 +190,6 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars rpcCancel: tikv.NewRPCanceller(), buildTaskElapsed: *buildOpt.elapsed, runawayChecker: req.RunawayChecker, - unconsumedStats: &unconsumedCopRuntimeStats{}, } // Pipelined-dml can flush locks when it is still reading. // The coprocessor of the txn should not be blocked by itself. 
@@ -241,6 +240,9 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars it.respChan = make(chan *copResponse) it.sendRate = util.NewRateLimit(it.concurrency + it.smallTaskConcurrency) } + if option.EnableCollectExecutionInfo { + it.stats = &copIteratorRuntimeStats{} + } it.actionOnExceed = newRateLimitAction(uint(it.sendRate.GetCapacity())) if option.SessionMemTracker != nil && option.EnabledRateLimitAction { option.SessionMemTracker.FallbackOldAndSetNewAction(it.actionOnExceed) @@ -706,8 +708,8 @@ type copIterator struct { storeBatchedNum atomic.Uint64 storeBatchedFallbackNum atomic.Uint64 - runawayChecker resourcegroup.RunawayChecker - unconsumedStats *unconsumedCopRuntimeStats + runawayChecker resourcegroup.RunawayChecker + stats *copIteratorRuntimeStats } // copIteratorWorker receives tasks from copIteratorTaskSender, handles tasks and sends the copResponse to respChan. @@ -725,12 +727,11 @@ type copIteratorWorker struct { replicaReadSeed uint32 - enableCollectExecutionInfo bool - pagingTaskIdx *uint32 + pagingTaskIdx *uint32 storeBatchedNum *atomic.Uint64 storeBatchedFallbackNum *atomic.Uint64 - unconsumedStats *unconsumedCopRuntimeStats + stats *copIteratorRuntimeStats } // copIteratorTaskSender sends tasks to taskCh then wait for the workers to exit. @@ -857,21 +858,20 @@ func (it *copIterator) open(ctx context.Context, enableCollectExecutionInfo bool ch = smallTaskCh } worker := &copIteratorWorker{ - taskCh: ch, - wg: &it.wg, - store: it.store, - req: it.req, - respChan: it.respChan, - finishCh: it.finishCh, - vars: it.vars, - kvclient: txnsnapshot.NewClientHelper(it.store.store, &it.resolvedLocks, &it.committedLocks, false), - memTracker: it.memTracker, - replicaReadSeed: it.replicaReadSeed, - enableCollectExecutionInfo: enableCollectExecutionInfo, - pagingTaskIdx: &it.pagingTaskIdx, - storeBatchedNum: &it.storeBatchedNum, - storeBatchedFallbackNum: &it.storeBatchedFallbackNum, - unconsumedStats: it.unconsumedStats, + taskCh: ch, + wg: &it.wg, + store: it.store, + req: it.req, + respChan: it.respChan, + finishCh: it.finishCh, + vars: it.vars, + kvclient: txnsnapshot.NewClientHelper(it.store.store, &it.resolvedLocks, &it.committedLocks, false), + memTracker: it.memTracker, + replicaReadSeed: it.replicaReadSeed, + pagingTaskIdx: &it.pagingTaskIdx, + storeBatchedNum: &it.storeBatchedNum, + storeBatchedFallbackNum: &it.storeBatchedFallbackNum, + stats: it.stats, } go worker.run(ctx) } @@ -1124,13 +1124,13 @@ type HasUnconsumedCopRuntimeStats interface { } func (it *copIterator) CollectUnconsumedCopRuntimeStats() []*CopRuntimeStats { - if it == nil || it.unconsumedStats == nil { + if it == nil || it.stats == nil { return nil } - it.unconsumedStats.Lock() - stats := make([]*CopRuntimeStats, 0, len(it.unconsumedStats.stats)) - stats = append(stats, it.unconsumedStats.stats...) - it.unconsumedStats.Unlock() + it.stats.Lock() + stats := make([]*CopRuntimeStats, 0, len(it.stats.stats)) + stats = append(stats, it.stats.stats...) + it.stats.Unlock() return stats } @@ -1259,7 +1259,7 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch } req.StoreTp = getEndPointType(task.storeType) startTime := time.Now() - if worker.kvclient.Stats == nil { + if worker.stats != nil && worker.kvclient.Stats == nil { worker.kvclient.Stats = tikv.NewRegionRequestRuntimeStats() } // set ReadReplicaScope and TxnScope so that req.IsStaleRead will be true when it's a global scope stale read. 
@@ -1786,19 +1786,19 @@ func (worker *copIteratorWorker) handleCopCache(task *copTask, resp *copResponse } func (worker *copIteratorWorker) getLockResolverDetails() *util.ResolveLockDetail { - if !worker.enableCollectExecutionInfo { + if worker.stats == nil { return nil } return &util.ResolveLockDetail{} } func (worker *copIteratorWorker) handleCollectExecutionInfo(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse) error { + if worker.stats == nil { + return nil + } defer func() { worker.kvclient.Stats = nil }() - if !worker.enableCollectExecutionInfo { - return nil - } failpoint.Inject("disable-collect-execution", func(val failpoint.Value) { if val.(bool) { panic("shouldn't reachable") @@ -1863,16 +1863,16 @@ func (worker *copIteratorWorker) collectCopRuntimeStats(copStats *CopRuntimeStat } func (worker *copIteratorWorker) collectUnconsumedCopRuntimeStats(bo *Backoffer, rpcCtx *tikv.RPCContext) error { - if worker.kvclient.Stats == nil { + if worker.kvclient.Stats == nil || worker.stats == nil { return nil } copStats := &CopRuntimeStats{} if err := worker.collectCopRuntimeStats(copStats, bo, rpcCtx, nil); err != nil { return err } - worker.unconsumedStats.Lock() - worker.unconsumedStats.stats = append(worker.unconsumedStats.stats, copStats) - worker.unconsumedStats.Unlock() + worker.stats.Lock() + worker.stats.stats = append(worker.stats.stats, copStats) + worker.stats.Unlock() worker.kvclient.Stats = nil return nil } @@ -1885,7 +1885,7 @@ type CopRuntimeStats struct { CoprCacheHit bool } -type unconsumedCopRuntimeStats struct { +type copIteratorRuntimeStats struct { sync.Mutex stats []*CopRuntimeStats } From 9b21d1755938c4b82b994489c53052c892d655dc Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 16:38:55 +0800 Subject: [PATCH 09/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 56 +++++++++++++++++------------------ 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 11cfadeac5ca5..287e560383729 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1291,9 +1291,7 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch err = worker.handleTiDBSendReqErr(err, task, ch) return nil, err } - if runawayErr := worker.collectUnconsumedCopRuntimeStats(bo, rpcCtx); runawayErr != nil { - return nil, runawayErr - } + worker.collectUnconsumedCopRuntimeStats(bo, rpcCtx) return nil, errors.Trace(err) } @@ -1330,6 +1328,7 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch remain.firstReadType = req.ReadType } } + worker.collectUnconsumedCopRuntimeStats(bo, rpcCtx) return remains, err } @@ -1796,9 +1795,6 @@ func (worker *copIteratorWorker) handleCollectExecutionInfo(bo *Backoffer, rpcCt if worker.stats == nil { return nil } - defer func() { - worker.kvclient.Stats = nil - }() failpoint.Inject("disable-collect-execution", func(val failpoint.Value) { if val.(bool) { panic("shouldn't reachable") @@ -1811,18 +1807,7 @@ func (worker *copIteratorWorker) handleCollectExecutionInfo(bo *Backoffer, rpcCt } func (worker *copIteratorWorker) collectCopRuntimeStats(copStats *CopRuntimeStats, bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse) error { - copStats.ReqStats = worker.kvclient.Stats - backoffTimes := bo.GetBackoffTimes() - copStats.BackoffTime = time.Duration(bo.GetTotalSleep()) * time.Millisecond - copStats.BackoffSleep = make(map[string]time.Duration, len(backoffTimes)) - 
copStats.BackoffTimes = make(map[string]int, len(backoffTimes)) - for backoff := range backoffTimes { - copStats.BackoffTimes[backoff] = backoffTimes[backoff] - copStats.BackoffSleep[backoff] = time.Duration(bo.GetBackoffSleepMS()[backoff]) * time.Millisecond - } - if rpcCtx != nil { - copStats.CalleeAddress = rpcCtx.Addr - } + worker.collectKVClientRuntimeStats(copStats, bo, rpcCtx) if resp == nil { return nil } @@ -1862,19 +1847,32 @@ func (worker *copIteratorWorker) collectCopRuntimeStats(copStats *CopRuntimeStat return nil } -func (worker *copIteratorWorker) collectUnconsumedCopRuntimeStats(bo *Backoffer, rpcCtx *tikv.RPCContext) error { - if worker.kvclient.Stats == nil || worker.stats == nil { - return nil +func (worker *copIteratorWorker) collectKVClientRuntimeStats(copStats *CopRuntimeStats, bo *Backoffer, rpcCtx *tikv.RPCContext) { + defer func() { + worker.kvclient.Stats = nil + }() + copStats.ReqStats = worker.kvclient.Stats + backoffTimes := bo.GetBackoffTimes() + copStats.BackoffTime = time.Duration(bo.GetTotalSleep()) * time.Millisecond + copStats.BackoffSleep = make(map[string]time.Duration, len(backoffTimes)) + copStats.BackoffTimes = make(map[string]int, len(backoffTimes)) + for backoff := range backoffTimes { + copStats.BackoffTimes[backoff] = backoffTimes[backoff] + copStats.BackoffSleep[backoff] = time.Duration(bo.GetBackoffSleepMS()[backoff]) * time.Millisecond } - copStats := &CopRuntimeStats{} - if err := worker.collectCopRuntimeStats(copStats, bo, rpcCtx, nil); err != nil { - return err + if rpcCtx != nil { + copStats.CalleeAddress = rpcCtx.Addr + } +} + +func (worker *copIteratorWorker) collectUnconsumedCopRuntimeStats(bo *Backoffer, rpcCtx *tikv.RPCContext) { + if worker.kvclient.Stats != nil && worker.stats != nil { + copStats := &CopRuntimeStats{} + worker.collectKVClientRuntimeStats(copStats, bo, rpcCtx) + worker.stats.Lock() + worker.stats.stats = append(worker.stats.stats, copStats) + worker.stats.Unlock() } - worker.stats.Lock() - worker.stats.stats = append(worker.stats.stats, copStats) - worker.stats.Unlock() - worker.kvclient.Stats = nil - return nil } // CopRuntimeStats contains execution detail information. 
From 0adfd32a909375871302fdb43aa08404efac46b3 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 17:10:36 +0800 Subject: [PATCH 10/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 4215ea765c38f..ebc557342143e 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1085,6 +1085,8 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { if resp == nil { return nil, nil } + consumed := resp.MemSize() + it.memTracker.Consume(-consumed) } else if it.respChan != nil { // Get next fetched resp from chan resp, ok, closed = it.recvFromRespCh(ctx, it.respChan) From d42989f1e30a383b7c4eebf04cdc10b44298382b Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 20 Nov 2024 21:35:10 +0800 Subject: [PATCH 11/47] fix test Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 287e560383729..39010525f87dc 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1328,7 +1328,6 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch remain.firstReadType = req.ReadType } } - worker.collectUnconsumedCopRuntimeStats(bo, rpcCtx) return remains, err } From 986275f8612383c27804c7f5d5c8ad312b7310dc Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Thu, 21 Nov 2024 00:03:16 +0800 Subject: [PATCH 12/47] fix test Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index ebc557342143e..c2d0ff6556b89 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -185,17 +185,18 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars return nil, copErrorResponse{err} } it := &copIterator{ - store: c.store, - req: req, - concurrency: req.Concurrency, - finishCh: make(chan struct{}), - vars: vars, - memTracker: req.MemTracker, - replicaReadSeed: c.replicaReadSeed, - rpcCancel: tikv.NewRPCanceller(), - buildTaskElapsed: *buildOpt.elapsed, - runawayChecker: req.RunawayChecker, - unconsumedStats: &unconsumedCopRuntimeStats{}, + store: c.store, + req: req, + concurrency: req.Concurrency, + finishCh: make(chan struct{}), + vars: vars, + memTracker: req.MemTracker, + replicaReadSeed: c.replicaReadSeed, + rpcCancel: tikv.NewRPCanceller(), + buildTaskElapsed: *buildOpt.elapsed, + runawayChecker: req.RunawayChecker, + unconsumedStats: &unconsumedCopRuntimeStats{}, + enableCollectExecutionInfo: option.EnableCollectExecutionInfo, } // Pipelined-dml can flush locks when it is still reading. // The coprocessor of the txn should not be blocked by itself. @@ -708,8 +709,10 @@ type copIterator struct { storeBatchedNum atomic.Uint64 storeBatchedFallbackNum atomic.Uint64 - runawayChecker resourcegroup.RunawayChecker - unconsumedStats *unconsumedCopRuntimeStats + runawayChecker resourcegroup.RunawayChecker + // TODO: refactor following by https://github.com/pingcap/tidb/pull/57545 + enableCollectExecutionInfo bool + unconsumedStats *unconsumedCopRuntimeStats } // copIteratorWorker receives tasks from copIteratorTaskSender, handles tasks and sends the copResponse to respChan. 
@@ -1140,7 +1143,7 @@ func (it *copIterator) sendReq(ctx context.Context) (resp *copResponse) { if len(it.tasks) == 0 { return nil } - worker := newCopIteratorWorker(it, nil, true) + worker := newCopIteratorWorker(it, nil, it.enableCollectExecutionInfo) defer func() { r := recover() if r != nil { From 5bd2cab49cf34934a3be5011ab5707f57fce9c22 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Thu, 21 Nov 2024 12:57:41 +0800 Subject: [PATCH 13/47] skip test Signed-off-by: crazycs520 --- pkg/session/test/variable/variable_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/session/test/variable/variable_test.go b/pkg/session/test/variable/variable_test.go index 3d200251e2479..5f995c9636aaf 100644 --- a/pkg/session/test/variable/variable_test.go +++ b/pkg/session/test/variable/variable_test.go @@ -173,6 +173,7 @@ func TestCoprocessorOOMAction(t *testing.T) { } require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/store/copr/testRateLimitActionMockWaitMax")) + return // assert oom fallback for _, testcase := range testcases { t.Log(testcase.name) From 3936e38549438355999e2a4614e175dba8da3cec Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Thu, 21 Nov 2024 16:37:21 +0800 Subject: [PATCH 14/47] fix test Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 6a79eeaea16d3..285cc46a966d9 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1095,8 +1095,17 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { if resp == nil { return nil, nil } - consumed := resp.MemSize() - it.memTracker.Consume(-consumed) + if it.memTracker != nil { + consumed := resp.MemSize() + failpoint.Inject("testRateLimitActionMockConsumeAndAssert", func(val failpoint.Value) { + if val.(bool) { + if resp != finCopResp { + consumed = MockResponseSizeForTest + } + } + }) + it.memTracker.Consume(-consumed) + } } else if it.respChan != nil { // Get next fetched resp from chan resp, ok, closed = it.recvFromRespCh(ctx, it.respChan) From 231f80bd6b213c79efb5d31073b1eac02774b22d Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Thu, 21 Nov 2024 16:48:36 +0800 Subject: [PATCH 15/47] refine Signed-off-by: crazycs520 --- pkg/session/test/variable/variable_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/session/test/variable/variable_test.go b/pkg/session/test/variable/variable_test.go index 5f995c9636aaf..a12dfc718c4a1 100644 --- a/pkg/session/test/variable/variable_test.go +++ b/pkg/session/test/variable/variable_test.go @@ -62,6 +62,7 @@ func TestForbidSettingBothTSVariable(t *testing.T) { } func TestCoprocessorOOMAction(t *testing.T) { + t.Skip("fix me later") // Assert Coprocessor OOMAction store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) @@ -173,7 +174,6 @@ func TestCoprocessorOOMAction(t *testing.T) { } require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/store/copr/testRateLimitActionMockWaitMax")) - return // assert oom fallback for _, testcase := range testcases { t.Log(testcase.name) From 2525b37dd8f47939d88a01430cf1fe43f8c52c62 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Fri, 22 Nov 2024 17:57:48 +0800 Subject: [PATCH 16/47] fix test Signed-off-by: crazycs520 --- pkg/session/test/variable/variable_test.go | 1 - pkg/store/copr/coprocessor.go | 35 ++++++++++++++-------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git 
a/pkg/session/test/variable/variable_test.go b/pkg/session/test/variable/variable_test.go index a12dfc718c4a1..3d200251e2479 100644 --- a/pkg/session/test/variable/variable_test.go +++ b/pkg/session/test/variable/variable_test.go @@ -62,7 +62,6 @@ func TestForbidSettingBothTSVariable(t *testing.T) { } func TestCoprocessorOOMAction(t *testing.T) { - t.Skip("fix me later") // Assert Coprocessor OOMAction store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 8bfc917993dac..efa136d17bc6a 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -247,7 +247,11 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars if option.SessionMemTracker != nil && option.EnabledRateLimitAction { option.SessionMemTracker.FallbackOldAndSetNewAction(it.actionOnExceed) } - it.actionOnExceed.setEnabled(option.EnabledRateLimitAction) + if (it.concurrency + it.smallTaskConcurrency) <= 1 { + it.liteReqSender = true + } else { + it.actionOnExceed.setEnabled(option.EnabledRateLimitAction) + } return it, nil } @@ -853,8 +857,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context) { - if (it.concurrency + it.smallTaskConcurrency) <= 1 { - it.liteReqSender = true + if it.liteReqSender { return } taskCh := make(chan *copTask, 1) @@ -1017,6 +1020,16 @@ func (sender *copIteratorTaskSender) sendToTaskCh(t *copTask, sendTo chan<- *cop } func (worker *copIteratorWorker) sendToRespCh(resp *copResponse, respCh chan<- *copResponse, checkOOM bool) (exit bool) { + worker.checkRespOOM(resp, checkOOM) + select { + case respCh <- resp: + case <-worker.finishCh: + exit = true + } + return +} + +func (worker *copIteratorWorker) checkRespOOM(resp *copResponse, checkOOM bool) { if worker.memTracker != nil && checkOOM { consumed := resp.MemSize() failpoint.Inject("testRateLimitActionMockConsumeAndAssert", func(val failpoint.Value) { @@ -1029,12 +1042,6 @@ func (worker *copIteratorWorker) sendToRespCh(resp *copResponse, respCh chan<- * failpoint.Inject("ConsumeRandomPanic", nil) worker.memTracker.Consume(consumed) } - select { - case respCh <- resp: - case <-worker.finishCh: - exit = true - } - return } // MockResponseSizeForTest mock the response size @@ -1073,7 +1080,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { // Otherwise all responses are returned from a single channel. 
if it.liteReqSender { - resp = it.sendReq(ctx) + resp = it.liteSendReq(ctx) if resp == nil { return nil, nil } @@ -1088,6 +1095,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { }) it.memTracker.Consume(-consumed) } + //it.actionOnExceed.destroyTokenIfNeeded(func() {}) } else if it.respChan != nil { // Get next fetched resp from chan resp, ok, closed = it.recvFromRespCh(ctx, it.respChan) @@ -1137,7 +1145,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { return resp, nil } -func (it *copIterator) sendReq(ctx context.Context) (resp *copResponse) { +func (it *copIterator) liteSendReq(ctx context.Context) (resp *copResponse) { if len(it.tasks) == 0 { return nil } @@ -1161,6 +1169,7 @@ func (it *copIterator) sendReq(ctx context.Context) (resp *copResponse) { tasks, err := worker.handleTaskOnce(bo, curTask, respCh) if err != nil { resp = &copResponse{err: errors.Trace(err)} + worker.checkRespOOM(resp, true) return resp } if len(tasks) > 0 { @@ -1170,7 +1179,9 @@ func (it *copIterator) sendReq(ctx context.Context) (resp *copResponse) { } select { case resp = <-respCh: - return resp + if resp != nil { + return resp + } default: continue } From 74c705cb65f2567bb775f35c154f1614299a1ec0 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Fri, 22 Nov 2024 19:35:48 +0800 Subject: [PATCH 17/47] refactor Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index efa136d17bc6a..3ff368c96d071 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -683,6 +683,7 @@ type copIterator struct { concurrency int smallTaskConcurrency int liteReqSender bool + ctxForLite context.Context finishCh chan struct{} // If keepOrder, results are stored in copTask.respChan, read them out one by one. @@ -858,6 +859,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context) { if it.liteReqSender { + it.ctxForLite = ctx return } taskCh := make(chan *copTask, 1) @@ -1080,7 +1082,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { // Otherwise all responses are returned from a single channel. 
if it.liteReqSender { - resp = it.liteSendReq(ctx) + resp = it.liteSendReq(it.ctxForLite) if resp == nil { return nil, nil } From d75b47cb1ea1c2b9a78a8fa2f5d60cf69152a6da Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Fri, 22 Nov 2024 19:50:31 +0800 Subject: [PATCH 18/47] fix test Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 3ff368c96d071..f330f921708a4 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -247,9 +247,7 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars if option.SessionMemTracker != nil && option.EnabledRateLimitAction { option.SessionMemTracker.FallbackOldAndSetNewAction(it.actionOnExceed) } - if (it.concurrency + it.smallTaskConcurrency) <= 1 { - it.liteReqSender = true - } else { + if (it.concurrency + it.smallTaskConcurrency) > 1 { it.actionOnExceed.setEnabled(option.EnabledRateLimitAction) } return it, nil @@ -682,7 +680,7 @@ type copIterator struct { req *kv.Request concurrency int smallTaskConcurrency int - liteReqSender bool + liteWorker *liteCopIteratorWorker ctxForLite context.Context finishCh chan struct{} @@ -725,6 +723,11 @@ type copIterator struct { stats *copIteratorRuntimeStats } +type liteCopIteratorWorker struct { + ctx context.Context + worker *copIteratorWorker +} + // copIteratorWorker receives tasks from copIteratorTaskSender, handles tasks and sends the copResponse to respChan. type copIteratorWorker struct { taskCh <-chan *copTask @@ -858,8 +861,11 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context) { - if it.liteReqSender { - it.ctxForLite = ctx + if (it.concurrency + it.smallTaskConcurrency) <= 1 { + it.liteWorker = &liteCopIteratorWorker{ + ctx: ctx, // the ctx contains some info(such as rpc interceptor), this ctx is used for handle cop task later. + worker: newCopIteratorWorker(it, nil), + } return } taskCh := make(chan *copTask, 1) @@ -1081,8 +1087,8 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { // If data order matters, response should be returned in the same order as copTask slice. // Otherwise all responses are returned from a single channel. 
- if it.liteReqSender { - resp = it.liteSendReq(it.ctxForLite) + if it.liteWorker != nil { + resp = it.liteSendReq() if resp == nil { return nil, nil } @@ -1147,11 +1153,10 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { return resp, nil } -func (it *copIterator) liteSendReq(ctx context.Context) (resp *copResponse) { +func (it *copIterator) liteSendReq() (resp *copResponse) { if len(it.tasks) == 0 { return nil } - worker := newCopIteratorWorker(it, nil) defer func() { r := recover() if r != nil { @@ -1162,6 +1167,8 @@ func (it *copIterator) liteSendReq(ctx context.Context) (resp *copResponse) { } }() + worker := it.liteWorker.worker + ctx := it.liteWorker.ctx backoffermap := make(map[uint64]*Backoffer) for len(it.tasks) > 0 { curTask := it.tasks[0] From e3cc04bbe628c93d4acb216639e6f715fa31d077 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Tue, 26 Nov 2024 11:39:20 +0800 Subject: [PATCH 19/47] fix test Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index f330f921708a4..9698462bd969a 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -247,9 +247,7 @@ func (c *CopClient) BuildCopIterator(ctx context.Context, req *kv.Request, vars if option.SessionMemTracker != nil && option.EnabledRateLimitAction { option.SessionMemTracker.FallbackOldAndSetNewAction(it.actionOnExceed) } - if (it.concurrency + it.smallTaskConcurrency) > 1 { - it.actionOnExceed.setEnabled(option.EnabledRateLimitAction) - } + it.actionOnExceed.setEnabled(option.EnabledRateLimitAction) return it, nil } @@ -1090,8 +1088,10 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { if it.liteWorker != nil { resp = it.liteSendReq() if resp == nil { + it.actionOnExceed.close() return nil, nil } + it.actionOnExceed.destroyTokenIfNeeded(func() {}) if it.memTracker != nil { consumed := resp.MemSize() failpoint.Inject("testRateLimitActionMockConsumeAndAssert", func(val failpoint.Value) { From 640ba956424a27b43e094c6a68c0c2302cc84ecf Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Thu, 28 Nov 2024 11:15:38 +0800 Subject: [PATCH 20/47] tiny fix Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 9698462bd969a..d9efcad083882 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -859,7 +859,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context) { - if (it.concurrency + it.smallTaskConcurrency) <= 1 { + if len(it.tasks) == 1 && len(it.tasks[0].batchTaskList) == 0 { it.liteWorker = &liteCopIteratorWorker{ ctx: ctx, // the ctx contains some info(such as rpc interceptor), this ctx is used for handle cop task later. 
worker: newCopIteratorWorker(it, nil), From cd8962d7fe1588124b107bab0eb631ecfa1ab985 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Mon, 2 Dec 2024 11:13:59 +0800 Subject: [PATCH 21/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 43 +++++++++++++---------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index d9efcad083882..bc6c7ee6e687e 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -679,7 +679,6 @@ type copIterator struct { concurrency int smallTaskConcurrency int liteWorker *liteCopIteratorWorker - ctxForLite context.Context finishCh chan struct{} // If keepOrder, results are stored in copTask.respChan, read them out one by one. @@ -965,17 +964,7 @@ func (it *copIterator) recvFromRespCh(ctx context.Context, respCh <-chan *copRes for { select { case resp, ok = <-respCh: - if it.memTracker != nil && resp != nil { - consumed := resp.MemSize() - failpoint.Inject("testRateLimitActionMockConsumeAndAssert", func(val failpoint.Value) { - if val.(bool) { - if resp != finCopResp { - consumed = MockResponseSizeForTest - } - } - }) - it.memTracker.Consume(-consumed) - } + memTrackerConsumeResp(it.memTracker, resp) return case <-it.finishCh: exit = true @@ -991,6 +980,20 @@ func (it *copIterator) recvFromRespCh(ctx context.Context, respCh <-chan *copRes } } +func memTrackerConsumeResp(memTracker *memory.Tracker, resp *copResponse) { + if memTracker != nil && resp != nil { + consumed := resp.MemSize() + failpoint.Inject("testRateLimitActionMockConsumeAndAssert", func(val failpoint.Value) { + if val.(bool) { + if resp != finCopResp { + consumed = MockResponseSizeForTest + } + } + }) + memTracker.Consume(-consumed) + } +} + // GetConcurrency returns the concurrency and small task concurrency. 
func (it *copIterator) GetConcurrency() (int, int) { return it.concurrency, it.smallTaskConcurrency @@ -1092,18 +1095,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { return nil, nil } it.actionOnExceed.destroyTokenIfNeeded(func() {}) - if it.memTracker != nil { - consumed := resp.MemSize() - failpoint.Inject("testRateLimitActionMockConsumeAndAssert", func(val failpoint.Value) { - if val.(bool) { - if resp != finCopResp { - consumed = MockResponseSizeForTest - } - } - }) - it.memTracker.Consume(-consumed) - } - //it.actionOnExceed.destroyTokenIfNeeded(func() {}) + memTrackerConsumeResp(it.memTracker, resp) } else if it.respChan != nil { // Get next fetched resp from chan resp, ok, closed = it.recvFromRespCh(ctx, it.respChan) @@ -1154,9 +1146,6 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { } func (it *copIterator) liteSendReq() (resp *copResponse) { - if len(it.tasks) == 0 { - return nil - } defer func() { r := recover() if r != nil { From f6d6b23953b3b55df3cd6758037603681bdfb689 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 4 Dec 2024 15:07:59 +0800 Subject: [PATCH 22/47] fix test Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index bc6c7ee6e687e..a1d3ed6fbe796 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -960,7 +960,6 @@ func (sender *copIteratorTaskSender) run(connID uint64, checker resourcegroup.Ru } func (it *copIterator) recvFromRespCh(ctx context.Context, respCh <-chan *copResponse) (resp *copResponse, ok bool, exit bool) { - failpoint.InjectCall("CtxCancelBeforeReceive", ctx) for { select { case resp, ok = <-respCh: @@ -1088,6 +1087,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { // If data order matters, response should be returned in the same order as copTask slice. // Otherwise all responses are returned from a single channel. 
+ failpoint.InjectCall("CtxCancelBeforeReceive", ctx) if it.liteWorker != nil { resp = it.liteSendReq() if resp == nil { From e103ffaeb908f6ac88bcf772f56224f372e5ccbd Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Mon, 9 Dec 2024 11:51:47 +0800 Subject: [PATCH 23/47] address comment Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index a1d3ed6fbe796..204f5b84a8269 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1089,7 +1089,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { failpoint.InjectCall("CtxCancelBeforeReceive", ctx) if it.liteWorker != nil { - resp = it.liteSendReq() + resp = it.liteSendReq(ctx) if resp == nil { it.actionOnExceed.close() return nil, nil @@ -1145,11 +1145,11 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { return resp, nil } -func (it *copIterator) liteSendReq() (resp *copResponse) { +func (it *copIterator) liteSendReq(ctx context.Context) (resp *copResponse) { defer func() { r := recover() if r != nil { - logutil.BgLogger().Error("copIteratorWork meet panic", + logutil.Logger(ctx).Error("copIteratorWork meet panic", zap.Any("r", r), zap.Stack("stack trace")) resp = &copResponse{err: util2.GetRecoverError(r)} @@ -1157,13 +1157,13 @@ func (it *copIterator) liteSendReq() (resp *copResponse) { }() worker := it.liteWorker.worker - ctx := it.liteWorker.ctx + taskCtx := it.liteWorker.ctx backoffermap := make(map[uint64]*Backoffer) for len(it.tasks) > 0 { curTask := it.tasks[0] - respCh := make(chan *copResponse, 2) + respCh := make(chan *copResponse, 2+len(curTask.batchTaskList)) curTask.respChan = respCh - bo := chooseBackoffer(ctx, backoffermap, curTask, worker) + bo := chooseBackoffer(taskCtx, backoffermap, curTask, worker) tasks, err := worker.handleTaskOnce(bo, curTask, respCh) if err != nil { resp = &copResponse{err: errors.Trace(err)} From 9c56deffff08e89fb31f6a516c74fff3d585b083 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Mon, 9 Dec 2024 15:32:35 +0800 Subject: [PATCH 24/47] fix for batch_tasks Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 204f5b84a8269..89a380406f8bf 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -858,7 +858,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context) { - if len(it.tasks) == 1 && len(it.tasks[0].batchTaskList) == 0 { + if len(it.tasks) == 1 { it.liteWorker = &liteCopIteratorWorker{ ctx: ctx, // the ctx contains some info(such as rpc interceptor), this ctx is used for handle cop task later. 
worker: newCopIteratorWorker(it, nil), @@ -1157,13 +1157,25 @@ func (it *copIterator) liteSendReq(ctx context.Context) (resp *copResponse) { }() worker := it.liteWorker.worker - taskCtx := it.liteWorker.ctx backoffermap := make(map[uint64]*Backoffer) - for len(it.tasks) > 0 { + for { + for len(it.respChan) > 0 { + select { + case resp = <-it.respChan: + return resp + default: + } + } + if len(it.tasks) == 0 { + return nil + } curTask := it.tasks[0] - respCh := make(chan *copResponse, 2+len(curTask.batchTaskList)) + if cap(it.respChan) < (len(curTask.batchTaskList) + 2) { + it.respChan = make(chan *copResponse, 2+len(curTask.batchTaskList)) + } + respCh := it.respChan curTask.respChan = respCh - bo := chooseBackoffer(taskCtx, backoffermap, curTask, worker) + bo := chooseBackoffer(it.liteWorker.ctx, backoffermap, curTask, worker) tasks, err := worker.handleTaskOnce(bo, curTask, respCh) if err != nil { resp = &copResponse{err: errors.Trace(err)} @@ -1175,16 +1187,7 @@ func (it *copIterator) liteSendReq(ctx context.Context) (resp *copResponse) { } else { it.tasks = it.tasks[1:] } - select { - case resp = <-respCh: - if resp != nil { - return resp - } - default: - continue - } } - return nil } // HasUnconsumedCopRuntimeStats indicate whether has unconsumed CopRuntimeStats. From b88709a3604a4d9758407a044280f8bf451cb4e4 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Mon, 9 Dec 2024 18:01:29 +0800 Subject: [PATCH 25/47] address comment Signed-off-by: crazycs520 --- pkg/distsql/context/context.go | 3 +++ pkg/distsql/distsql.go | 1 + pkg/kv/kv.go | 1 + pkg/store/copr/coprocessor.go | 7 ++++--- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/distsql/context/context.go b/pkg/distsql/context/context.go index f0327326e3854..fdebf09b61484 100644 --- a/pkg/distsql/context/context.go +++ b/pkg/distsql/context/context.go @@ -85,6 +85,9 @@ type DistSQLContext struct { SessionAlias string ExecDetails *execdetails.SyncExecDetails + + // Only one cop-reader can use lite worker. Using lite-worker in multiple readers will affect the concurrent execution of readers. + TryCopLiteWorker uint32 } // AppendWarning appends the warning to the warning handler. diff --git a/pkg/distsql/distsql.go b/pkg/distsql/distsql.go index 3949b30ea8dc9..2404c1fade1b3 100644 --- a/pkg/distsql/distsql.go +++ b/pkg/distsql/distsql.go @@ -80,6 +80,7 @@ func Select(ctx context.Context, dctx *distsqlctx.DistSQLContext, kvReq *kv.Requ EnabledRateLimitAction: enabledRateLimitAction, EventCb: eventCb, EnableCollectExecutionInfo: config.GetGlobalConfig().Instance.EnableCollectExecutionInfo.Load(), + TryCopLiteWorker: &dctx.TryCopLiteWorker, } if kvReq.StoreType == kv.TiFlash { diff --git a/pkg/kv/kv.go b/pkg/kv/kv.go index 26a5ee7a4fccf..e01152f7dd643 100644 --- a/pkg/kv/kv.go +++ b/pkg/kv/kv.go @@ -327,6 +327,7 @@ type ClientSendOption struct { EnableCollectExecutionInfo bool TiFlashReplicaRead tiflash.ReplicaRead AppendWarning func(warn error) + TryCopLiteWorker *uint32 } // ReqTypes. 
diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 89a380406f8bf..7237ba9500b24 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -103,7 +103,7 @@ func (c *CopClient) Send(ctx context.Context, req *kv.Request, variables any, op if ctx.Value(util.RUDetailsCtxKey) == nil { ctx = context.WithValue(ctx, util.RUDetailsCtxKey, util.NewRUDetails()) } - it.open(ctx) + it.open(ctx, option.TryCopLiteWorker) return it } @@ -857,8 +857,9 @@ func (worker *copIteratorWorker) run(ctx context.Context) { } // open starts workers and sender goroutines. -func (it *copIterator) open(ctx context.Context) { - if len(it.tasks) == 1 { +func (it *copIterator) open(ctx context.Context, tryCopLiteWorker *uint32) { + if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.LoadUint32(tryCopLiteWorker) == 0 { + atomic.StoreUint32(tryCopLiteWorker, 1) it.liteWorker = &liteCopIteratorWorker{ ctx: ctx, // the ctx contains some info(such as rpc interceptor), this ctx is used for handle cop task later. worker: newCopIteratorWorker(it, nil), From 9bf22a9fa4444a9e61510323c32e8adcaacda220 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Mon, 9 Dec 2024 19:33:23 +0800 Subject: [PATCH 26/47] fix test Signed-off-by: crazycs520 --- pkg/distsql/context/context_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/distsql/context/context_test.go b/pkg/distsql/context/context_test.go index d65f801207e85..92cccd5d3c1b1 100644 --- a/pkg/distsql/context/context_test.go +++ b/pkg/distsql/context/context_test.go @@ -89,6 +89,7 @@ func TestContextDetach(t *testing.T) { ReplicaClosestReadThreshold: 1, ConnectionID: 1, SessionAlias: "c", + TryCopLiteWorker: 1, } obj.AppendWarning(errors.New("test warning")) From f88c931a15773a8bad8f536d9af49f329f7fc6dd Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Mon, 9 Dec 2024 21:06:34 +0800 Subject: [PATCH 27/47] address comment Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 7237ba9500b24..2f3945b4241bc 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -858,8 +858,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context, tryCopLiteWorker *uint32) { - if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.LoadUint32(tryCopLiteWorker) == 0 { - atomic.StoreUint32(tryCopLiteWorker, 1) + if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.CompareAndSwapUint32(tryCopLiteWorker, 0, 1) { it.liteWorker = &liteCopIteratorWorker{ ctx: ctx, // the ctx contains some info(such as rpc interceptor), this ctx is used for handle cop task later. worker: newCopIteratorWorker(it, nil), From cfbf4f9df224eb597818de7fb7113aa8efc24b48 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Mon, 9 Dec 2024 21:20:06 +0800 Subject: [PATCH 28/47] refine comment Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 2f3945b4241bc..930888c38745e 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -721,6 +721,7 @@ type copIterator struct { } type liteCopIteratorWorker struct { + // ctx contains some info(such as rpc interceptor(WithSQLKvExecCounterInterceptor)), it is used for handle cop task later. 
ctx context.Context worker *copIteratorWorker } @@ -860,7 +861,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { func (it *copIterator) open(ctx context.Context, tryCopLiteWorker *uint32) { if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.CompareAndSwapUint32(tryCopLiteWorker, 0, 1) { it.liteWorker = &liteCopIteratorWorker{ - ctx: ctx, // the ctx contains some info(such as rpc interceptor), this ctx is used for handle cop task later. + ctx: ctx, worker: newCopIteratorWorker(it, nil), } return From 632f81fbfcad28b43d9bc53979a6fa44b7180c67 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Tue, 10 Dec 2024 09:36:21 +0800 Subject: [PATCH 29/47] address comment Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 930888c38745e..b42f44c273b58 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1921,6 +1921,9 @@ func (worker *copIteratorWorker) collectCopRuntimeStats(copStats *CopRuntimeStat } func (worker *copIteratorWorker) collectKVClientRuntimeStats(copStats *CopRuntimeStats, bo *Backoffer, rpcCtx *tikv.RPCContext) { + if worker.kvclient.Stats == nil { + return + } defer func() { worker.kvclient.Stats = nil }() From 3e516f308f759af24fce44bb3a00ee842e4593fc Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Tue, 10 Dec 2024 23:38:53 +0800 Subject: [PATCH 30/47] refactor Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 183 ++++++++++++++++++++-------------- 1 file changed, 106 insertions(+), 77 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index b42f44c273b58..ac7bf140cd4da 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -722,8 +722,9 @@ type copIterator struct { type liteCopIteratorWorker struct { // ctx contains some info(such as rpc interceptor(WithSQLKvExecCounterInterceptor)), it is used for handle cop task later. - ctx context.Context - worker *copIteratorWorker + ctx context.Context + worker *copIteratorWorker + batchCopRespList []*copResponse } // copIteratorWorker receives tasks from copIteratorTaskSender, handles tasks and sends the copResponse to respChan. @@ -768,6 +769,12 @@ type copResponse struct { respTime time.Duration } +type copTaskResponse struct { + resp *copResponse + batchRespList []*copResponse + remains []*copTask +} + const sizeofExecDetails = int(unsafe.Sizeof(execdetails.ExecDetails{})) // GetData implements the kv.ResultSubset GetData interface. 
@@ -1157,38 +1164,36 @@ func (it *copIterator) liteSendReq(ctx context.Context) (resp *copResponse) { } }() + if len(it.liteWorker.batchCopRespList) > 0 { + resp := it.liteWorker.batchCopRespList[0] + it.liteWorker.batchCopRespList = it.liteWorker.batchCopRespList[1:] + return resp + } + worker := it.liteWorker.worker backoffermap := make(map[uint64]*Backoffer) - for { - for len(it.respChan) > 0 { - select { - case resp = <-it.respChan: - return resp - default: - } - } - if len(it.tasks) == 0 { - return nil - } + for len(it.tasks) > 0 { curTask := it.tasks[0] - if cap(it.respChan) < (len(curTask.batchTaskList) + 2) { - it.respChan = make(chan *copResponse, 2+len(curTask.batchTaskList)) - } - respCh := it.respChan - curTask.respChan = respCh bo := chooseBackoffer(it.liteWorker.ctx, backoffermap, curTask, worker) - tasks, err := worker.handleTaskOnce(bo, curTask, respCh) - if err != nil { + taskResp, err := worker.handleTaskOnce(bo, curTask) + if err != nil || taskResp == nil { resp = &copResponse{err: errors.Trace(err)} worker.checkRespOOM(resp, true) return resp } - if len(tasks) > 0 { - it.tasks = append(tasks, it.tasks[1:]...) + if len(taskResp.batchRespList) > 0 { + it.tasks = append(taskResp.remains, it.tasks[1:]...) } else { it.tasks = it.tasks[1:] } + resp = taskResp.resp + if resp != nil { + worker.checkRespOOM(resp, true) + it.liteWorker.batchCopRespList = taskResp.batchRespList + return resp + } } + return nil } // HasUnconsumedCopRuntimeStats indicate whether has unconsumed CopRuntimeStats. @@ -1244,7 +1249,15 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, for len(remainTasks) > 0 { curTask := remainTasks[0] bo := chooseBackoffer(ctx, backoffermap, curTask, worker) - tasks, err := worker.handleTaskOnce(bo, curTask, respCh) + taskResp, err := worker.handleTaskOnce(bo, curTask) + if taskResp != nil { + if taskResp.resp != nil { + worker.sendToRespCh(taskResp.resp, respCh, true) + } + for _, resp := range taskResp.batchRespList { + worker.sendToRespCh(resp, respCh, true) + } + } if err != nil { resp := &copResponse{err: errors.Trace(err)} worker.sendToRespCh(resp, respCh, true) @@ -1253,8 +1266,8 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, if worker.finished() { break } - if len(tasks) > 0 { - remainTasks = append(tasks, remainTasks[1:]...) + if len(taskResp.remains) > 0 { + remainTasks = append(taskResp.remains, remainTasks[1:]...) } else { remainTasks = remainTasks[1:] } @@ -1263,7 +1276,7 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, // handleTaskOnce handles single copTask, successful results are send to channel. // If error happened, returns error. If region split or meet lock, returns the remain tasks. 
-func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch chan<- *copResponse) ([]*copTask, error) { +func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask) (*copTaskResponse, error) { failpoint.Inject("handleTaskOnceError", func(val failpoint.Value) { if val.(bool) { failpoint.Return(nil, errors.New("mock handleTaskOnce error")) @@ -1362,8 +1375,7 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch } if err != nil { if task.storeType == kv.TiDB { - err = worker.handleTiDBSendReqErr(err, task, ch) - return nil, err + return worker.handleTiDBSendReqErr(err, task) } worker.collectUnconsumedCopRuntimeStats(bo, rpcCtx) return nil, errors.Trace(err) @@ -1390,19 +1402,19 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask, ch tidbmetrics.DistSQLCoprRespBodySize.WithLabelValues(storeAddr).Observe(float64(len(copResp.Data))) } - var remains []*copTask + var taskResp *copTaskResponse if worker.req.Paging.Enable { - remains, err = worker.handleCopPagingResult(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, ch, costTime) + taskResp, err = worker.handleCopPagingResult(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, costTime) } else { // Handles the response for non-paging copTask. - remains, err = worker.handleCopResponse(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, ch, costTime) + taskResp, err = worker.handleCopResponse(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, costTime) } - if req.ReadType != "" { - for _, remain := range remains { + if req.ReadType != "" && taskResp != nil { + for _, remain := range taskResp.remains { remain.firstReadType = req.ReadType } } - return remains, err + return taskResp, err } const ( @@ -1464,46 +1476,50 @@ func appendScanDetail(logStr string, columnFamily string, scanInfo *kvrpcpb.Scan return logStr } -func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, ch chan<- *copResponse, costTime time.Duration) ([]*copTask, error) { - remainedTasks, err := worker.handleCopResponse(bo, rpcCtx, resp, cacheKey, cacheValue, task, ch, costTime) - if err != nil || len(remainedTasks) != 0 { +func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) (*copTaskResponse, error) { + taskResp, err := worker.handleCopResponse(bo, rpcCtx, resp, cacheKey, cacheValue, task, costTime) + if err != nil || (taskResp != nil && len(taskResp.remains) != 0) { // If there is region error or lock error, keep the paging size and retry. - for _, remainedTask := range remainedTasks { - remainedTask.pagingSize = task.pagingSize + if taskResp != nil { + for _, remainedTask := range taskResp.remains { + remainedTask.pagingSize = task.pagingSize + } } - return remainedTasks, errors.Trace(err) + return taskResp, errors.Trace(err) } pagingRange := resp.pbResp.Range // only paging requests need to calculate the next ranges if pagingRange == nil { // If the storage engine doesn't support paging protocol, it should have return all the region data. // So we finish here. 
- return nil, nil + return taskResp, nil } // calculate next ranges and grow the paging size task.ranges = worker.calculateRemain(task.ranges, pagingRange, worker.req.Desc) if task.ranges.Len() == 0 { - return nil, nil + return taskResp, nil } task.pagingSize = paging.GrowPagingSize(task.pagingSize, worker.req.Paging.MaxPagingSize) - return []*copTask{task}, nil + taskResp.remains = append(taskResp.remains, task) + return taskResp, nil } // handleCopResponse checks coprocessor Response for region split and lock, // returns more tasks when that happens, or handles the response if no error. // if we're handling coprocessor paging response, lastRange is the range of last // successful response, otherwise it's nil. -func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, ch chan<- *copResponse, costTime time.Duration) ([]*copTask, error) { +func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) (*copTaskResponse, error) { + taskResp := &copTaskResponse{resp: resp} if ver := resp.pbResp.GetLatestBucketsVersion(); task.bucketsVer < ver { worker.store.GetRegionCache().UpdateBucketsIfNeeded(task.region, ver) } if regionErr := resp.pbResp.GetRegionError(); regionErr != nil { if rpcCtx != nil && task.storeType == kv.TiDB { resp.err = errors.Errorf("error: %v", regionErr) - worker.sendToRespCh(resp, ch, true) - return nil, nil + //worker.sendToRespCh(resp, ch, true) + return taskResp, nil } errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s", task.region.GetID(), task.region.GetVer(), task.storeType.Name(), task.storeAddr, regionErr.String()) @@ -1511,7 +1527,8 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R return nil, errors.Trace(err) } // We may meet RegionError at the first packet, but not during visiting the stream. 
- remains, err := buildCopTasks(bo, task.ranges, &buildCopTaskOpt{ + var err error + taskResp.remains, err = buildCopTasks(bo, task.ranges, &buildCopTaskOpt{ req: worker.req, cache: worker.store.GetRegionCache(), respChan: false, @@ -1519,16 +1536,16 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R ignoreTiKVClientReadTimeout: true, }) if err != nil { - return remains, err + return taskResp, err } - return worker.handleBatchRemainsOnErr(bo, rpcCtx, remains, resp.pbResp, task, ch) + return worker.handleBatchRemainsOnErr(bo, rpcCtx, taskResp, task) } if lockErr := resp.pbResp.GetLocked(); lockErr != nil { if err := worker.handleLockErr(bo, lockErr, task); err != nil { return nil, err } task.meetLockFallback = true - return worker.handleBatchRemainsOnErr(bo, rpcCtx, []*copTask{task}, resp.pbResp, task, ch) + return worker.handleBatchRemainsOnErr(bo, rpcCtx, taskResp, task) } if otherErr := resp.pbResp.GetOtherError(); otherErr != "" { err := errors.Errorf("other error: %s", otherErr) @@ -1569,29 +1586,37 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R } pbResp := resp.pbResp - worker.sendToRespCh(resp, ch, true) - return worker.handleBatchCopResponse(bo, rpcCtx, pbResp, task.batchTaskList, ch) + //worker.sendToRespCh(resp, ch, true) + batchCopResps, batchedRemains, err := worker.handleBatchCopResponse(bo, rpcCtx, pbResp, task.batchTaskList) + if err != nil { + return nil, err + } + taskResp.remains = append(taskResp.remains, batchedRemains...) + taskResp.batchRespList = batchCopResps + return taskResp, nil } -func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, remains []*copTask, resp *coprocessor.Response, task *copTask, ch chan<- *copResponse) ([]*copTask, error) { +func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, task *copTask) (*copTaskResponse, error) { if len(task.batchTaskList) == 0 { - return remains, nil + return taskResp, nil } batchedTasks := task.batchTaskList task.batchTaskList = nil - batchedRemains, err := worker.handleBatchCopResponse(bo, rpcCtx, resp, batchedTasks, ch) + batchCopResps, batchedRemains, err := worker.handleBatchCopResponse(bo, rpcCtx, taskResp.resp.pbResp, batchedTasks) if err != nil { return nil, err } - return append(remains, batchedRemains...), nil + taskResp.remains = append(taskResp.remains, batchedRemains...) + taskResp.batchRespList = batchCopResps + return taskResp, nil } // handle the batched cop response. // tasks will be changed, so the input tasks should not be used after calling this function. 
func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *coprocessor.Response, - tasks map[uint64]*batchedCopTask, ch chan<- *copResponse) (remainTasks []*copTask, err error) { + tasks map[uint64]*batchedCopTask) (batchCopResps []*copResponse, remainTasks []*copTask, err error) { if len(tasks) == 0 { - return nil, nil + return nil, nil, nil } batchedNum := len(tasks) busyThresholdFallback := false @@ -1619,11 +1644,12 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t } } batchResps := resp.GetBatchResponses() + batchCopResps = make([]*copResponse, 0, len(batchResps)) for _, batchResp := range batchResps { taskID := batchResp.GetTaskId() batchedTask, ok := tasks[taskID] if !ok { - return nil, errors.Errorf("task id %d not found", batchResp.GetTaskId()) + return nil, nil, errors.Errorf("task id %d not found", batchResp.GetTaskId()) } delete(tasks, taskID) resp := &copResponse{ @@ -1640,7 +1666,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s", task.region.GetID(), task.region.GetVer(), task.storeType.Name(), task.storeAddr, regionErr.String()) if err := bo.Backoff(tikv.BoRegionMiss(), errors.New(errStr)); err != nil { - return nil, errors.Trace(err) + return nil, nil, errors.Trace(err) } remains, err := buildCopTasks(bo, task.ranges, &buildCopTaskOpt{ req: worker.req, @@ -1650,7 +1676,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t ignoreTiKVClientReadTimeout: true, }) if err != nil { - return nil, err + return nil, nil, err } appendRemainTasks(remains...) continue @@ -1658,7 +1684,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t //TODO: handle locks in batch if lockErr := batchResp.GetLocked(); lockErr != nil { if err := worker.handleLockErr(bo, resp.pbResp.GetLocked(), task); err != nil { - return nil, err + return nil, nil, err } task.meetLockFallback = true appendRemainTasks(task) @@ -1684,14 +1710,15 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t zap.String("storeAddr", task.storeAddr), zap.Error(err)) if strings.Contains(err.Error(), "write conflict") { - return nil, kv.ErrWriteConflict.FastGen("%s", otherErr) + return nil, nil, kv.ErrWriteConflict.FastGen("%s", otherErr) } - return nil, errors.Trace(err) + return nil, nil, errors.Trace(err) } if err := worker.handleCollectExecutionInfo(bo, dummyRPCCtx, resp); err != nil { - return nil, err + return nil, nil, err } - worker.sendToRespCh(resp, ch, true) + //worker.sendToRespCh(resp, ch, true) + batchCopResps = append(batchCopResps, resp) } for _, t := range tasks { task := t.task @@ -1717,7 +1744,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t if regionErr := resp.GetRegionError(); regionErr != nil && regionErr.ServerIsBusy != nil && regionErr.ServerIsBusy.EstimatedWaitMs > 0 && len(remainTasks) != 0 { if len(batchResps) != 0 { - return nil, errors.New("store batched coprocessor with server is busy error shouldn't contain responses") + return nil, nil, errors.New("store batched coprocessor with server is busy error shouldn't contain responses") } busyThresholdFallback = true handler := newBatchTaskBuilder(bo, worker.req, worker.store.GetRegionCache(), kv.ReplicaReadFollower) @@ -1725,12 +1752,12 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t // do not 
set busy threshold again. task.busyThreshold = 0 if err = handler.handle(task); err != nil { - return nil, err + return nil, nil, err } } remainTasks = handler.build() } - return remainTasks, nil + return batchCopResps, remainTasks, nil } func (worker *copIteratorWorker) handleLockErr(bo *Backoffer, lockErr *kvrpcpb.LockInfo, task *copTask) error { @@ -1964,7 +1991,7 @@ type copIteratorRuntimeStats struct { stats []*CopRuntimeStats } -func (worker *copIteratorWorker) handleTiDBSendReqErr(err error, task *copTask, ch chan<- *copResponse) error { +func (worker *copIteratorWorker) handleTiDBSendReqErr(err error, task *copTask) (*copTaskResponse, error) { errCode := errno.ErrUnknown errMsg := err.Error() if terror.ErrorEqual(err, derr.ErrTiKVServerTimeout) { @@ -1985,16 +2012,18 @@ func (worker *copIteratorWorker) handleTiDBSendReqErr(err error, task *copTask, } data, err := proto.Marshal(&selResp) if err != nil { - return errors.Trace(err) + return nil, errors.Trace(err) } - resp := &copResponse{ - pbResp: &coprocessor.Response{ - Data: data, + resp := &copTaskResponse{ + resp: &copResponse{ + pbResp: &coprocessor.Response{ + Data: data, + }, + detail: &CopRuntimeStats{}, }, - detail: &CopRuntimeStats{}, } - worker.sendToRespCh(resp, ch, true) - return nil + //worker.sendToRespCh(resp, ch, true) + return resp, nil } // calculateRetry splits the input ranges into two, and take one of them according to desc flag. From a832de9e01966f0aebcf179c04f5b6ab3e70bb0e Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 10:44:37 +0800 Subject: [PATCH 31/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 66 +++++++++++++++++------------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index ac7bf140cd4da..91698b0f7ebc9 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -866,13 +866,13 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context, tryCopLiteWorker *uint32) { - if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.CompareAndSwapUint32(tryCopLiteWorker, 0, 1) { - it.liteWorker = &liteCopIteratorWorker{ - ctx: ctx, - worker: newCopIteratorWorker(it, nil), - } - return - } + //if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.CompareAndSwapUint32(tryCopLiteWorker, 0, 1) { + // it.liteWorker = &liteCopIteratorWorker{ + // ctx: ctx, + // worker: newCopIteratorWorker(it, nil), + // } + // return + //} taskCh := make(chan *copTask, 1) it.wg.Add(it.concurrency + it.smallTaskConcurrency) var smallTaskCh chan *copTask @@ -1402,12 +1402,12 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask) (* tidbmetrics.DistSQLCoprRespBodySize.WithLabelValues(storeAddr).Observe(float64(len(copResp.Data))) } - var taskResp *copTaskResponse + taskResp := &copTaskResponse{resp: &copResponse{pbResp: copResp}} if worker.req.Paging.Enable { - taskResp, err = worker.handleCopPagingResult(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, costTime) + err = worker.handleCopPagingResult(bo, rpcCtx, taskResp, cacheKey, cacheValue, task, costTime) } else { // Handles the response for non-paging copTask. 
- taskResp, err = worker.handleCopResponse(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, costTime) + err = worker.handleCopResponse(bo, rpcCtx, taskResp, cacheKey, cacheValue, task, costTime) } if req.ReadType != "" && taskResp != nil { for _, remain := range taskResp.remains { @@ -1476,8 +1476,8 @@ func appendScanDetail(logStr string, columnFamily string, scanInfo *kvrpcpb.Scan return logStr } -func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) (*copTaskResponse, error) { - taskResp, err := worker.handleCopResponse(bo, rpcCtx, resp, cacheKey, cacheValue, task, costTime) +func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) error { + err := worker.handleCopResponse(bo, rpcCtx, taskResp, cacheKey, cacheValue, task, costTime) if err != nil || (taskResp != nil && len(taskResp.remains) != 0) { // If there is region error or lock error, keep the paging size and retry. if taskResp != nil { @@ -1485,33 +1485,33 @@ func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *ti remainedTask.pagingSize = task.pagingSize } } - return taskResp, errors.Trace(err) + return errors.Trace(err) } - pagingRange := resp.pbResp.Range + pagingRange := taskResp.resp.pbResp.Range // only paging requests need to calculate the next ranges if pagingRange == nil { // If the storage engine doesn't support paging protocol, it should have return all the region data. // So we finish here. - return taskResp, nil + return nil } // calculate next ranges and grow the paging size task.ranges = worker.calculateRemain(task.ranges, pagingRange, worker.req.Desc) if task.ranges.Len() == 0 { - return taskResp, nil + return nil } task.pagingSize = paging.GrowPagingSize(task.pagingSize, worker.req.Paging.MaxPagingSize) taskResp.remains = append(taskResp.remains, task) - return taskResp, nil + return nil } // handleCopResponse checks coprocessor Response for region split and lock, // returns more tasks when that happens, or handles the response if no error. // if we're handling coprocessor paging response, lastRange is the range of last // successful response, otherwise it's nil. 
-func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) (*copTaskResponse, error) { - taskResp := &copTaskResponse{resp: resp} +func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) error { + resp := taskResp.resp if ver := resp.pbResp.GetLatestBucketsVersion(); task.bucketsVer < ver { worker.store.GetRegionCache().UpdateBucketsIfNeeded(task.region, ver) } @@ -1519,12 +1519,12 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R if rpcCtx != nil && task.storeType == kv.TiDB { resp.err = errors.Errorf("error: %v", regionErr) //worker.sendToRespCh(resp, ch, true) - return taskResp, nil + return nil } errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s", task.region.GetID(), task.region.GetVer(), task.storeType.Name(), task.storeAddr, regionErr.String()) if err := bo.Backoff(tikv.BoRegionMiss(), errors.New(errStr)); err != nil { - return nil, errors.Trace(err) + return errors.Trace(err) } // We may meet RegionError at the first packet, but not during visiting the stream. var err error @@ -1536,13 +1536,13 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R ignoreTiKVClientReadTimeout: true, }) if err != nil { - return taskResp, err + return err } return worker.handleBatchRemainsOnErr(bo, rpcCtx, taskResp, task) } if lockErr := resp.pbResp.GetLocked(); lockErr != nil { if err := worker.handleLockErr(bo, lockErr, task); err != nil { - return nil, err + return err } task.meetLockFallback = true return worker.handleBatchRemainsOnErr(bo, rpcCtx, taskResp, task) @@ -1566,9 +1566,9 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R zap.String("storeAddr", task.storeAddr), zap.String("error", otherErr)) if strings.Contains(err.Error(), "write conflict") { - return nil, kv.ErrWriteConflict.FastGen("%s", otherErr) + return kv.ErrWriteConflict.FastGen("%s", otherErr) } - return nil, errors.Trace(err) + return errors.Trace(err) } // When the request is using paging API, the `Range` is not nil. if resp.pbResp.Range != nil { @@ -1577,38 +1577,38 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R resp.startKey = task.ranges.At(0).StartKey } if err := worker.handleCollectExecutionInfo(bo, rpcCtx, resp); err != nil { - return nil, err + return err } resp.respTime = costTime if err := worker.handleCopCache(task, resp, cacheKey, cacheValue); err != nil { - return nil, err + return err } pbResp := resp.pbResp //worker.sendToRespCh(resp, ch, true) batchCopResps, batchedRemains, err := worker.handleBatchCopResponse(bo, rpcCtx, pbResp, task.batchTaskList) if err != nil { - return nil, err + return err } taskResp.remains = append(taskResp.remains, batchedRemains...) 
taskResp.batchRespList = batchCopResps - return taskResp, nil + return nil } -func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, task *copTask) (*copTaskResponse, error) { +func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, task *copTask) error { if len(task.batchTaskList) == 0 { - return taskResp, nil + return nil } batchedTasks := task.batchTaskList task.batchTaskList = nil batchCopResps, batchedRemains, err := worker.handleBatchCopResponse(bo, rpcCtx, taskResp.resp.pbResp, batchedTasks) if err != nil { - return nil, err + return err } taskResp.remains = append(taskResp.remains, batchedRemains...) taskResp.batchRespList = batchCopResps - return taskResp, nil + return nil } // handle the batched cop response. From 05253f474c4fab5ef5f72e281aba54abf92ea97a Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 10:59:16 +0800 Subject: [PATCH 32/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 91698b0f7ebc9..756c227204b3e 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1409,7 +1409,7 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask) (* // Handles the response for non-paging copTask. err = worker.handleCopResponse(bo, rpcCtx, taskResp, cacheKey, cacheValue, task, costTime) } - if req.ReadType != "" && taskResp != nil { + if req.ReadType != "" { for _, remain := range taskResp.remains { remain.firstReadType = req.ReadType } From c652cf9749c7c53088c2159393fd600a362e1c74 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 11:17:48 +0800 Subject: [PATCH 33/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 68 ++++++++++++++++------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 756c227204b3e..ec2b76e26b62f 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1250,6 +1250,11 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, curTask := remainTasks[0] bo := chooseBackoffer(ctx, backoffermap, curTask, worker) taskResp, err := worker.handleTaskOnce(bo, curTask) + if err != nil { + resp := &copResponse{err: errors.Trace(err)} + worker.sendToRespCh(resp, respCh, true) + return + } if taskResp != nil { if taskResp.resp != nil { worker.sendToRespCh(taskResp.resp, respCh, true) @@ -1258,11 +1263,6 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, worker.sendToRespCh(resp, respCh, true) } } - if err != nil { - resp := &copResponse{err: errors.Trace(err)} - worker.sendToRespCh(resp, respCh, true) - return - } if worker.finished() { break } @@ -1527,8 +1527,7 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R return errors.Trace(err) } // We may meet RegionError at the first packet, but not during visiting the stream. 
- var err error - taskResp.remains, err = buildCopTasks(bo, task.ranges, &buildCopTaskOpt{ + remains, err := buildCopTasks(bo, task.ranges, &buildCopTaskOpt{ req: worker.req, cache: worker.store.GetRegionCache(), respChan: false, @@ -1538,6 +1537,7 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R if err != nil { return err } + taskResp.remains = append(taskResp.remains, remains...) return worker.handleBatchRemainsOnErr(bo, rpcCtx, taskResp, task) } if lockErr := resp.pbResp.GetLocked(); lockErr != nil { @@ -1585,15 +1585,8 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R return err } - pbResp := resp.pbResp //worker.sendToRespCh(resp, ch, true) - batchCopResps, batchedRemains, err := worker.handleBatchCopResponse(bo, rpcCtx, pbResp, task.batchTaskList) - if err != nil { - return err - } - taskResp.remains = append(taskResp.remains, batchedRemains...) - taskResp.batchRespList = batchCopResps - return nil + return worker.handleBatchCopResponse(bo, rpcCtx, taskResp, task.batchTaskList) } func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, task *copTask) error { @@ -1602,24 +1595,20 @@ func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx * } batchedTasks := task.batchTaskList task.batchTaskList = nil - batchCopResps, batchedRemains, err := worker.handleBatchCopResponse(bo, rpcCtx, taskResp.resp.pbResp, batchedTasks) - if err != nil { - return err - } - taskResp.remains = append(taskResp.remains, batchedRemains...) - taskResp.batchRespList = batchCopResps - return nil + return worker.handleBatchCopResponse(bo, rpcCtx, taskResp, batchedTasks) } // handle the batched cop response. // tasks will be changed, so the input tasks should not be used after calling this function. -func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *coprocessor.Response, - tasks map[uint64]*batchedCopTask) (batchCopResps []*copResponse, remainTasks []*copTask, err error) { - if len(tasks) == 0 { - return nil, nil, nil +func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, + tasks map[uint64]*batchedCopTask) (err error) { + if len(tasks) == 0 || taskResp == nil || taskResp.resp == nil { + return nil } batchedNum := len(tasks) busyThresholdFallback := false + var batchRespList []*copResponse + var remainTasks []*copTask defer func() { if err != nil { return @@ -1628,6 +1617,8 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t worker.storeBatchedNum.Add(uint64(batchedNum - len(remainTasks))) worker.storeBatchedFallbackNum.Add(uint64(len(remainTasks))) } + taskResp.remains = append(taskResp.remains, remainTasks...) 
+ taskResp.batchRespList = batchRespList }() appendRemainTasks := func(tasks ...*copTask) { if remainTasks == nil { @@ -1643,13 +1634,14 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t Addr: rpcCtx.Addr, } } + resp := taskResp.resp.pbResp batchResps := resp.GetBatchResponses() - batchCopResps = make([]*copResponse, 0, len(batchResps)) + batchRespList = make([]*copResponse, 0, len(batchResps)) for _, batchResp := range batchResps { taskID := batchResp.GetTaskId() batchedTask, ok := tasks[taskID] if !ok { - return nil, nil, errors.Errorf("task id %d not found", batchResp.GetTaskId()) + return errors.Errorf("task id %d not found", batchResp.GetTaskId()) } delete(tasks, taskID) resp := &copResponse{ @@ -1666,7 +1658,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s", task.region.GetID(), task.region.GetVer(), task.storeType.Name(), task.storeAddr, regionErr.String()) if err := bo.Backoff(tikv.BoRegionMiss(), errors.New(errStr)); err != nil { - return nil, nil, errors.Trace(err) + return errors.Trace(err) } remains, err := buildCopTasks(bo, task.ranges, &buildCopTaskOpt{ req: worker.req, @@ -1676,7 +1668,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t ignoreTiKVClientReadTimeout: true, }) if err != nil { - return nil, nil, err + return err } appendRemainTasks(remains...) continue @@ -1684,7 +1676,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t //TODO: handle locks in batch if lockErr := batchResp.GetLocked(); lockErr != nil { if err := worker.handleLockErr(bo, resp.pbResp.GetLocked(), task); err != nil { - return nil, nil, err + return err } task.meetLockFallback = true appendRemainTasks(task) @@ -1710,15 +1702,15 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t zap.String("storeAddr", task.storeAddr), zap.Error(err)) if strings.Contains(err.Error(), "write conflict") { - return nil, nil, kv.ErrWriteConflict.FastGen("%s", otherErr) + return kv.ErrWriteConflict.FastGen("%s", otherErr) } - return nil, nil, errors.Trace(err) + return errors.Trace(err) } if err := worker.handleCollectExecutionInfo(bo, dummyRPCCtx, resp); err != nil { - return nil, nil, err + return err } //worker.sendToRespCh(resp, ch, true) - batchCopResps = append(batchCopResps, resp) + batchRespList = append(batchRespList, resp) } for _, t := range tasks { task := t.task @@ -1744,7 +1736,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t if regionErr := resp.GetRegionError(); regionErr != nil && regionErr.ServerIsBusy != nil && regionErr.ServerIsBusy.EstimatedWaitMs > 0 && len(remainTasks) != 0 { if len(batchResps) != 0 { - return nil, nil, errors.New("store batched coprocessor with server is busy error shouldn't contain responses") + return errors.New("store batched coprocessor with server is busy error shouldn't contain responses") } busyThresholdFallback = true handler := newBatchTaskBuilder(bo, worker.req, worker.store.GetRegionCache(), kv.ReplicaReadFollower) @@ -1752,12 +1744,12 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t // do not set busy threshold again. 
task.busyThreshold = 0 if err = handler.handle(task); err != nil { - return nil, nil, err + return err } } remainTasks = handler.build() } - return batchCopResps, remainTasks, nil + return nil } func (worker *copIteratorWorker) handleLockErr(bo *Backoffer, lockErr *kvrpcpb.LockInfo, task *copTask) error { From 0854b6eb6eadb3ade9eda50d2e6227c348049859 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 11:26:38 +0800 Subject: [PATCH 34/47] remove lite worker Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 72 ++--------------------------------- 1 file changed, 3 insertions(+), 69 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index ec2b76e26b62f..9755608471ce2 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -103,7 +103,7 @@ func (c *CopClient) Send(ctx context.Context, req *kv.Request, variables any, op if ctx.Value(util.RUDetailsCtxKey) == nil { ctx = context.WithValue(ctx, util.RUDetailsCtxKey, util.NewRUDetails()) } - it.open(ctx, option.TryCopLiteWorker) + it.open(ctx) return it } @@ -678,7 +678,6 @@ type copIterator struct { req *kv.Request concurrency int smallTaskConcurrency int - liteWorker *liteCopIteratorWorker finishCh chan struct{} // If keepOrder, results are stored in copTask.respChan, read them out one by one. @@ -720,13 +719,6 @@ type copIterator struct { stats *copIteratorRuntimeStats } -type liteCopIteratorWorker struct { - // ctx contains some info(such as rpc interceptor(WithSQLKvExecCounterInterceptor)), it is used for handle cop task later. - ctx context.Context - worker *copIteratorWorker - batchCopRespList []*copResponse -} - // copIteratorWorker receives tasks from copIteratorTaskSender, handles tasks and sends the copResponse to respChan. type copIteratorWorker struct { taskCh <-chan *copTask @@ -865,14 +857,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { } // open starts workers and sender goroutines. -func (it *copIterator) open(ctx context.Context, tryCopLiteWorker *uint32) { - //if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.CompareAndSwapUint32(tryCopLiteWorker, 0, 1) { - // it.liteWorker = &liteCopIteratorWorker{ - // ctx: ctx, - // worker: newCopIteratorWorker(it, nil), - // } - // return - //} +func (it *copIterator) open(ctx context.Context) { taskCh := make(chan *copTask, 1) it.wg.Add(it.concurrency + it.smallTaskConcurrency) var smallTaskCh chan *copTask @@ -1096,15 +1081,7 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { // Otherwise all responses are returned from a single channel. 
failpoint.InjectCall("CtxCancelBeforeReceive", ctx) - if it.liteWorker != nil { - resp = it.liteSendReq(ctx) - if resp == nil { - it.actionOnExceed.close() - return nil, nil - } - it.actionOnExceed.destroyTokenIfNeeded(func() {}) - memTrackerConsumeResp(it.memTracker, resp) - } else if it.respChan != nil { + if it.respChan != nil { // Get next fetched resp from chan resp, ok, closed = it.recvFromRespCh(ctx, it.respChan) if !ok || closed { @@ -1153,49 +1130,6 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { return resp, nil } -func (it *copIterator) liteSendReq(ctx context.Context) (resp *copResponse) { - defer func() { - r := recover() - if r != nil { - logutil.Logger(ctx).Error("copIteratorWork meet panic", - zap.Any("r", r), - zap.Stack("stack trace")) - resp = &copResponse{err: util2.GetRecoverError(r)} - } - }() - - if len(it.liteWorker.batchCopRespList) > 0 { - resp := it.liteWorker.batchCopRespList[0] - it.liteWorker.batchCopRespList = it.liteWorker.batchCopRespList[1:] - return resp - } - - worker := it.liteWorker.worker - backoffermap := make(map[uint64]*Backoffer) - for len(it.tasks) > 0 { - curTask := it.tasks[0] - bo := chooseBackoffer(it.liteWorker.ctx, backoffermap, curTask, worker) - taskResp, err := worker.handleTaskOnce(bo, curTask) - if err != nil || taskResp == nil { - resp = &copResponse{err: errors.Trace(err)} - worker.checkRespOOM(resp, true) - return resp - } - if len(taskResp.batchRespList) > 0 { - it.tasks = append(taskResp.remains, it.tasks[1:]...) - } else { - it.tasks = it.tasks[1:] - } - resp = taskResp.resp - if resp != nil { - worker.checkRespOOM(resp, true) - it.liteWorker.batchCopRespList = taskResp.batchRespList - return resp - } - } - return nil -} - // HasUnconsumedCopRuntimeStats indicate whether has unconsumed CopRuntimeStats. type HasUnconsumedCopRuntimeStats interface { // CollectUnconsumedCopRuntimeStats returns unconsumed CopRuntimeStats. From 53248331bff3e666ad32d184e455b0cc325a1455 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 11:30:58 +0800 Subject: [PATCH 35/47] tiny fix Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 9755608471ce2..a862a52882f85 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1200,7 +1200,7 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, if worker.finished() { break } - if len(taskResp.remains) > 0 { + if taskResp != nil && len(taskResp.remains) > 0 { remainTasks = append(taskResp.remains, remainTasks[1:]...) 
} else { remainTasks = remainTasks[1:] From 2e6c5c782193a747a5dee888e664f5aeb5c1ce57 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 11:39:17 +0800 Subject: [PATCH 36/47] fix ci lint Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index a862a52882f85..c20006b338146 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1541,7 +1541,9 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t } batchedNum := len(tasks) busyThresholdFallback := false - var batchRespList []*copResponse + resp := taskResp.resp.pbResp + batchResps := resp.GetBatchResponses() + batchRespList := make([]*copResponse, 0, len(batchResps)) var remainTasks []*copTask defer func() { if err != nil { @@ -1568,8 +1570,6 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t Addr: rpcCtx.Addr, } } - resp := taskResp.resp.pbResp - batchResps := resp.GetBatchResponses() batchRespList = make([]*copResponse, 0, len(batchResps)) for _, batchResp := range batchResps { taskID := batchResp.GetTaskId() From c2e8f012cf9d16aa9e2af73148ffc251ff3e1a9b Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 15:42:54 +0800 Subject: [PATCH 37/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 133 +++++++++++++++++++--------------- 1 file changed, 74 insertions(+), 59 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index c20006b338146..b96cb55d6f895 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -761,7 +761,7 @@ type copResponse struct { respTime time.Duration } -type copTaskResponse struct { +type copTaskResult struct { resp *copResponse batchRespList []*copResponse remains []*copTask @@ -1210,7 +1210,7 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, // handleTaskOnce handles single copTask, successful results are send to channel. // If error happened, returns error. If region split or meet lock, returns the remain tasks. -func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask) (*copTaskResponse, error) { +func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask) (*copTaskResult, error) { failpoint.Inject("handleTaskOnceError", func(val failpoint.Value) { if val.(bool) { failpoint.Return(nil, errors.New("mock handleTaskOnce error")) @@ -1336,19 +1336,19 @@ func (worker *copIteratorWorker) handleTaskOnce(bo *Backoffer, task *copTask) (* tidbmetrics.DistSQLCoprRespBodySize.WithLabelValues(storeAddr).Observe(float64(len(copResp.Data))) } - taskResp := &copTaskResponse{resp: &copResponse{pbResp: copResp}} + var result *copTaskResult if worker.req.Paging.Enable { - err = worker.handleCopPagingResult(bo, rpcCtx, taskResp, cacheKey, cacheValue, task, costTime) + result, err = worker.handleCopPagingResult(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, costTime) } else { // Handles the response for non-paging copTask. 
- err = worker.handleCopResponse(bo, rpcCtx, taskResp, cacheKey, cacheValue, task, costTime) + result, err = worker.handleCopResponse(bo, rpcCtx, &copResponse{pbResp: copResp}, cacheKey, cacheValue, task, costTime) } - if req.ReadType != "" { - for _, remain := range taskResp.remains { + if req.ReadType != "" && result != nil { + for _, remain := range result.remains { remain.firstReadType = req.ReadType } } - return taskResp, err + return result, err } const ( @@ -1410,42 +1410,40 @@ func appendScanDetail(logStr string, columnFamily string, scanInfo *kvrpcpb.Scan return logStr } -func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) error { - err := worker.handleCopResponse(bo, rpcCtx, taskResp, cacheKey, cacheValue, task, costTime) - if err != nil || (taskResp != nil && len(taskResp.remains) != 0) { +func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) (*copTaskResult, error) { + result, err := worker.handleCopResponse(bo, rpcCtx, resp, cacheKey, cacheValue, task, costTime) + if err != nil || (result != nil && len(result.remains) != 0) { // If there is region error or lock error, keep the paging size and retry. - if taskResp != nil { - for _, remainedTask := range taskResp.remains { - remainedTask.pagingSize = task.pagingSize - } + for _, remainedTask := range result.remains { + remainedTask.pagingSize = task.pagingSize } - return errors.Trace(err) + return result, errors.Trace(err) } - pagingRange := taskResp.resp.pbResp.Range + pagingRange := resp.pbResp.Range // only paging requests need to calculate the next ranges if pagingRange == nil { // If the storage engine doesn't support paging protocol, it should have return all the region data. // So we finish here. - return nil + return result, nil } // calculate next ranges and grow the paging size task.ranges = worker.calculateRemain(task.ranges, pagingRange, worker.req.Desc) if task.ranges.Len() == 0 { - return nil + return result, nil } task.pagingSize = paging.GrowPagingSize(task.pagingSize, worker.req.Paging.MaxPagingSize) - taskResp.remains = append(taskResp.remains, task) - return nil + result.remains = []*copTask{task} + return result, nil } // handleCopResponse checks coprocessor Response for region split and lock, // returns more tasks when that happens, or handles the response if no error. // if we're handling coprocessor paging response, lastRange is the range of last // successful response, otherwise it's nil. 
-func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) error { - resp := taskResp.resp +func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) (*copTaskResult, error) { + result := &copTaskResult{} if ver := resp.pbResp.GetLatestBucketsVersion(); task.bucketsVer < ver { worker.store.GetRegionCache().UpdateBucketsIfNeeded(task.region, ver) } @@ -1453,12 +1451,13 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R if rpcCtx != nil && task.storeType == kv.TiDB { resp.err = errors.Errorf("error: %v", regionErr) //worker.sendToRespCh(resp, ch, true) - return nil + result.resp = resp + return result, nil } errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s", task.region.GetID(), task.region.GetVer(), task.storeType.Name(), task.storeAddr, regionErr.String()) if err := bo.Backoff(tikv.BoRegionMiss(), errors.New(errStr)); err != nil { - return errors.Trace(err) + return nil, errors.Trace(err) } // We may meet RegionError at the first packet, but not during visiting the stream. remains, err := buildCopTasks(bo, task.ranges, &buildCopTaskOpt{ @@ -1469,17 +1468,31 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R ignoreTiKVClientReadTimeout: true, }) if err != nil { - return err + //result.remains = remains + return nil, err + } + //result.remains = append(result.remains, remains...) + batchRespList, batchRemainTasks, err := worker.handleBatchRemainsOnErr(bo, rpcCtx, resp.pbResp, task) + if err != nil { + return nil, err } - taskResp.remains = append(taskResp.remains, remains...) - return worker.handleBatchRemainsOnErr(bo, rpcCtx, taskResp, task) + result.batchRespList = batchRespList + result.remains = append(remains, batchRemainTasks...) + return result, err } if lockErr := resp.pbResp.GetLocked(); lockErr != nil { if err := worker.handleLockErr(bo, lockErr, task); err != nil { - return err + return nil, err } task.meetLockFallback = true - return worker.handleBatchRemainsOnErr(bo, rpcCtx, taskResp, task) + remains := []*copTask{task} + batchRespList, batchRemainTasks, err := worker.handleBatchRemainsOnErr(bo, rpcCtx, resp.pbResp, task) + if err != nil { + return nil, err + } + result.batchRespList = batchRespList + result.remains = append(remains, batchRemainTasks...) + return result, err } if otherErr := resp.pbResp.GetOtherError(); otherErr != "" { err := errors.Errorf("other error: %s", otherErr) @@ -1500,9 +1513,9 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R zap.String("storeAddr", task.storeAddr), zap.String("error", otherErr)) if strings.Contains(err.Error(), "write conflict") { - return kv.ErrWriteConflict.FastGen("%s", otherErr) + return nil, kv.ErrWriteConflict.FastGen("%s", otherErr) } - return errors.Trace(err) + return nil, errors.Trace(err) } // When the request is using paging API, the `Range` is not nil. 
if resp.pbResp.Range != nil { @@ -1511,40 +1524,44 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R resp.startKey = task.ranges.At(0).StartKey } if err := worker.handleCollectExecutionInfo(bo, rpcCtx, resp); err != nil { - return err + return nil, err } resp.respTime = costTime if err := worker.handleCopCache(task, resp, cacheKey, cacheValue); err != nil { - return err + return nil, err } //worker.sendToRespCh(resp, ch, true) - return worker.handleBatchCopResponse(bo, rpcCtx, taskResp, task.batchTaskList) + result.resp = resp + batchRespList, batchRemainTasks, err := worker.handleBatchCopResponse(bo, rpcCtx, resp.pbResp, task.batchTaskList) + if err != nil { + return result, err + } + result.batchRespList = batchRespList + result.remains = batchRemainTasks + return result, nil } -func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, task *copTask) error { +func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *coprocessor.Response, task *copTask) (batchRespList []*copResponse, remainTasks []*copTask, err error) { if len(task.batchTaskList) == 0 { - return nil + return nil, nil, nil } batchedTasks := task.batchTaskList task.batchTaskList = nil - return worker.handleBatchCopResponse(bo, rpcCtx, taskResp, batchedTasks) + return worker.handleBatchCopResponse(bo, rpcCtx, resp, batchedTasks) } // handle the batched cop response. // tasks will be changed, so the input tasks should not be used after calling this function. -func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, taskResp *copTaskResponse, - tasks map[uint64]*batchedCopTask) (err error) { - if len(tasks) == 0 || taskResp == nil || taskResp.resp == nil { - return nil +func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *coprocessor.Response, + tasks map[uint64]*batchedCopTask) (batchRespList []*copResponse, remainTasks []*copTask, err error) { + if len(tasks) == 0 { + return nil, nil, nil } batchedNum := len(tasks) busyThresholdFallback := false - resp := taskResp.resp.pbResp batchResps := resp.GetBatchResponses() - batchRespList := make([]*copResponse, 0, len(batchResps)) - var remainTasks []*copTask defer func() { if err != nil { return @@ -1553,8 +1570,6 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t worker.storeBatchedNum.Add(uint64(batchedNum - len(remainTasks))) worker.storeBatchedFallbackNum.Add(uint64(len(remainTasks))) } - taskResp.remains = append(taskResp.remains, remainTasks...) 
- taskResp.batchRespList = batchRespList }() appendRemainTasks := func(tasks ...*copTask) { if remainTasks == nil { @@ -1575,7 +1590,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t taskID := batchResp.GetTaskId() batchedTask, ok := tasks[taskID] if !ok { - return errors.Errorf("task id %d not found", batchResp.GetTaskId()) + return batchRespList, nil, errors.Errorf("task id %d not found", batchResp.GetTaskId()) } delete(tasks, taskID) resp := &copResponse{ @@ -1592,7 +1607,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s", task.region.GetID(), task.region.GetVer(), task.storeType.Name(), task.storeAddr, regionErr.String()) if err := bo.Backoff(tikv.BoRegionMiss(), errors.New(errStr)); err != nil { - return errors.Trace(err) + return batchRespList, nil, errors.Trace(err) } remains, err := buildCopTasks(bo, task.ranges, &buildCopTaskOpt{ req: worker.req, @@ -1602,7 +1617,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t ignoreTiKVClientReadTimeout: true, }) if err != nil { - return err + return batchRespList, nil, err } appendRemainTasks(remains...) continue @@ -1610,7 +1625,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t //TODO: handle locks in batch if lockErr := batchResp.GetLocked(); lockErr != nil { if err := worker.handleLockErr(bo, resp.pbResp.GetLocked(), task); err != nil { - return err + return batchRespList, nil, err } task.meetLockFallback = true appendRemainTasks(task) @@ -1636,12 +1651,12 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t zap.String("storeAddr", task.storeAddr), zap.Error(err)) if strings.Contains(err.Error(), "write conflict") { - return kv.ErrWriteConflict.FastGen("%s", otherErr) + return batchRespList, nil, kv.ErrWriteConflict.FastGen("%s", otherErr) } - return errors.Trace(err) + return batchRespList, nil, errors.Trace(err) } if err := worker.handleCollectExecutionInfo(bo, dummyRPCCtx, resp); err != nil { - return err + return batchRespList, nil, err } //worker.sendToRespCh(resp, ch, true) batchRespList = append(batchRespList, resp) @@ -1670,7 +1685,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t if regionErr := resp.GetRegionError(); regionErr != nil && regionErr.ServerIsBusy != nil && regionErr.ServerIsBusy.EstimatedWaitMs > 0 && len(remainTasks) != 0 { if len(batchResps) != 0 { - return errors.New("store batched coprocessor with server is busy error shouldn't contain responses") + return batchRespList, nil, errors.New("store batched coprocessor with server is busy error shouldn't contain responses") } busyThresholdFallback = true handler := newBatchTaskBuilder(bo, worker.req, worker.store.GetRegionCache(), kv.ReplicaReadFollower) @@ -1678,12 +1693,12 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t // do not set busy threshold again. 
task.busyThreshold = 0 if err = handler.handle(task); err != nil { - return err + return batchRespList, nil, err } } remainTasks = handler.build() } - return nil + return batchRespList, remainTasks, nil } func (worker *copIteratorWorker) handleLockErr(bo *Backoffer, lockErr *kvrpcpb.LockInfo, task *copTask) error { @@ -1917,7 +1932,7 @@ type copIteratorRuntimeStats struct { stats []*CopRuntimeStats } -func (worker *copIteratorWorker) handleTiDBSendReqErr(err error, task *copTask) (*copTaskResponse, error) { +func (worker *copIteratorWorker) handleTiDBSendReqErr(err error, task *copTask) (*copTaskResult, error) { errCode := errno.ErrUnknown errMsg := err.Error() if terror.ErrorEqual(err, derr.ErrTiKVServerTimeout) { @@ -1940,7 +1955,7 @@ func (worker *copIteratorWorker) handleTiDBSendReqErr(err error, task *copTask) if err != nil { return nil, errors.Trace(err) } - resp := &copTaskResponse{ + resp := &copTaskResult{ resp: &copResponse{ pbResp: &coprocessor.Response{ Data: data, From 0fc1818dbe5308dab6c501e23393713f2baf29d2 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 17:01:12 +0800 Subject: [PATCH 38/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 60 ++++++++++++++--------------------- 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index b96cb55d6f895..4edf5e8e4948e 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1183,25 +1183,25 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, for len(remainTasks) > 0 { curTask := remainTasks[0] bo := chooseBackoffer(ctx, backoffermap, curTask, worker) - taskResp, err := worker.handleTaskOnce(bo, curTask) + result, err := worker.handleTaskOnce(bo, curTask) if err != nil { resp := &copResponse{err: errors.Trace(err)} worker.sendToRespCh(resp, respCh, true) return } - if taskResp != nil { - if taskResp.resp != nil { - worker.sendToRespCh(taskResp.resp, respCh, true) + if result != nil { + if result.resp != nil { + worker.sendToRespCh(result.resp, respCh, true) } - for _, resp := range taskResp.batchRespList { + for _, resp := range result.batchRespList { worker.sendToRespCh(resp, respCh, true) } } if worker.finished() { break } - if taskResp != nil && len(taskResp.remains) > 0 { - remainTasks = append(taskResp.remains, remainTasks[1:]...) + if result != nil && len(result.remains) > 0 { + remainTasks = append(result.remains, remainTasks[1:]...) } else { remainTasks = remainTasks[1:] } @@ -1443,16 +1443,13 @@ func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *ti // if we're handling coprocessor paging response, lastRange is the range of last // successful response, otherwise it's nil. 
func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) (*copTaskResult, error) { - result := &copTaskResult{} if ver := resp.pbResp.GetLatestBucketsVersion(); task.bucketsVer < ver { worker.store.GetRegionCache().UpdateBucketsIfNeeded(task.region, ver) } if regionErr := resp.pbResp.GetRegionError(); regionErr != nil { if rpcCtx != nil && task.storeType == kv.TiDB { resp.err = errors.Errorf("error: %v", regionErr) - //worker.sendToRespCh(resp, ch, true) - result.resp = resp - return result, nil + return &copTaskResult{resp: resp}, nil } errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s", task.region.GetID(), task.region.GetVer(), task.storeType.Name(), task.storeAddr, regionErr.String()) @@ -1468,31 +1465,16 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R ignoreTiKVClientReadTimeout: true, }) if err != nil { - //result.remains = remains return nil, err } - //result.remains = append(result.remains, remains...) - batchRespList, batchRemainTasks, err := worker.handleBatchRemainsOnErr(bo, rpcCtx, resp.pbResp, task) - if err != nil { - return nil, err - } - result.batchRespList = batchRespList - result.remains = append(remains, batchRemainTasks...) - return result, err + return worker.handleBatchRemainsOnErr(bo, rpcCtx, remains, resp.pbResp, task) } if lockErr := resp.pbResp.GetLocked(); lockErr != nil { if err := worker.handleLockErr(bo, lockErr, task); err != nil { return nil, err } task.meetLockFallback = true - remains := []*copTask{task} - batchRespList, batchRemainTasks, err := worker.handleBatchRemainsOnErr(bo, rpcCtx, resp.pbResp, task) - if err != nil { - return nil, err - } - result.batchRespList = batchRespList - result.remains = append(remains, batchRemainTasks...) - return result, err + return worker.handleBatchRemainsOnErr(bo, rpcCtx, []*copTask{task}, resp.pbResp, task) } if otherErr := resp.pbResp.GetOtherError(); otherErr != "" { err := errors.Errorf("other error: %s", otherErr) @@ -1532,8 +1514,7 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R return nil, err } - //worker.sendToRespCh(resp, ch, true) - result.resp = resp + result := &copTaskResult{resp: resp} batchRespList, batchRemainTasks, err := worker.handleBatchCopResponse(bo, rpcCtx, resp.pbResp, task.batchTaskList) if err != nil { return result, err @@ -1543,13 +1524,20 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R return result, nil } -func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *coprocessor.Response, task *copTask) (batchRespList []*copResponse, remainTasks []*copTask, err error) { +func (worker *copIteratorWorker) handleBatchRemainsOnErr(bo *Backoffer, rpcCtx *tikv.RPCContext, remains []*copTask, resp *coprocessor.Response, task *copTask) (*copTaskResult, error) { if len(task.batchTaskList) == 0 { - return nil, nil, nil + return &copTaskResult{remains: remains}, nil } batchedTasks := task.batchTaskList task.batchTaskList = nil - return worker.handleBatchCopResponse(bo, rpcCtx, resp, batchedTasks) + batchRespList, remainTasks, err := worker.handleBatchCopResponse(bo, rpcCtx, resp, batchedTasks) + if err != nil { + return nil, err + } + return &copTaskResult{ + batchRespList: batchRespList, + remains: append(remains, remainTasks...), + }, nil } // handle the batched cop response. 
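// --- Editor's illustrative aside, not part of the patch --------------------
// The refactor above stops sending responses straight to a channel from
// inside handleCopResponse/handleBatchRemainsOnErr and instead returns a
// *copTaskResult that bundles the main response, any store-batched
// responses, and the tasks that still need a retry. The sketch below mirrors
// how a caller consumes such a result; all types and names are simplified
// stand-ins, not the real copr package API.
package main

import "fmt"

type response struct{ data string }

// taskResult is a stand-in for copTaskResult: one main response, optional
// batched responses, and remaining tasks to re-queue.
type taskResult struct {
	resp          *response
	batchRespList []*response
	remains       []string // stand-in for []*copTask
}

// handleOnce is a stand-in for handleTaskOnce: it handles one task and
// reports what came back plus what still has to be retried.
func handleOnce(task string) *taskResult {
	if task == "region-miss" {
		// Pretend the region moved: no response yet, two smaller retries.
		return &taskResult{remains: []string{"retry-a", "retry-b"}}
	}
	return &taskResult{resp: &response{data: "resp(" + task + ")"}}
}

func main() {
	tasks := []string{"region-miss", "t2"}
	var out []*response
	for len(tasks) > 0 {
		result := handleOnce(tasks[0])
		if result != nil {
			if result.resp != nil {
				out = append(out, result.resp) // worker.sendToRespCh(result.resp, ...)
			}
			out = append(out, result.batchRespList...) // batched responses, if any
		}
		if result != nil && len(result.remains) > 0 {
			// Retries replace the current task at the head of the queue.
			tasks = append(result.remains, tasks[1:]...)
		} else {
			tasks = tasks[1:]
		}
	}
	for _, r := range out {
		fmt.Println(r.data)
	}
}
// ----------------------------------------------------------------------------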
@@ -1955,16 +1943,14 @@ func (worker *copIteratorWorker) handleTiDBSendReqErr(err error, task *copTask) if err != nil { return nil, errors.Trace(err) } - resp := &copTaskResult{ + return &copTaskResult{ resp: &copResponse{ pbResp: &coprocessor.Response{ Data: data, }, detail: &CopRuntimeStats{}, }, - } - //worker.sendToRespCh(resp, ch, true) - return resp, nil + }, nil } // calculateRetry splits the input ranges into two, and take one of them according to desc flag. From e382b9c99490f73858b7579be2eda879fb02a0bf Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 18:30:17 +0800 Subject: [PATCH 39/47] add test Signed-off-by: crazycs520 --- pkg/store/copr/copr_test/coprocessor_test.go | 27 ++++++++++++++++++++ pkg/store/mockstore/unistore/rpc.go | 5 ++++ 2 files changed, 32 insertions(+) diff --git a/pkg/store/copr/copr_test/coprocessor_test.go b/pkg/store/copr/copr_test/coprocessor_test.go index ae2236958396e..bb7fd945b19e2 100644 --- a/pkg/store/copr/copr_test/coprocessor_test.go +++ b/pkg/store/copr/copr_test/coprocessor_test.go @@ -19,15 +19,18 @@ import ( "context" "encoding/json" "errors" + "fmt" "testing" "time" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/meta_storagepb" rmpb "github.com/pingcap/kvproto/pkg/resource_manager" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/resourcegroup/runaway" "github.com/pingcap/tidb/pkg/store/copr" "github.com/pingcap/tidb/pkg/store/mockstore" + "github.com/pingcap/tidb/pkg/testkit" "github.com/stretchr/testify/require" "github.com/tikv/client-go/v2/testutils" pd "github.com/tikv/pd/client" @@ -287,3 +290,27 @@ func TestBuildCopIteratorWithRunawayChecker(t *testing.T) { require.Equal(t, concurrency, 1) require.Equal(t, smallTaskConcurrency, 0) } + +func TestCoprocessorReadcs(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + // Test tidb_distsql_scan_concurrency for partition table. + tk.MustExec("create table t1 (id int key, b int, c int, index idx_b(b)) partition by hash(id) partitions 10;") + for i := 0; i < 10; i++ { + tk.MustExec(fmt.Sprintf("insert into t1 values (%v, %v, %v)", i, i, i)) + } + + tk.MustExec("set @@tidb_distsql_scan_concurrency=15") + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/store/mockstore/unistore/unistoreRPCSlowCop", `return(200)`)) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + tk.MustQueryWithContext(ctx, "select sum(c) from t1 use index (idx_b) where b < 10;") + cancel() + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/store/mockstore/unistore/unistoreRPCSlowCop")) + + // Test query after split region. 
+ tk.MustExec("create table t2 (id int key, b int, c int, index idx_b(b));") + tk.MustExec("insert into t2 select * from t1") + tk.MustQuery("split table t2 by (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);").Check(testkit.Rows("11 1")) + tk.MustQuery("select sum(c) from t2 use index (idx_b) where b < 10;") +} diff --git a/pkg/store/mockstore/unistore/rpc.go b/pkg/store/mockstore/unistore/rpc.go index b255138dff63b..4c2f8ef19f2d3 100644 --- a/pkg/store/mockstore/unistore/rpc.go +++ b/pkg/store/mockstore/unistore/rpc.go @@ -97,6 +97,11 @@ func (c *RPCClient) SendRequest(ctx context.Context, addr string, req *tikvrpc.R time.Sleep(time.Duration(val.(int) * int(time.Millisecond))) failpoint.Return(tikvrpc.GenRegionErrorResp(req, &errorpb.Error{Message: "Deadline is exceeded"})) }) + failpoint.Inject("unistoreRPCSlowCop", func(val failpoint.Value) { + if req.Type == tikvrpc.CmdCop { + time.Sleep(time.Duration(val.(int) * int(time.Millisecond))) + } + }) select { case <-ctx.Done(): From 53010eec04db17c76fb3e3532585a8ea1db738d4 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 19:13:43 +0800 Subject: [PATCH 40/47] fix ci lint Signed-off-by: crazycs520 --- pkg/store/copr/copr_test/BUILD.bazel | 4 +++- pkg/store/copr/copr_test/coprocessor_test.go | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/store/copr/copr_test/BUILD.bazel b/pkg/store/copr/copr_test/BUILD.bazel index a0fccf46853c9..3953501f6448c 100644 --- a/pkg/store/copr/copr_test/BUILD.bazel +++ b/pkg/store/copr/copr_test/BUILD.bazel @@ -8,15 +8,17 @@ go_test( "main_test.go", ], flaky = True, - shard_count = 3, + shard_count = 4, deps = [ "//pkg/config", "//pkg/kv", "//pkg/resourcegroup/runaway", "//pkg/store/copr", "//pkg/store/mockstore", + "//pkg/testkit", "//pkg/testkit/testmain", "//pkg/testkit/testsetup", + "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_kvproto//pkg/meta_storagepb", "@com_github_pingcap_kvproto//pkg/resource_manager", "@com_github_stretchr_testify//require", diff --git a/pkg/store/copr/copr_test/coprocessor_test.go b/pkg/store/copr/copr_test/coprocessor_test.go index bb7fd945b19e2..0ce0ce0ccd0f2 100644 --- a/pkg/store/copr/copr_test/coprocessor_test.go +++ b/pkg/store/copr/copr_test/coprocessor_test.go @@ -291,7 +291,7 @@ func TestBuildCopIteratorWithRunawayChecker(t *testing.T) { require.Equal(t, smallTaskConcurrency, 0) } -func TestCoprocessorReadcs(t *testing.T) { +func TestCopQuery(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") From 0339fd2017a3ea01d0b54314bbb175c182b2e95a Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 19:25:47 +0800 Subject: [PATCH 41/47] Revert "remove lite worker" This reverts commit 0854b6eb6eadb3ade9eda50d2e6227c348049859. Signed-off-by: crazycs520 --- pkg/store/copr/copr_test/coprocessor_test.go | 8 +-- pkg/store/copr/coprocessor.go | 76 ++++++++++++++++++-- 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/pkg/store/copr/copr_test/coprocessor_test.go b/pkg/store/copr/copr_test/coprocessor_test.go index 0ce0ce0ccd0f2..f65ed3e64bf5e 100644 --- a/pkg/store/copr/copr_test/coprocessor_test.go +++ b/pkg/store/copr/copr_test/coprocessor_test.go @@ -295,7 +295,7 @@ func TestCopQuery(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") - // Test tidb_distsql_scan_concurrency for partition table. 
+ // Test for https://github.com/pingcap/tidb/pull/57522#discussion_r1875515863 tk.MustExec("create table t1 (id int key, b int, c int, index idx_b(b)) partition by hash(id) partitions 10;") for i := 0; i < 10; i++ { tk.MustExec(fmt.Sprintf("insert into t1 values (%v, %v, %v)", i, i, i)) @@ -307,10 +307,4 @@ func TestCopQuery(t *testing.T) { tk.MustQueryWithContext(ctx, "select sum(c) from t1 use index (idx_b) where b < 10;") cancel() require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/store/mockstore/unistore/unistoreRPCSlowCop")) - - // Test query after split region. - tk.MustExec("create table t2 (id int key, b int, c int, index idx_b(b));") - tk.MustExec("insert into t2 select * from t1") - tk.MustQuery("split table t2 by (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);").Check(testkit.Rows("11 1")) - tk.MustQuery("select sum(c) from t2 use index (idx_b) where b < 10;") } diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 4edf5e8e4948e..e466596678b71 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -103,7 +103,7 @@ func (c *CopClient) Send(ctx context.Context, req *kv.Request, variables any, op if ctx.Value(util.RUDetailsCtxKey) == nil { ctx = context.WithValue(ctx, util.RUDetailsCtxKey, util.NewRUDetails()) } - it.open(ctx) + it.open(ctx, option.TryCopLiteWorker) return it } @@ -678,7 +678,9 @@ type copIterator struct { req *kv.Request concurrency int smallTaskConcurrency int - finishCh chan struct{} + // liteWorker uses to send cop request without start new goroutine, it is only work when tasks count is 1, and used to improve the performance of small cop query. + liteWorker *liteCopIteratorWorker + finishCh chan struct{} // If keepOrder, results are stored in copTask.respChan, read them out one by one. tasks []*copTask @@ -719,6 +721,13 @@ type copIterator struct { stats *copIteratorRuntimeStats } +type liteCopIteratorWorker struct { + // ctx contains some info(such as rpc interceptor(WithSQLKvExecCounterInterceptor)), it is used for handle cop task later. + ctx context.Context + worker *copIteratorWorker + batchCopRespList []*copResponse +} + // copIteratorWorker receives tasks from copIteratorTaskSender, handles tasks and sends the copResponse to respChan. type copIteratorWorker struct { taskCh <-chan *copTask @@ -857,7 +866,14 @@ func (worker *copIteratorWorker) run(ctx context.Context) { } // open starts workers and sender goroutines. -func (it *copIterator) open(ctx context.Context) { +func (it *copIterator) open(ctx context.Context, tryCopLiteWorker *uint32) { + if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.CompareAndSwapUint32(tryCopLiteWorker, 0, 1) { + it.liteWorker = &liteCopIteratorWorker{ + ctx: ctx, + worker: newCopIteratorWorker(it, nil), + } + return + } taskCh := make(chan *copTask, 1) it.wg.Add(it.concurrency + it.smallTaskConcurrency) var smallTaskCh chan *copTask @@ -1081,7 +1097,15 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { // Otherwise all responses are returned from a single channel. 
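// --- Editor's illustrative aside, not part of the patch --------------------
// The comment above describes the two read paths in Next(): with keepOrder
// every copTask has its own respChan and the iterator drains them in task
// order; otherwise all workers write to one shared channel and whichever
// response finishes first is read first. A minimal stand-alone sketch of
// that difference, using plain strings instead of copResponse:
package main

import (
	"fmt"
	"sync"
)

func main() {
	tasks := []string{"t0", "t1", "t2"}

	// keepOrder: one channel per task, drained strictly in task order.
	perTask := make([]chan string, len(tasks))
	var wg sync.WaitGroup
	for i, t := range tasks {
		perTask[i] = make(chan string, 1)
		wg.Add(1)
		go func(i int, t string) {
			defer wg.Done()
			perTask[i] <- "resp-" + t // finish order does not matter here
		}(i, t)
	}
	wg.Wait()
	for _, ch := range perTask {
		fmt.Println("keepOrder:", <-ch)
	}

	// no keepOrder: one shared channel, first finished is returned first.
	shared := make(chan string, len(tasks))
	for _, t := range tasks {
		go func(t string) { shared <- "resp-" + t }(t)
	}
	for range tasks {
		fmt.Println("unordered:", <-shared)
	}
}
// ----------------------------------------------------------------------------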
failpoint.InjectCall("CtxCancelBeforeReceive", ctx) - if it.respChan != nil { + if it.liteWorker != nil { + resp = it.liteWorker.liteSendReq(ctx, it) + if resp == nil { + it.actionOnExceed.close() + return nil, nil + } + it.actionOnExceed.destroyTokenIfNeeded(func() {}) + memTrackerConsumeResp(it.memTracker, resp) + } else if it.respChan != nil { // Get next fetched resp from chan resp, ok, closed = it.recvFromRespCh(ctx, it.respChan) if !ok || closed { @@ -1130,6 +1154,50 @@ func (it *copIterator) Next(ctx context.Context) (kv.ResultSubset, error) { return resp, nil } +func (w *liteCopIteratorWorker) liteSendReq(ctx context.Context, it *copIterator) (resp *copResponse) { + defer func() { + r := recover() + if r != nil { + logutil.Logger(ctx).Error("copIteratorWork meet panic", + zap.Any("r", r), + zap.Stack("stack trace")) + resp = &copResponse{err: util2.GetRecoverError(r)} + } + }() + + if len(w.batchCopRespList) > 0 { + resp := w.batchCopRespList[0] + w.batchCopRespList = w.batchCopRespList[1:] + return resp + } + + worker := w.worker + backoffermap := make(map[uint64]*Backoffer) + for len(it.tasks) > 0 { + curTask := it.tasks[0] + bo := chooseBackoffer(w.ctx, backoffermap, curTask, worker) + result, err := worker.handleTaskOnce(bo, curTask) + if err != nil { + resp = &copResponse{err: errors.Trace(err)} + worker.checkRespOOM(resp, true) + return resp + } + + if result != nil && len(result.batchRespList) > 0 { + it.tasks = append(result.remains, it.tasks[1:]...) + } else { + it.tasks = it.tasks[1:] + } + if result != nil && result.resp != nil { + resp = result.resp + worker.checkRespOOM(resp, true) + w.batchCopRespList = result.batchRespList + return resp + } + } + return nil +} + // HasUnconsumedCopRuntimeStats indicate whether has unconsumed CopRuntimeStats. type HasUnconsumedCopRuntimeStats interface { // CollectUnconsumedCopRuntimeStats returns unconsumed CopRuntimeStats. From a221597bc38e6f5a168652c2ffc3b187000af496 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 20:02:18 +0800 Subject: [PATCH 42/47] refine comment Signed-off-by: crazycs520 --- pkg/store/copr/copr_test/coprocessor_test.go | 2 +- pkg/store/copr/coprocessor.go | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/store/copr/copr_test/coprocessor_test.go b/pkg/store/copr/copr_test/coprocessor_test.go index f65ed3e64bf5e..f075525b4dd4b 100644 --- a/pkg/store/copr/copr_test/coprocessor_test.go +++ b/pkg/store/copr/copr_test/coprocessor_test.go @@ -291,7 +291,7 @@ func TestBuildCopIteratorWithRunawayChecker(t *testing.T) { require.Equal(t, smallTaskConcurrency, 0) } -func TestCopQuery(t *testing.T) { +func TestQueryWithConcurrentSmallCop(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index e466596678b71..8f09fbe5529b1 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -868,6 +868,8 @@ func (worker *copIteratorWorker) run(ctx context.Context) { // open starts workers and sender goroutines. func (it *copIterator) open(ctx context.Context, tryCopLiteWorker *uint32) { if len(it.tasks) == 1 && tryCopLiteWorker != nil && atomic.CompareAndSwapUint32(tryCopLiteWorker, 0, 1) { + // For a query, only one `copIterator` can use `liteWorker`, otherwise it will affect the performance of multiple cop iterators executed concurrently, + // see more detail in TestQueryWithConcurrentSmallCop. 
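// --- Editor's illustrative aside, not part of the patch --------------------
// The two comment lines above explain why the lite path is gated: within one
// query only the first single-task copIterator may handle its task inline,
// which open() enforces with CompareAndSwapUint32 on the shared
// tryCopLiteWorker counter. A minimal sketch of that gating with stand-in
// names (not the real copr types):
package main

import (
	"fmt"
	"sync/atomic"
)

// openIterator decides, for one iterator of a query, whether it may run its
// single task inline ("lite") or must start worker goroutines as usual.
func openIterator(taskCount int, tryLite *uint32) string {
	if taskCount == 1 && tryLite != nil && atomic.CompareAndSwapUint32(tryLite, 0, 1) {
		return "lite: handle the task inline in Next()"
	}
	return "normal: start worker goroutines"
}

func main() {
	var tryLite uint32 // shared by every copIterator built for the same query
	fmt.Println(openIterator(1, &tryLite)) // first single-task iterator wins the CAS
	fmt.Println(openIterator(1, &tryLite)) // later iterators fall back to workers
	fmt.Println(openIterator(3, &tryLite)) // multi-task iterators never go lite
}
// ----------------------------------------------------------------------------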
it.liteWorker = &liteCopIteratorWorker{ ctx: ctx, worker: newCopIteratorWorker(it, nil), From 03f245a11dfb537fec8e6ebb0f18536a6669be40 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 23:05:55 +0800 Subject: [PATCH 43/47] fix bug Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 46 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 8f09fbe5529b1..2f3d052ca11ce 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -853,7 +853,7 @@ func (worker *copIteratorWorker) run(ctx context.Context) { if worker.respChan != nil { // When a task is finished by the worker, send a finCopResp into channel to notify the copIterator that // there is a task finished. - worker.sendToRespCh(finCopResp, worker.respChan, false) + worker.sendToRespCh(finCopResp, worker.respChan) } if task.respChan != nil { close(task.respChan) @@ -1038,8 +1038,7 @@ func (sender *copIteratorTaskSender) sendToTaskCh(t *copTask, sendTo chan<- *cop return } -func (worker *copIteratorWorker) sendToRespCh(resp *copResponse, respCh chan<- *copResponse, checkOOM bool) (exit bool) { - worker.checkRespOOM(resp, checkOOM) +func (worker *copIteratorWorker) sendToRespCh(resp *copResponse, respCh chan<- *copResponse) (exit bool) { select { case respCh <- resp: case <-worker.finishCh: @@ -1048,8 +1047,8 @@ func (worker *copIteratorWorker) sendToRespCh(resp *copResponse, respCh chan<- * return } -func (worker *copIteratorWorker) checkRespOOM(resp *copResponse, checkOOM bool) { - if worker.memTracker != nil && checkOOM { +func (worker *copIteratorWorker) checkRespOOM(resp *copResponse) { + if worker.memTracker != nil { consumed := resp.MemSize() failpoint.Inject("testRateLimitActionMockConsumeAndAssert", func(val failpoint.Value) { if val.(bool) { @@ -1167,13 +1166,12 @@ func (w *liteCopIteratorWorker) liteSendReq(ctx context.Context, it *copIterator } }() + worker := w.worker if len(w.batchCopRespList) > 0 { resp := w.batchCopRespList[0] w.batchCopRespList = w.batchCopRespList[1:] return resp } - - worker := w.worker backoffermap := make(map[uint64]*Backoffer) for len(it.tasks) > 0 { curTask := it.tasks[0] @@ -1181,18 +1179,17 @@ func (w *liteCopIteratorWorker) liteSendReq(ctx context.Context, it *copIterator result, err := worker.handleTaskOnce(bo, curTask) if err != nil { resp = &copResponse{err: errors.Trace(err)} - worker.checkRespOOM(resp, true) + worker.checkRespOOM(resp) return resp } - if result != nil && len(result.batchRespList) > 0 { + if result != nil && len(result.remains) > 0 { it.tasks = append(result.remains, it.tasks[1:]...) } else { it.tasks = it.tasks[1:] } if result != nil && result.resp != nil { resp = result.resp - worker.checkRespOOM(resp, true) w.batchCopRespList = result.batchRespList return resp } @@ -1244,8 +1241,8 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, zap.Any("r", r), zap.Stack("stack trace")) resp := &copResponse{err: util2.GetRecoverError(r)} - // if panic has happened, set checkOOM to false to avoid another panic. - worker.sendToRespCh(resp, respCh, false) + // if panic has happened, not checkRespOOM to avoid another panic. 
+ worker.sendToRespCh(resp, respCh) } }() remainTasks := []*copTask{task} @@ -1256,15 +1253,16 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, result, err := worker.handleTaskOnce(bo, curTask) if err != nil { resp := &copResponse{err: errors.Trace(err)} - worker.sendToRespCh(resp, respCh, true) + worker.checkRespOOM(resp) + worker.sendToRespCh(resp, respCh) return } if result != nil { if result.resp != nil { - worker.sendToRespCh(result.resp, respCh, true) + worker.sendToRespCh(result.resp, respCh) } for _, resp := range result.batchRespList { - worker.sendToRespCh(resp, respCh, true) + worker.sendToRespCh(resp, respCh) } } if worker.finished() { @@ -1519,6 +1517,7 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R if regionErr := resp.pbResp.GetRegionError(); regionErr != nil { if rpcCtx != nil && task.storeType == kv.TiDB { resp.err = errors.Errorf("error: %v", regionErr) + worker.checkRespOOM(resp) return &copTaskResult{resp: resp}, nil } errStr := fmt.Sprintf("region_id:%v, region_ver:%v, store_type:%s, peer_addr:%s, error:%s", @@ -1584,6 +1583,7 @@ func (worker *copIteratorWorker) handleCopResponse(bo *Backoffer, rpcCtx *tikv.R return nil, err } + worker.checkRespOOM(resp) result := &copTaskResult{resp: resp} batchRespList, batchRemainTasks, err := worker.handleBatchCopResponse(bo, rpcCtx, resp.pbResp, task.batchTaskList) if err != nil { @@ -1716,7 +1716,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t if err := worker.handleCollectExecutionInfo(bo, dummyRPCCtx, resp); err != nil { return batchRespList, nil, err } - //worker.sendToRespCh(resp, ch, true) + worker.checkRespOOM(resp) batchRespList = append(batchRespList, resp) } for _, t := range tasks { @@ -2013,14 +2013,14 @@ func (worker *copIteratorWorker) handleTiDBSendReqErr(err error, task *copTask) if err != nil { return nil, errors.Trace(err) } - return &copTaskResult{ - resp: &copResponse{ - pbResp: &coprocessor.Response{ - Data: data, - }, - detail: &CopRuntimeStats{}, + resp := &copResponse{ + pbResp: &coprocessor.Response{ + Data: data, }, - }, nil + detail: &CopRuntimeStats{}, + } + worker.checkRespOOM(resp) + return &copTaskResult{resp: resp}, nil } // calculateRetry splits the input ranges into two, and take one of them according to desc flag. 
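// --- Editor's illustrative aside, not part of the patch --------------------
// The preceding changes split the old sendToRespCh(resp, respCh, checkOOM bool)
// into two explicit steps: producers call checkRespOOM(resp) to account the
// response's memory, then sendToRespCh(resp, respCh) only delivers it, so
// the panic-recovery path can simply skip the accounting. A simplified,
// stand-alone sketch of that separation (a stand-in tracker, not the real
// memory.Tracker):
package main

import "fmt"

type resp struct{ data string }

func (r *resp) MemSize() int64 { return int64(len(r.data)) }

type tracker struct{ consumed int64 }

// account records the response size; in the real code this is where the
// memory tracker records resp.MemSize().
func (t *tracker) account(r *resp) { t.consumed += r.MemSize() }

// send only moves the response to the consumer; it no longer knows anything
// about memory accounting.
func send(r *resp, ch chan<- *resp) { ch <- r }

func main() {
	mt := &tracker{}
	ch := make(chan *resp, 2)

	normal := &resp{data: "normal response"}
	mt.account(normal) // producer accounts first ...
	send(normal, ch)   // ... then sends

	// A response built while recovering from a panic is sent without another
	// accounting step, mirroring the "not checkRespOOM to avoid another
	// panic" comment above.
	send(&resp{data: "error response"}, ch)

	close(ch)
	for r := range ch {
		fmt.Println(r.data)
	}
	fmt.Println("tracked bytes:", mt.consumed)
}
// ----------------------------------------------------------------------------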
From d4f5b0b6eb4d88f163f9c750700bf7ea1bc0ecf0 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Wed, 11 Dec 2024 23:11:07 +0800 Subject: [PATCH 44/47] fix Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 2f3d052ca11ce..1eb0ad08ec752 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1188,10 +1188,16 @@ func (w *liteCopIteratorWorker) liteSendReq(ctx context.Context, it *copIterator } else { it.tasks = it.tasks[1:] } - if result != nil && result.resp != nil { - resp = result.resp - w.batchCopRespList = result.batchRespList - return resp + if result != nil { + if result.resp != nil { + w.batchCopRespList = result.batchRespList + return result.resp + } + if len(result.batchRespList) > 0 { + resp := result.batchRespList[0] + w.batchCopRespList = result.batchRespList[1:] + return resp + } } } return nil From 6355e0d03b4c7fe2d501fca2eda40ac62883b413 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Thu, 12 Dec 2024 10:51:36 +0800 Subject: [PATCH 45/47] fix bug Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 1eb0ad08ec752..64b5e4050762c 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1168,7 +1168,7 @@ func (w *liteCopIteratorWorker) liteSendReq(ctx context.Context, it *copIterator worker := w.worker if len(w.batchCopRespList) > 0 { - resp := w.batchCopRespList[0] + resp = w.batchCopRespList[0] w.batchCopRespList = w.batchCopRespList[1:] return resp } @@ -1194,7 +1194,7 @@ func (w *liteCopIteratorWorker) liteSendReq(ctx context.Context, it *copIterator return result.resp } if len(result.batchRespList) > 0 { - resp := result.batchRespList[0] + resp = result.batchRespList[0] w.batchCopRespList = result.batchRespList[1:] return resp } @@ -1486,12 +1486,15 @@ func appendScanDetail(logStr string, columnFamily string, scanInfo *kvrpcpb.Scan func (worker *copIteratorWorker) handleCopPagingResult(bo *Backoffer, rpcCtx *tikv.RPCContext, resp *copResponse, cacheKey []byte, cacheValue *coprCacheValue, task *copTask, costTime time.Duration) (*copTaskResult, error) { result, err := worker.handleCopResponse(bo, rpcCtx, resp, cacheKey, cacheValue, task, costTime) - if err != nil || (result != nil && len(result.remains) != 0) { + if err != nil { + return nil, errors.Trace(err) + } + if result != nil && len(result.remains) > 0 { // If there is region error or lock error, keep the paging size and retry. 
for _, remainedTask := range result.remains { remainedTask.pagingSize = task.pagingSize } - return result, errors.Trace(err) + return result, nil } pagingRange := resp.pbResp.Range // only paging requests need to calculate the next ranges From a3c7e530163c2572dc988dbe908b509edae45bc3 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Thu, 12 Dec 2024 16:37:06 +0800 Subject: [PATCH 46/47] add test case Signed-off-by: crazycs520 --- pkg/store/copr/copr_test/coprocessor_test.go | 26 +++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pkg/store/copr/copr_test/coprocessor_test.go b/pkg/store/copr/copr_test/coprocessor_test.go index f075525b4dd4b..e39648ef627d2 100644 --- a/pkg/store/copr/copr_test/coprocessor_test.go +++ b/pkg/store/copr/copr_test/coprocessor_test.go @@ -295,16 +295,30 @@ func TestQueryWithConcurrentSmallCop(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") - // Test for https://github.com/pingcap/tidb/pull/57522#discussion_r1875515863 tk.MustExec("create table t1 (id int key, b int, c int, index idx_b(b)) partition by hash(id) partitions 10;") for i := 0; i < 10; i++ { tk.MustExec(fmt.Sprintf("insert into t1 values (%v, %v, %v)", i, i, i)) } - + tk.MustExec("create table t2 (id bigint unsigned key, b int, index idx_b (b));") + tk.MustExec("insert into t2 values (1,1), (18446744073709551615,2)") tk.MustExec("set @@tidb_distsql_scan_concurrency=15") - require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/store/mockstore/unistore/unistoreRPCSlowCop", `return(200)`)) - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - tk.MustQueryWithContext(ctx, "select sum(c) from t1 use index (idx_b) where b < 10;") - cancel() + tk.MustExec("set @@tidb_executor_concurrency=15") + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/store/mockstore/unistore/unistoreRPCSlowCop", `return(100)`)) + // Test for https://github.com/pingcap/tidb/pull/57522#discussion_r1875515863 + start := time.Now() + tk.MustQuery("select sum(c) from t1 use index (idx_b) where b < 10;") + require.Less(t, time.Since(start), time.Millisecond*250) + // Test for index reader with partition table + start = time.Now() + tk.MustQuery("select id, b from t1 use index (idx_b) where b < 10;") + require.Less(t, time.Since(start), time.Millisecond*150) + // Test for table reader with partition table. + start = time.Now() + tk.MustQuery("select * from t1 where c < 10;") + require.Less(t, time.Since(start), time.Millisecond*150) + // // Test for table reader with 2 parts ranges. 
+ start = time.Now() + tk.MustQuery("select * from t2 where id >= 1 and id <= 18446744073709551615 order by id;") + require.Less(t, time.Since(start), time.Millisecond*150) require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/store/mockstore/unistore/unistoreRPCSlowCop")) } From e3f13d18541cbe201cfd076e7f93e90caee16531 Mon Sep 17 00:00:00 2001 From: crazycs520 Date: Thu, 12 Dec 2024 17:21:21 +0800 Subject: [PATCH 47/47] refine Signed-off-by: crazycs520 --- pkg/store/copr/coprocessor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/store/copr/coprocessor.go b/pkg/store/copr/coprocessor.go index 64b5e4050762c..bf0b05ecd2280 100644 --- a/pkg/store/copr/coprocessor.go +++ b/pkg/store/copr/coprocessor.go @@ -1628,7 +1628,6 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t } batchedNum := len(tasks) busyThresholdFallback := false - batchResps := resp.GetBatchResponses() defer func() { if err != nil { return @@ -1652,6 +1651,7 @@ func (worker *copIteratorWorker) handleBatchCopResponse(bo *Backoffer, rpcCtx *t Addr: rpcCtx.Addr, } } + batchResps := resp.GetBatchResponses() batchRespList = make([]*copResponse, 0, len(batchResps)) for _, batchResp := range batchResps { taskID := batchResp.GetTaskId()