From af9385c82b5a17b069b963d9b13555cce170e22a Mon Sep 17 00:00:00 2001 From: Yujie Xia Date: Wed, 18 Jan 2023 14:25:12 +0800 Subject: [PATCH 1/4] lightning: check hasDupe and tableID when resolve duplicate rows --- br/pkg/lightning/backend/local/local.go | 22 +++++++++++-- br/pkg/lightning/restore/table_restore.go | 8 +++-- br/tests/lightning_issue_40657/config.toml | 6 ++++ .../data1/test.t-schema.sql | 6 ++++ .../lightning_issue_40657/data1/test.t.0.csv | 6 ++++ .../data2/test.t-schema.sql | 6 ++++ .../lightning_issue_40657/data2/test.t.0.csv | 6 ++++ br/tests/lightning_issue_40657/run.sh | 32 +++++++++++++++++++ 8 files changed, 87 insertions(+), 5 deletions(-) create mode 100644 br/tests/lightning_issue_40657/config.toml create mode 100644 br/tests/lightning_issue_40657/data1/test.t-schema.sql create mode 100644 br/tests/lightning_issue_40657/data1/test.t.0.csv create mode 100644 br/tests/lightning_issue_40657/data2/test.t-schema.sql create mode 100644 br/tests/lightning_issue_40657/data2/test.t.0.csv create mode 100644 br/tests/lightning_issue_40657/run.sh diff --git a/br/pkg/lightning/backend/local/local.go b/br/pkg/lightning/backend/local/local.go index 6ceb269f80248..ec6677c8dea42 100644 --- a/br/pkg/lightning/backend/local/local.go +++ b/br/pkg/lightning/backend/local/local.go @@ -69,6 +69,7 @@ import ( "go.uber.org/atomic" "go.uber.org/multierr" "go.uber.org/zap" + "golang.org/x/exp/slices" "golang.org/x/sync/errgroup" "golang.org/x/time/rate" "google.golang.org/grpc" @@ -1722,13 +1723,18 @@ func (local *local) ResolveDuplicateRows(ctx context.Context, tbl table.Table, t return err } + tableIDs := physicalTableIDs(tbl.Meta()) + keyInTable := func(key []byte) bool { + return slices.Contains(tableIDs, tablecodec.DecodeTableID(key)) + } + errLimiter := rate.NewLimiter(1, 1) pool := utils.NewWorkerPool(uint(local.dupeConcurrency), "resolve duplicate rows") err = local.errorMgr.ResolveAllConflictKeys( ctx, tableName, pool, func(ctx context.Context, handleRows [][2][]byte) error { for { - err := local.deleteDuplicateRows(ctx, logger, handleRows, decoder) + err := local.deleteDuplicateRows(ctx, logger, handleRows, decoder, keyInTable) if err == nil { return nil } @@ -1751,7 +1757,13 @@ func (local *local) ResolveDuplicateRows(ctx context.Context, tbl table.Table, t return errors.Trace(err) } -func (local *local) deleteDuplicateRows(ctx context.Context, logger *log.Task, handleRows [][2][]byte, decoder *kv.TableKVDecoder) (err error) { +func (local *local) deleteDuplicateRows( + ctx context.Context, + logger *log.Task, + handleRows [][2][]byte, + decoder *kv.TableKVDecoder, + keyInTable func(key []byte) bool, +) (err error) { // Starts a Delete transaction. txn, err := local.tikvCli.Begin() if err != nil { @@ -1776,6 +1788,12 @@ func (local *local) deleteDuplicateRows(ctx context.Context, logger *log.Task, h // (if the number of duplicates is small this should fit entirely in memory) // (Txn's MemBuf's bufferSizeLimit is currently infinity) for _, handleRow := range handleRows { + // Skip the row key if it's not in the table. + // This can happen if the table has been recreated or truncated, + // and the duplicate key is from the old table. + if !keyInTable(handleRow[0]) { + continue + } logger.Debug("[resolve-dupe] found row to resolve", logutil.Key("handle", handleRow[0]), logutil.Key("row", handleRow[1])) diff --git a/br/pkg/lightning/restore/table_restore.go b/br/pkg/lightning/restore/table_restore.go index 37ba113c82eed..311659f6ffa51 100644 --- a/br/pkg/lightning/restore/table_restore.go +++ b/br/pkg/lightning/restore/table_restore.go @@ -812,9 +812,11 @@ func (tr *TableRestore) postProcess( } hasDupe = hasDupe || hasRemoteDupe - if err = rc.backend.ResolveDuplicateRows(ctx, tr.encTable, tr.tableName, rc.cfg.TikvImporter.DuplicateResolution); err != nil { - tr.logger.Error("resolve remote duplicate keys failed", log.ShortError(err)) - return false, err + if hasDupe { + if err = rc.backend.ResolveDuplicateRows(ctx, tr.encTable, tr.tableName, rc.cfg.TikvImporter.DuplicateResolution); err != nil { + tr.logger.Error("resolve remote duplicate keys failed", log.ShortError(err)) + return false, err + } } } diff --git a/br/tests/lightning_issue_40657/config.toml b/br/tests/lightning_issue_40657/config.toml new file mode 100644 index 0000000000000..74561bc05f026 --- /dev/null +++ b/br/tests/lightning_issue_40657/config.toml @@ -0,0 +1,6 @@ +[tikv-importer] +backend = "local" +duplicate-resolution = "remove" + +[mydumper.csv] +header = true diff --git a/br/tests/lightning_issue_40657/data1/test.t-schema.sql b/br/tests/lightning_issue_40657/data1/test.t-schema.sql new file mode 100644 index 0000000000000..ef7136b531abc --- /dev/null +++ b/br/tests/lightning_issue_40657/data1/test.t-schema.sql @@ -0,0 +1,6 @@ +CREATE TABLE `t` ( + `id` int(11) NOT NULL, + `name` varchar(255) DEFAULT NULL, + PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */, + UNIQUE KEY `uni_name` (`name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; diff --git a/br/tests/lightning_issue_40657/data1/test.t.0.csv b/br/tests/lightning_issue_40657/data1/test.t.0.csv new file mode 100644 index 0000000000000..2987cee08b206 --- /dev/null +++ b/br/tests/lightning_issue_40657/data1/test.t.0.csv @@ -0,0 +1,6 @@ +id,name +1,"aaa01" +2,"aaa02" +3,"aaa03" +4,"aaa04" +5,"aaa04" diff --git a/br/tests/lightning_issue_40657/data2/test.t-schema.sql b/br/tests/lightning_issue_40657/data2/test.t-schema.sql new file mode 100644 index 0000000000000..ef7136b531abc --- /dev/null +++ b/br/tests/lightning_issue_40657/data2/test.t-schema.sql @@ -0,0 +1,6 @@ +CREATE TABLE `t` ( + `id` int(11) NOT NULL, + `name` varchar(255) DEFAULT NULL, + PRIMARY KEY (`id`) /*T![clustered_index] CLUSTERED */, + UNIQUE KEY `uni_name` (`name`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; diff --git a/br/tests/lightning_issue_40657/data2/test.t.0.csv b/br/tests/lightning_issue_40657/data2/test.t.0.csv new file mode 100644 index 0000000000000..f64aebd0630d9 --- /dev/null +++ b/br/tests/lightning_issue_40657/data2/test.t.0.csv @@ -0,0 +1,6 @@ +id,name +1,"aaa01" +2,"aaa02" +3,"aaa03" +4,"aaa04" +5,"aaa05" diff --git a/br/tests/lightning_issue_40657/run.sh b/br/tests/lightning_issue_40657/run.sh new file mode 100644 index 0000000000000..d8a4e90d085de --- /dev/null +++ b/br/tests/lightning_issue_40657/run.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux + +check_cluster_version 5 2 0 'duplicate detection' || exit 0 + +run_lightning -d "tests/$TEST_NAME/data1" +run_sql 'admin check table test.t' +run_sql 'select count(*) from test.t' +check_contains 'count(*): 3' +run_sql 'select count(*) from lightning_task_info.conflict_error_v1' +check_contains 'count(*): 2' + +run_sql 'truncate table test.t' +run_lightning -d "tests/$TEST_NAME/data2" +run_sql 'admin check table test.t' +run_sql 'select count(*) from test.t' +check_contains 'count(*): 5' From e49859b638336b40db6fdda3a71b476723894821 Mon Sep 17 00:00:00 2001 From: Yujie Xia Date: Wed, 18 Jan 2023 18:10:17 +0800 Subject: [PATCH 2/4] Update br/tests/lightning_issue_40657/run.sh --- br/tests/lightning_issue_40657/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/tests/lightning_issue_40657/run.sh b/br/tests/lightning_issue_40657/run.sh index d8a4e90d085de..a20600b79d14b 100644 --- a/br/tests/lightning_issue_40657/run.sh +++ b/br/tests/lightning_issue_40657/run.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright 2020 PingCAP, Inc. +# Copyright 2023 PingCAP, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 734f019c7e735a016823b83ce65ecf77e411de55 Mon Sep 17 00:00:00 2001 From: Yujie Xia Date: Sat, 28 Jan 2023 11:06:37 +0800 Subject: [PATCH 3/4] fix integration test --- br/tests/lightning_disable_scheduler_by_key_range/run.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/br/tests/lightning_disable_scheduler_by_key_range/run.sh b/br/tests/lightning_disable_scheduler_by_key_range/run.sh index 9df6067c8baf8..2a88f0e0cac8e 100644 --- a/br/tests/lightning_disable_scheduler_by_key_range/run.sh +++ b/br/tests/lightning_disable_scheduler_by_key_range/run.sh @@ -50,16 +50,16 @@ ready_for_import_engine run_curl "https://${PD_ADDR}/pd/api/v1/config/cluster-version" -length=$(run_curl "https://${PD_ADDR}/pd/api/v1/config/region-label/rules" | jq 'select(.[].rule_type == "key-range") | length') +length=$(run_curl "https://${PD_ADDR}/pd/api/v1/config/region-label/rules" | jq '[ .[] | select(.rule_type == "key-range" and .labels[0].key == "schedule") ] | length') if [ "$length" != "1" ]; then - echo "region-label key-range rules should be 1, but got $length" >&2 + echo "region-label key-range schedule rules should be 1, but got $length" >&2 exit 1 fi wait "$shpid" -length=$(run_curl "https://${PD_ADDR}/pd/api/v1/config/region-label/rules" | jq 'select(.[].rule_type == "key-range") | length') +length=$(run_curl "https://${PD_ADDR}/pd/api/v1/config/region-label/rules" | jq '[ .[] | select(.rule_type == "key-range" and .labels[0].key == "schedule") ] | length') if [ -n "$length" ] && [ "$length" -ne 0 ]; then - echo "region-label key-range rules should be 0, but got $length" >&2 + echo "region-label key-range schedule rules should be 0, but got $length" >&2 exit 1 fi From e065f4374590c3d4fba5cd0324f2e7c3c5245da6 Mon Sep 17 00:00:00 2001 From: Yujie Xia Date: Sat, 28 Jan 2023 12:15:03 +0800 Subject: [PATCH 4/4] fix lightning_reload_cert test --- br/tests/lightning_reload_cert/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/tests/lightning_reload_cert/run.sh b/br/tests/lightning_reload_cert/run.sh index e06ef8d7fbf51..be0c5ff40421e 100644 --- a/br/tests/lightning_reload_cert/run.sh +++ b/br/tests/lightning_reload_cert/run.sh @@ -29,7 +29,7 @@ shpid="$!" sleep 15 ok=0 for _ in {0..60}; do - if grep -Fq "connection closed before server preface received" "$TEST_DIR"/lightning.log; then + if grep -Fq "connection error" "$TEST_DIR"/lightning.log; then ok=1 break fi