From 8f79f8f216dab53a7a052d3bc210ffd11bd9e553 Mon Sep 17 00:00:00 2001 From: lyzx2001 Date: Mon, 13 May 2024 15:38:05 +0800 Subject: [PATCH 1/8] revise tidb-lightning.toml --- lightning/tidb-lightning.toml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/lightning/tidb-lightning.toml b/lightning/tidb-lightning.toml index e7945b1c4eafb..bab8531f714c9 100644 --- a/lightning/tidb-lightning.toml +++ b/lightning/tidb-lightning.toml @@ -99,9 +99,9 @@ driver = "file" # You can manually insert the correct records into the target table based on your application requirements. Note that the target TiKV must be v5.2.0 or later versions. # - "ignore": when encountering conflicting primary or unique key records, TiDB Lightning retains the old data and ignores the new data. This option can only be used in the logical import mode. strategy = "" -# Controls whether to enable preprocess conflict detection, which checks conflicts in data before importing it to TiDB. The default value is false, indicating that TiDB Lightning only checks conflicts after the import. If you set it to true, TiDB Lightning checks conflicts both before and after the import. This parameter can be used only in the physical import mode. It is not recommended to set `precheck-conflict-before-import = true` for now. +# Controls whether to enable preprocess conflict detection, which checks conflicts in data before importing it to TiDB. The default value is false, indicating that TiDB Lightning only checks conflicts after the import. If you set it to true, TiDB Lightning checks conflicts both before and after the import. This parameter can be used only in the physical import mode. In scenarios where the number of conflict records is greater than 1,000,000, it is recommended to set `precheck-conflict-before-import = true` for better performance in conflict detection. In other scenarios, it is recommended to disable it. 
# precheck-conflict-before-import = false -# Controls the maximum number of conflict errors that can be handled when strategy is "replace" or "ignore". You can set it only when strategy is "replace" or "ignore". The default value is 10000. If you set the value larger than 10000, it is possible that the import will have performance degradation or fail due to potential errors. +# Controls the maximum number of conflict errors that can be handled when strategy is "replace" or "ignore". You can set it only when the strategy is "replace" or "ignore". The default value is 10000. If you set a value larger than 10000, the import process might experience performance degradation. # threshold = 10000 # Controls the maximum number of records in the `conflict_records` table. The default value is 10000. # Starting from v8.1.0, there is no need to configure `max-record-rows` manually, because TiDB Lightning automatically assigns the value of `max-record-rows` with the value of `threshold`, regardless of the user input. `max-record-rows` will be deprecated in a future release. @@ -114,6 +114,19 @@ strategy = "" backend = "importer" # Address of tikv-importer when the backend is 'importer' addr = "127.0.0.1:8287" + +# The `duplicate-resolution` parameter is deprecated starting from v8.0.0 and will be removed in a future release. For more information, see . +# Whether to detect and resolve duplicate records (unique key conflict) in the physical import mode. +# The following resolution algorithms are supported: +# - none: does not detect duplicate records, which has the best performance of the two algorithms. +# But if there are duplicate records in the data source, it might lead to inconsistent data in the target TiDB. +# - remove: if there are primary key or unique key conflicts between the inserting data A and B, +# A and B will be removed from the target table and recorded +# in the `lightning_task_info.conflict_error_v1` table in the target TiDB. 
+# You can manually insert the correct records into the target table based on your business requirements. +# Note that the target TiKV must be v5.2.0 or later versions; otherwise it falls back to 'none'. +# The default value is 'none'. +# duplicate-resolution = 'none' # Maximum KV size of SST files produced in the 'local' backend. This should be the same as # the TiKV region size to avoid further region splitting. The default value is 96 MiB. #region-split-size = '96MiB' From ddc701980d28fa78d2b8f1bc410ad5371c1d3002 Mon Sep 17 00:00:00 2001 From: lyzx2001 Date: Wed, 15 May 2024 14:17:02 +0800 Subject: [PATCH 2/8] lightning: provide view for users when using conflict detection --- .../tests/lightning_config_max_error/run.sh | 5 +++ lightning/tests/lightning_issue_40657/run.sh | 2 +- lightning/tidb-lightning.toml | 4 +- pkg/lightning/errormanager/errormanager.go | 45 ++++++++++++------- 4 files changed, 37 insertions(+), 19 deletions(-) diff --git a/lightning/tests/lightning_config_max_error/run.sh b/lightning/tests/lightning_config_max_error/run.sh index b43f886a50197..b56d3c2632937 100755 --- a/lightning/tests/lightning_config_max_error/run.sh +++ b/lightning/tests/lightning_config_max_error/run.sh @@ -29,6 +29,7 @@ remaining_row_count=$(( ${uniq_row_count} + ${duplicated_row_count}/2 )) run_sql 'DROP TABLE IF EXISTS mytest.testtbl' run_sql 'DROP TABLE IF EXISTS lightning_task_info.conflict_error_v3' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' stderr_file="/tmp/${TEST_NAME}.stderr" @@ -57,6 +58,7 @@ check_contains "COUNT(*): ${duplicated_row_count}" run_sql 'DROP TABLE IF EXISTS mytest.testtbl' run_sql 'DROP TABLE IF EXISTS lightning_task_info.conflict_error_v3' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' run_lightning --backend local --config "${mydir}/normal_config.toml" @@ -71,6 +73,7 @@ check_contains "COUNT(*): ${remaining_row_count}" run_sql 'DROP TABLE IF EXISTS mytest.testtbl' run_sql 'DROP TABLE IF EXISTS 
lightning_task_info.conflict_error_v3' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' run_lightning --backend local --config "${mydir}/normal_config_old_style.toml" @@ -83,12 +86,14 @@ check_contains "COUNT(*): ${remaining_row_count}" # import a fourth time run_sql 'DROP TABLE IF EXISTS lightning_task_info.conflict_records' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' ! run_lightning --backend local --config "${mydir}/ignore_config.toml" [ $? -eq 0 ] tail -n 10 $TEST_DIR/lightning.log | grep "ERROR" | tail -n 1 | grep -Fq "[Lightning:Config:ErrInvalidConfig]conflict.strategy cannot be set to \\\"ignore\\\" when use tikv-importer.backend = \\\"local\\\"" # Check tidb backend record duplicate entry in conflict_records table run_sql 'DROP TABLE IF EXISTS lightning_task_info.conflict_records' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' run_lightning --backend tidb --config "${mydir}/tidb.toml" run_sql 'SELECT COUNT(*) FROM lightning_task_info.conflict_records' check_contains "COUNT(*): 15" diff --git a/lightning/tests/lightning_issue_40657/run.sh b/lightning/tests/lightning_issue_40657/run.sh index 0f3d9ca5d15cb..a9b7d5baf8907 100644 --- a/lightning/tests/lightning_issue_40657/run.sh +++ b/lightning/tests/lightning_issue_40657/run.sh @@ -24,7 +24,7 @@ run_lightning -d "$CUR/data1" run_sql 'admin check table test.t' run_sql 'select count(*) from test.t' check_contains 'count(*): 4' -run_sql 'select count(*) from lightning_task_info.conflict_error_v3' +run_sql 'select count(*) from lightning_task_info.conflict_view' check_contains 'count(*): 2' run_sql 'truncate table test.t' diff --git a/lightning/tidb-lightning.toml b/lightning/tidb-lightning.toml index bab8531f714c9..04dd53af8dfe9 100644 --- a/lightning/tidb-lightning.toml +++ b/lightning/tidb-lightning.toml @@ -94,8 +94,8 @@ driver = "file" # - "": in the physical import mode, TiDB Lightning does not detect or handle conflicting data. 
If the source file contains conflicting primary or unique key records, the subsequent step reports an error. In the logical import mode, TiDB Lightning converts the "" strategy to the "error" strategy for processing. # - "error": when detecting conflicting primary or unique key records in the imported data, TiDB Lightning terminates the import and reports an error. # - "replace": when encountering conflicting primary or unique key records, TiDB Lightning retains the latest data and overwrites the old data. -# The conflicting data are recorded in the `lightning_task_info.conflict_error_v2` table (recording conflicting data detected by post-import conflict detection in the physical import mode) and the `conflict_records` table (recording conflicting data detected by preprocess conflict detection in both logical and physical import modes) of the target TiDB cluster. -# If you set `conflict.strategy = "replace"` in physical import mode, the conflicting data can be checked in the `lightning_task_info.conflict_view` view. +# The conflicting data are recorded in the `lightning_task_info.conflict_view` view of the target TiDB cluster. +# In this view, a value of 0 in the `is_precheck_conflict` column indicates conflicting data detected by post-import conflict detection in the physical import mode; a value of 1 indicates conflicting data detected by preprocess conflict detection in both logical and physical import modes. # You can manually insert the correct records into the target table based on your application requirements. Note that the target TiKV must be v5.2.0 or later versions. # - "ignore": when encountering conflicting primary or unique key records, TiDB Lightning retains the old data and ignores the new data. This option can only be used in the logical import mode. 
strategy = "" diff --git a/pkg/lightning/errormanager/errormanager.go b/pkg/lightning/errormanager/errormanager.go index d62d8792fa8eb..4f65e70c6cfa1 100644 --- a/pkg/lightning/errormanager/errormanager.go +++ b/pkg/lightning/errormanager/errormanager.go @@ -119,7 +119,21 @@ const ( ); ` - createConflictView = ` + createConflictV1View = ` + CREATE OR REPLACE VIEW %s.` + ConflictViewName + ` + AS SELECT 0 AS is_precheck_conflict, task_id, create_time, table_name, index_name, key_data, row_data, + raw_key, raw_value, raw_handle, raw_row, kv_type, NULL AS path, NULL AS offset, NULL AS error, NULL AS row_id + FROM %s.` + ConflictErrorTableName + `; + ` + + createConflictV2View = ` + CREATE OR REPLACE VIEW %s.` + ConflictViewName + ` + AS SELECT 1 AS is_precheck_conflict, task_id, create_time, table_name, NULL AS index_name, NULL AS key_data, + row_data, NULL AS raw_key, NULL AS raw_value, NULL AS raw_handle, NULL AS raw_row, NULL AS kv_type, path, + offset, error, row_id FROM %s.` + DupRecordTableName + `; + ` + + createConflictV1V2View = ` CREATE OR REPLACE VIEW %s.` + ConflictViewName + ` AS SELECT 0 AS is_precheck_conflict, task_id, create_time, table_name, index_name, key_data, row_data, raw_key, raw_value, raw_handle, raw_row, kv_type, NULL AS path, NULL AS offset, NULL AS error, NULL AS row_id @@ -285,9 +299,18 @@ func (em *ErrorManager) Init(ctx context.Context) error { } } - // TODO: return VIEW to users regardless of the lightning configuration if em.conflictV1Enabled && em.conflictV2Enabled { - err := exec.Exec(ctx, "create conflict view", strings.TrimSpace(common.SprintfWithIdentifiers(createConflictView, em.schema, em.schema, em.schema))) + err := exec.Exec(ctx, "create conflict view", strings.TrimSpace(common.SprintfWithIdentifiers(createConflictV1V2View, em.schema, em.schema, em.schema))) + if err != nil { + return err + } + } else if em.conflictV1Enabled { + err := exec.Exec(ctx, "create conflict view", 
strings.TrimSpace(common.SprintfWithIdentifiers(createConflictV1View, em.schema, em.schema))) + if err != nil { + return err + } + } else if em.conflictV2Enabled { + err := exec.Exec(ctx, "create conflict view", strings.TrimSpace(common.SprintfWithIdentifiers(createConflictV2View, em.schema, em.schema))) if err != nil { return err } @@ -1062,14 +1085,8 @@ func (em *ErrorManager) LogErrorDetails() { em.logger.Warn(fmtErrMsg(errCnt, "data charset", "")) } errCnt := em.conflictError() - if errCnt > 0 { - if em.conflictV1Enabled && em.conflictV2Enabled { - em.logger.Warn(fmtErrMsg(errCnt, "conflict", ConflictViewName)) - } else if em.conflictV1Enabled { - em.logger.Warn(fmtErrMsg(errCnt, "conflict", ConflictErrorTableName)) - } else if em.conflictV2Enabled { - em.logger.Warn(fmtErrMsg(errCnt, "conflict", DupRecordTableName)) - } + if errCnt > 0 && (em.conflictV1Enabled || em.conflictV2Enabled) { + em.logger.Warn(fmtErrMsg(errCnt, "conflict", ConflictViewName)) } } @@ -1111,12 +1128,8 @@ func (em *ErrorManager) Output() string { } if errCnt := em.conflictError(); errCnt > 0 { count++ - if em.conflictV1Enabled && em.conflictV2Enabled { + if em.conflictV1Enabled || em.conflictV2Enabled { t.AppendRow(table.Row{count, "Unique Key Conflict", errCnt, em.fmtTableName(ConflictViewName)}) - } else if em.conflictV1Enabled { - t.AppendRow(table.Row{count, "Unique Key Conflict", errCnt, em.fmtTableName(ConflictErrorTableName)}) - } else if em.conflictV2Enabled { - t.AppendRow(table.Row{count, "Unique Key Conflict", errCnt, em.fmtTableName(DupRecordTableName)}) } } From a3a245a0e27f67dc79f1050a4e7c17a2257d9972 Mon Sep 17 00:00:00 2001 From: lyzx2001 Date: Fri, 17 May 2024 11:43:47 +0800 Subject: [PATCH 3/8] fix IT --- lightning/tests/lightning_config_max_error/run.sh | 2 +- lightning/tests/lightning_duplicate_resolution_error/run.sh | 1 + .../run.sh | 1 + .../run.sh | 1 + .../run.sh | 1 + pkg/lightning/config/config.go | 2 +- 6 files changed, 6 insertions(+), 2 deletions(-) diff 
--git a/lightning/tests/lightning_config_max_error/run.sh b/lightning/tests/lightning_config_max_error/run.sh index b56d3c2632937..d0464245a573e 100755 --- a/lightning/tests/lightning_config_max_error/run.sh +++ b/lightning/tests/lightning_config_max_error/run.sh @@ -104,7 +104,7 @@ check_contains "row_data: ('5','bbb05')" # Check max-error-record can limit the size of conflict_records table run_sql 'DROP DATABASE IF EXISTS lightning_task_info' run_sql 'DROP DATABASE IF EXISTS mytest' -run_lightning --backend tidb --config "${mydir}/tidb-limit-record.toml" 2>&1 | grep "\`lightning_task_info\`.\`conflict_records\`" | grep -q "5" +run_lightning --backend tidb --config "${mydir}/tidb-limit-record.toml" 2>&1 | grep "\`lightning_task_info\`.\`conflict_view\`" | grep -q "5" run_sql 'SELECT COUNT(*) FROM lightning_task_info.conflict_records' check_contains "COUNT(*): 5" diff --git a/lightning/tests/lightning_duplicate_resolution_error/run.sh b/lightning/tests/lightning_duplicate_resolution_error/run.sh index 164ae140cc10b..bd2978802923f 100644 --- a/lightning/tests/lightning_duplicate_resolution_error/run.sh +++ b/lightning/tests/lightning_duplicate_resolution_error/run.sh @@ -22,6 +22,7 @@ mydir=$(dirname "${BASH_SOURCE[0]}") run_sql 'DROP TABLE IF EXISTS dup_resolve.a' run_sql 'DROP TABLE IF EXISTS lightning_task_info.conflict_error_v3' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' ! run_lightning --backend local --config "${mydir}/config.toml" [ $? 
-eq 0 ] diff --git a/lightning/tests/lightning_duplicate_resolution_error_pk_multiple_files/run.sh b/lightning/tests/lightning_duplicate_resolution_error_pk_multiple_files/run.sh index b207b55ef8cbb..e6988d22b9699 100644 --- a/lightning/tests/lightning_duplicate_resolution_error_pk_multiple_files/run.sh +++ b/lightning/tests/lightning_duplicate_resolution_error_pk_multiple_files/run.sh @@ -22,6 +22,7 @@ mydir=$(dirname "${BASH_SOURCE[0]}") run_sql 'DROP TABLE IF EXISTS dup_resolve.a' run_sql 'DROP TABLE IF EXISTS lightning_task_info.conflict_error_v3' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' ! run_lightning --backend local --config "${mydir}/config.toml" [ $? -eq 0 ] diff --git a/lightning/tests/lightning_duplicate_resolution_error_uk_multiple_files/run.sh b/lightning/tests/lightning_duplicate_resolution_error_uk_multiple_files/run.sh index 65a20892ce342..e346b3961977b 100644 --- a/lightning/tests/lightning_duplicate_resolution_error_uk_multiple_files/run.sh +++ b/lightning/tests/lightning_duplicate_resolution_error_uk_multiple_files/run.sh @@ -22,6 +22,7 @@ mydir=$(dirname "${BASH_SOURCE[0]}") run_sql 'DROP TABLE IF EXISTS dup_resolve.a' run_sql 'DROP TABLE IF EXISTS lightning_task_info.conflict_error_v3' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' ! run_lightning --backend local --config "${mydir}/config.toml" [ $? 
-eq 0 ] diff --git a/lightning/tests/lightning_duplicate_resolution_error_uk_multiple_files_multicol_index/run.sh b/lightning/tests/lightning_duplicate_resolution_error_uk_multiple_files_multicol_index/run.sh index ef72491c0a114..02b441d5ca058 100644 --- a/lightning/tests/lightning_duplicate_resolution_error_uk_multiple_files_multicol_index/run.sh +++ b/lightning/tests/lightning_duplicate_resolution_error_uk_multiple_files_multicol_index/run.sh @@ -22,6 +22,7 @@ mydir=$(dirname "${BASH_SOURCE[0]}") run_sql 'DROP TABLE IF EXISTS dup_resolve.a' run_sql 'DROP TABLE IF EXISTS lightning_task_info.conflict_error_v3' +run_sql 'DROP VIEW IF EXISTS lightning_task_info.conflict_view' ! run_lightning --backend local --config "${mydir}/config.toml" [ $? -eq 0 ] diff --git a/pkg/lightning/config/config.go b/pkg/lightning/config/config.go index 3e52545ea6e05..3a2133cbcfc70 100644 --- a/pkg/lightning/config/config.go +++ b/pkg/lightning/config/config.go @@ -606,7 +606,7 @@ const ( // ReplaceOnDup indicates using REPLACE INTO to insert data for TiDB backend. // ReplaceOnDup records all duplicate records, remove some rows with conflict // and reserve other rows that can be kept and not cause conflict anymore for local backend. - // Users need to analyze the lightning_task_info.conflict_error_v3 table to check whether the reserved data + // Users need to analyze the lightning_task_info.conflict_view view to check whether the reserved data // cater to their need and check whether they need to add back the correct rows. ReplaceOnDup // IgnoreOnDup indicates using INSERT IGNORE INTO to insert data for TiDB backend. 
From 803be36193b3c6d03cba330802e5c92e368891d1 Mon Sep 17 00:00:00 2001 From: lyzx2001 Date: Fri, 17 May 2024 14:49:34 +0800 Subject: [PATCH 4/8] fix UT --- .../errormanager/errormanager_test.go | 38 +++++++++++-------- .../errormanager/resolveconflict_test.go | 8 ++++ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/pkg/lightning/errormanager/errormanager_test.go b/pkg/lightning/errormanager/errormanager_test.go index 0008ea32a037a..d140ac429adfa 100644 --- a/pkg/lightning/errormanager/errormanager_test.go +++ b/pkg/lightning/errormanager/errormanager_test.go @@ -67,6 +67,8 @@ func TestInit(t *testing.T) { WillReturnResult(sqlmock.NewResult(1, 1)) mock.ExpectExec("CREATE TABLE IF NOT EXISTS `lightning_errors`\\.conflict_error_v3.*"). WillReturnResult(sqlmock.NewResult(2, 1)) + mock.ExpectExec("CREATE OR REPLACE VIEW `lightning_errors`\\.conflict_view.*"). + WillReturnResult(sqlmock.NewResult(3, 1)) err = em.Init(ctx) require.NoError(t, err) require.NoError(t, mock.ExpectationsWereMet()) @@ -288,6 +290,8 @@ func TestReplaceConflictOneKey(t *testing.T) { WillReturnResult(sqlmock.NewResult(1, 1)) mockDB.ExpectExec("CREATE TABLE IF NOT EXISTS `lightning_task_info`\\.conflict_error_v3.*"). WillReturnResult(sqlmock.NewResult(2, 1)) + mockDB.ExpectExec("CREATE OR REPLACE VIEW `lightning_task_info`\\.conflict_view.*"). + WillReturnResult(sqlmock.NewResult(3, 1)) mockDB.ExpectQuery("\\QSELECT _tidb_rowid, raw_key, index_name, raw_value, raw_handle FROM `lightning_task_info`.conflict_error_v3 WHERE table_name = ? AND kv_type = 0 AND _tidb_rowid >= ? and _tidb_rowid < ? ORDER BY _tidb_rowid LIMIT ?\\E"). WillReturnRows(sqlmock.NewRows([]string{"_tidb_rowid", "raw_key", "index_name", "raw_value", "raw_handle"})) mockDB.ExpectQuery("\\QSELECT _tidb_rowid, raw_key, raw_value FROM `lightning_task_info`.conflict_error_v3 WHERE table_name = ? AND kv_type <> 0 AND _tidb_rowid >= ? and _tidb_rowid < ? ORDER BY _tidb_rowid LIMIT ?\\E"). 
@@ -485,6 +489,8 @@ func TestReplaceConflictOneUniqueKey(t *testing.T) { WillReturnResult(sqlmock.NewResult(1, 1)) mockDB.ExpectExec("CREATE TABLE IF NOT EXISTS `lightning_task_info`\\.conflict_error_v3.*"). WillReturnResult(sqlmock.NewResult(2, 1)) + mockDB.ExpectExec("CREATE OR REPLACE VIEW `lightning_task_info`\\.conflict_view.*"). + WillReturnResult(sqlmock.NewResult(3, 1)) mockDB.ExpectQuery("\\QSELECT _tidb_rowid, raw_key, index_name, raw_value, raw_handle FROM `lightning_task_info`.conflict_error_v3 WHERE table_name = ? AND kv_type = 0 AND _tidb_rowid >= ? and _tidb_rowid < ? ORDER BY _tidb_rowid LIMIT ?\\E"). WillReturnRows(sqlmock.NewRows([]string{"_tidb_rowid", "raw_key", "index_name", "raw_value", "raw_handle"}). AddRow(1, data1IndexKey, "uni_b", data1IndexValue, data1RowKey). @@ -663,14 +669,14 @@ func TestErrorMgrErrorOutput(t *testing.T) { output = em.Output() expected = "\n" + "Import Data Error Summary: \n" + - "+---+---------------------+-------------+----------------------------------+\n" + - "| # | ERROR TYPE | ERROR COUNT | ERROR DATA TABLE |\n" + - "+---+---------------------+-------------+----------------------------------+\n" + - "|\x1b[31m 1 \x1b[0m|\x1b[31m Data Type \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`type_error_v1` \x1b[0m|\n" + - "|\x1b[31m 2 \x1b[0m|\x1b[31m Data Syntax \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`syntax_error_v1` \x1b[0m|\n" + - "|\x1b[31m 3 \x1b[0m|\x1b[31m Charset Error \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m \x1b[0m|\n" + - "|\x1b[31m 4 \x1b[0m|\x1b[31m Unique Key Conflict \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`conflict_error_v3` \x1b[0m|\n" + - "+---+---------------------+-------------+----------------------------------+\n" + "+---+---------------------+-------------+--------------------------------+\n" + + "| # | ERROR TYPE | ERROR COUNT | ERROR DATA TABLE |\n" + + "+---+---------------------+-------------+--------------------------------+\n" + + "|\x1b[31m 1 \x1b[0m|\x1b[31m Data 
Type \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`type_error_v1` \x1b[0m|\n" + + "|\x1b[31m 2 \x1b[0m|\x1b[31m Data Syntax \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`syntax_error_v1` \x1b[0m|\n" + + "|\x1b[31m 3 \x1b[0m|\x1b[31m Charset Error \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m \x1b[0m|\n" + + "|\x1b[31m 4 \x1b[0m|\x1b[31m Unique Key Conflict \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`conflict_view` \x1b[0m|\n" + + "+---+---------------------+-------------+--------------------------------+\n" require.Equal(t, expected, output) em.conflictV2Enabled = true @@ -678,14 +684,14 @@ func TestErrorMgrErrorOutput(t *testing.T) { output = em.Output() expected = "\n" + "Import Data Error Summary: \n" + - "+---+---------------------+-------------+---------------------------------+\n" + - "| # | ERROR TYPE | ERROR COUNT | ERROR DATA TABLE |\n" + - "+---+---------------------+-------------+---------------------------------+\n" + - "|\x1b[31m 1 \x1b[0m|\x1b[31m Data Type \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`type_error_v1` \x1b[0m|\n" + - "|\x1b[31m 2 \x1b[0m|\x1b[31m Data Syntax \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`syntax_error_v1` \x1b[0m|\n" + - "|\x1b[31m 3 \x1b[0m|\x1b[31m Charset Error \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m \x1b[0m|\n" + - "|\x1b[31m 4 \x1b[0m|\x1b[31m Unique Key Conflict \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`conflict_records` \x1b[0m|\n" + - "+---+---------------------+-------------+---------------------------------+\n" + "+---+---------------------+-------------+--------------------------------+\n" + + "| # | ERROR TYPE | ERROR COUNT | ERROR DATA TABLE |\n" + + "+---+---------------------+-------------+--------------------------------+\n" + + "|\x1b[31m 1 \x1b[0m|\x1b[31m Data Type \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`type_error_v1` \x1b[0m|\n" + + "|\x1b[31m 2 \x1b[0m|\x1b[31m Data Syntax \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`syntax_error_v1` \x1b[0m|\n" + + 
"|\x1b[31m 3 \x1b[0m|\x1b[31m Charset Error \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m \x1b[0m|\n" + + "|\x1b[31m 4 \x1b[0m|\x1b[31m Unique Key Conflict \x1b[0m|\x1b[31m 100 \x1b[0m|\x1b[31m `error_info`.`conflict_view` \x1b[0m|\n" + + "+---+---------------------+-------------+--------------------------------+\n" require.Equal(t, expected, output) em.conflictV2Enabled = true diff --git a/pkg/lightning/errormanager/resolveconflict_test.go b/pkg/lightning/errormanager/resolveconflict_test.go index 11020f005f0e9..42a1c1c122e2e 100644 --- a/pkg/lightning/errormanager/resolveconflict_test.go +++ b/pkg/lightning/errormanager/resolveconflict_test.go @@ -170,6 +170,8 @@ func TestReplaceConflictMultipleKeysNonclusteredPk(t *testing.T) { WillReturnResult(sqlmock.NewResult(1, 1)) mockDB.ExpectExec("CREATE TABLE IF NOT EXISTS `lightning_task_info`\\.conflict_error_v3.*"). WillReturnResult(sqlmock.NewResult(2, 1)) + mockDB.ExpectExec("CREATE OR REPLACE VIEW `lightning_task_info`\\.conflict_view.*"). + WillReturnResult(sqlmock.NewResult(3, 1)) mockDB.ExpectQuery("\\QSELECT _tidb_rowid, raw_key, index_name, raw_value, raw_handle FROM `lightning_task_info`.conflict_error_v3 WHERE table_name = ? AND kv_type = 0 AND _tidb_rowid >= ? and _tidb_rowid < ? ORDER BY _tidb_rowid LIMIT ?\\E"). WillReturnRows(sqlmock.NewRows([]string{"_tidb_rowid", "raw_key", "index_name", "raw_value", "raw_handle"}). AddRow(1, data2RowKey, "PRIMARY", data2RowValue, data1RowKey). @@ -354,6 +356,8 @@ func TestReplaceConflictOneKeyNonclusteredPk(t *testing.T) { WillReturnResult(sqlmock.NewResult(1, 1)) mockDB.ExpectExec("CREATE TABLE IF NOT EXISTS `lightning_task_info`\\.conflict_error_v3.*"). WillReturnResult(sqlmock.NewResult(2, 1)) + mockDB.ExpectExec("CREATE OR REPLACE VIEW `lightning_task_info`\\.conflict_view.*"). + WillReturnResult(sqlmock.NewResult(3, 1)) mockDB.ExpectQuery("\\QSELECT _tidb_rowid, raw_key, index_name, raw_value, raw_handle FROM `lightning_task_info`.conflict_error_v3 WHERE table_name = ? 
AND kv_type = 0 AND _tidb_rowid >= ? and _tidb_rowid < ? ORDER BY _tidb_rowid LIMIT ?\\E"). WillReturnRows(sqlmock.NewRows([]string{"_tidb_rowid", "raw_key", "index_name", "raw_value", "raw_handle"}). AddRow(1, data3IndexKey, "PRIMARY", data3IndexValue, data3RowKey). @@ -535,6 +539,8 @@ func TestReplaceConflictOneUniqueKeyNonclusteredPk(t *testing.T) { WillReturnResult(sqlmock.NewResult(1, 1)) mockDB.ExpectExec("CREATE TABLE IF NOT EXISTS `lightning_task_info`\\.conflict_error_v3.*"). WillReturnResult(sqlmock.NewResult(2, 1)) + mockDB.ExpectExec("CREATE OR REPLACE VIEW `lightning_task_info`\\.conflict_view.*"). + WillReturnResult(sqlmock.NewResult(3, 1)) mockDB.ExpectQuery("\\QSELECT _tidb_rowid, raw_key, index_name, raw_value, raw_handle FROM `lightning_task_info`.conflict_error_v3 WHERE table_name = ? AND kv_type = 0 AND _tidb_rowid >= ? and _tidb_rowid < ? ORDER BY _tidb_rowid LIMIT ?\\E"). WillReturnRows(sqlmock.NewRows([]string{"_tidb_rowid", "raw_key", "index_name", "raw_value", "raw_handle"}). AddRow(1, data4NonclusteredKey, "uni_b", data4NonclusteredValue, data4RowKey). @@ -741,6 +747,8 @@ func TestReplaceConflictOneUniqueKeyNonclusteredVarcharPk(t *testing.T) { WillReturnResult(sqlmock.NewResult(1, 1)) mockDB.ExpectExec("CREATE TABLE IF NOT EXISTS `lightning_task_info`\\.conflict_error_v3.*"). WillReturnResult(sqlmock.NewResult(2, 1)) + mockDB.ExpectExec("CREATE OR REPLACE VIEW `lightning_task_info`\\.conflict_view.*"). + WillReturnResult(sqlmock.NewResult(3, 1)) mockDB.ExpectQuery("\\QSELECT _tidb_rowid, raw_key, index_name, raw_value, raw_handle FROM `lightning_task_info`.conflict_error_v3 WHERE table_name = ? AND kv_type = 0 AND _tidb_rowid >= ? and _tidb_rowid < ? ORDER BY _tidb_rowid LIMIT ?\\E"). WillReturnRows(sqlmock.NewRows([]string{"_tidb_rowid", "raw_key", "index_name", "raw_value", "raw_handle"}). AddRow(1, data4NonclusteredKey, "uni_b", data4NonclusteredValue, data4RowKey). 
From 28d987c657c0dd2884433f281a16f2265921d971 Mon Sep 17 00:00:00 2001 From: lyzx2001 Date: Mon, 20 May 2024 15:52:08 +0800 Subject: [PATCH 5/8] revise tidb-lightning.toml --- lightning/tidb-lightning.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightning/tidb-lightning.toml b/lightning/tidb-lightning.toml index 04dd53af8dfe9..de605059b1d47 100644 --- a/lightning/tidb-lightning.toml +++ b/lightning/tidb-lightning.toml @@ -115,7 +115,7 @@ backend = "importer" # Address of tikv-importer when the backend is 'importer' addr = "127.0.0.1:8287" -# The `duplicate-resolution` parameter is deprecated starting from v8.0.0 and will be removed in a future release. For more information, see . +# The `duplicate-resolution` parameter is deprecated starting from v8.0.0 and will be removed in a future release. If you set `tikv-importer.duplicate-resolution = "remove"` and do not set `conflict.strategy`, TiDB Lightning will automatically assign `"replace"` to `conflict.strategy` and enable the new version of conflict detection. For more information, see . # Whether to detect and resolve duplicate records (unique key conflict) in the physical import mode. # The following resolution algorithms are supported: # - none: does not detect duplicate records, which has the best performance of the two algorithms. From 588b1cc9e69e71704c05129f72b341582dbd4714 Mon Sep 17 00:00:00 2001 From: lyzx2001 Date: Tue, 21 May 2024 13:54:26 +0800 Subject: [PATCH 6/8] revise tidb-lightning.toml --- lightning/tidb-lightning.toml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lightning/tidb-lightning.toml b/lightning/tidb-lightning.toml index de605059b1d47..fc391ce21d96a 100644 --- a/lightning/tidb-lightning.toml +++ b/lightning/tidb-lightning.toml @@ -120,11 +120,7 @@ addr = "127.0.0.1:8287" # The following resolution algorithms are supported: # - none: does not detect duplicate records, which has the best performance of the two algorithms. 
# But if there are duplicate records in the data source, it might lead to inconsistent data in the target TiDB. -# - remove: if there are primary key or unique key conflicts between the inserting data A and B, -# A and B will be removed from the target table and recorded -# in the `lightning_task_info.conflict_error_v1` table in the target TiDB. -# You can manually insert the correct records into the target table based on your business requirements. -# Note that the target TiKV must be v5.2.0 or later versions; otherwise it falls back to 'none'. +# If you set `tikv-importer.duplicate-resolution = "none"` and do not set `conflict.strategy`, TiDB Lightning will automatically assign `"none"` to `conflict.strategy`. # The default value is 'none'. # duplicate-resolution = 'none' # Maximum KV size of SST files produced in the 'local' backend. This should be the same as From 38000d86f821ed25b60b456ffe099ca90a31b51a Mon Sep 17 00:00:00 2001 From: lyzx2001 Date: Tue, 21 May 2024 14:29:06 +0800 Subject: [PATCH 7/8] update --- lightning/tidb-lightning.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightning/tidb-lightning.toml b/lightning/tidb-lightning.toml index fc391ce21d96a..dd8130a236ef1 100644 --- a/lightning/tidb-lightning.toml +++ b/lightning/tidb-lightning.toml @@ -118,9 +118,9 @@ addr = "127.0.0.1:8287" # The `duplicate-resolution` parameter is deprecated starting from v8.0.0 and will be removed in a future release. If you set `tikv-importer.duplicate-resolution = "remove"` and do not set `conflict.strategy`, TiDB Lightning will automatically assign `"replace"` to `conflict.strategy` and enable the new version of conflict detection. For more information, see . # Whether to detect and resolve duplicate records (unique key conflict) in the physical import mode. # The following resolution algorithms are supported: -# - none: does not detect duplicate records, which has the best performance of the two algorithms. 
-# But if there are duplicate records in the data source, it might lead to inconsistent data in the target TiDB. -# If you set `tikv-importer.duplicate-resolution = "none"` and do not set `conflict.strategy`, TiDB Lightning will automatically assign `"none"` to `conflict.strategy`. +# - none: does not detect duplicate records, which has the best performance of the two algorithms. +# But if there are duplicate records in the data source, it might lead to inconsistent data in the target TiDB. +# If you set `tikv-importer.duplicate-resolution = "none"` and do not set `conflict.strategy`, TiDB Lightning will automatically assign `""` to `conflict.strategy`. # The default value is 'none'. # duplicate-resolution = 'none' # Maximum KV size of SST files produced in the 'local' backend. This should be the same as From c1b63126e31f595504fe673657b784a8e730442c Mon Sep 17 00:00:00 2001 From: lyzx2001 Date: Tue, 21 May 2024 14:31:21 +0800 Subject: [PATCH 8/8] update --- lightning/tidb-lightning.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightning/tidb-lightning.toml b/lightning/tidb-lightning.toml index dd8130a236ef1..15791b7dc36f0 100644 --- a/lightning/tidb-lightning.toml +++ b/lightning/tidb-lightning.toml @@ -118,8 +118,8 @@ addr = "127.0.0.1:8287" # The `duplicate-resolution` parameter is deprecated starting from v8.0.0 and will be removed in a future release. If you set `tikv-importer.duplicate-resolution = "remove"` and do not set `conflict.strategy`, TiDB Lightning will automatically assign `"replace"` to `conflict.strategy` and enable the new version of conflict detection. For more information, see . # Whether to detect and resolve duplicate records (unique key conflict) in the physical import mode. # The following resolution algorithms are supported: -# - none: does not detect duplicate records, which has the best performance of the two algorithms. 
-# But if there are duplicate records in the data source, it might lead to inconsistent data in the target TiDB. +# - none: does not detect duplicate records. +# If there are duplicate records in the data source, it might lead to inconsistent data in the target TiDB. # If you set `tikv-importer.duplicate-resolution = "none"` and do not set `conflict.strategy`, TiDB Lightning will automatically assign `""` to `conflict.strategy`. # The default value is 'none'. # duplicate-resolution = 'none'