From e6dcba9e04e48f6cf4473c0810ec55fc31c2356b Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Thu, 28 Oct 2021 17:30:31 +0800 Subject: [PATCH 1/3] fix bug if load data with long content --- executor/load_data.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/executor/load_data.go b/executor/load_data.go index 03539a4f8c9a9..319502d77bceb 100644 --- a/executor/load_data.go +++ b/executor/load_data.go @@ -421,7 +421,7 @@ func (e *LoadDataInfo) isInQuoter(bs []byte) bool { // indexOfTerminator return index of terminator, if not, return -1. // normally, the field terminator and line terminator is short, so we just use brute force algorithm. -func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int { +func (e *LoadDataInfo) indexOfTerminator(bs []byte, inQuoter bool) int { fieldTerm := []byte(e.FieldsInfo.Terminated) fieldTermLen := len(fieldTerm) lineTerm := []byte(e.LinesInfo.Terminated) @@ -459,13 +459,10 @@ func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int { } } atFieldStart := true - inQuoter := false loop: for i := 0; i < len(bs); i++ { if atFieldStart && bs[i] == e.FieldsInfo.Enclosed { - if !isInQuoter { - inQuoter = true - } + inQuoter = !inQuoter atFieldStart = false continue } From a1d532bbe6bfe383b8e5ae65bff2057198fd2888 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Mon, 29 Nov 2021 20:01:23 +0800 Subject: [PATCH 2/3] add ut --- executor/load_data.go | 10 +++++----- executor/write_test.go | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/executor/load_data.go b/executor/load_data.go index 319502d77bceb..1202675ebbce0 100644 --- a/executor/load_data.go +++ b/executor/load_data.go @@ -419,9 +419,9 @@ func (e *LoadDataInfo) isInQuoter(bs []byte) bool { return inQuoter } -// indexOfTerminator return index of terminator, if not, return -1. +// IndexOfTerminator return index of terminator, if not, return -1. // normally, the field terminator and line terminator is short, so we just use brute force algorithm. -func (e *LoadDataInfo) indexOfTerminator(bs []byte, inQuoter bool) int { +func (e *LoadDataInfo) IndexOfTerminator(bs []byte, inQuoter bool) int { fieldTerm := []byte(e.FieldsInfo.Terminated) fieldTermLen := len(fieldTerm) lineTerm := []byte(e.LinesInfo.Terminated) @@ -522,7 +522,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [ if ignore { endIdx = strings.Index(string(hack.String(curData[curStartIdx:])), e.LinesInfo.Terminated) } else { - endIdx = e.indexOfTerminator(curData[curStartIdx:], inquotor) + endIdx = e.IndexOfTerminator(curData[curStartIdx:], inquotor) } } if endIdx == -1 { @@ -536,7 +536,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [ if ignore { endIdx = strings.Index(string(hack.String(curData[startingLen:])), e.LinesInfo.Terminated) } else { - endIdx = e.indexOfTerminator(curData[startingLen:], inquotor) + endIdx = e.IndexOfTerminator(curData[startingLen:], inquotor) } if endIdx != -1 { nextDataIdx := startingLen + endIdx + terminatedLen @@ -557,7 +557,7 @@ func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, [ if ignore { endIdx = strings.Index(string(hack.String(prevData[startingLen:])), e.LinesInfo.Terminated) } else { - endIdx = e.indexOfTerminator(prevData[startingLen:], inquotor) + endIdx = e.IndexOfTerminator(prevData[startingLen:], inquotor) } if endIdx >= prevLen { return prevData[startingLen : startingLen+endIdx], curData[nextDataIdx:], true diff --git a/executor/write_test.go b/executor/write_test.go index 879c8c76c035f..7e6a9dbb606c1 100644 --- a/executor/write_test.go +++ b/executor/write_test.go @@ -18,11 +18,13 @@ import ( "context" "errors" "fmt" + "github.com/stretchr/testify/require" "strconv" "testing" "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/parser/ast" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/planner/core" @@ -2129,6 +2131,27 @@ func TestLoadDataEscape(t *testing.T) { checkCases(tests, ld, t, tk, ctx, selectSQL, deleteSQL) } +func TestLoadDataWithLongContent(t *testing.T) { + e := &executor.LoadDataInfo{ + FieldsInfo: &ast.FieldsClause{Terminated: ",", Escaped: '\\', Enclosed: '"'}, + LinesInfo: &ast.LinesClause{Terminated: "\n"}, + } + tests := []struct { + content string + inQuoter bool + expectedIndex int + }{ + {"123,123\n123,123", false, 7}, + {"123123\\n123123", false, -1}, + {"123123\n123123", true, -1}, + {"123123\n123123\"\n", true, 14}, + } + + for _, tt := range tests { + require.Equal(t, tt.expectedIndex, e.IndexOfTerminator([]byte(tt.content), tt.inQuoter)) + } +} + // TestLoadDataSpecifiedColumns reuse TestLoadDataEscape's test case :-) func TestLoadDataSpecifiedColumns(t *testing.T) { trivialMsg := "Records: 1 Deleted: 0 Skipped: 0 Warnings: 0" From e1a8d79a692fa1f0046c61b9a36e688a65cb03e6 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Mon, 29 Nov 2021 20:15:25 +0800 Subject: [PATCH 3/3] remove import --- executor/write_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/executor/write_test.go b/executor/write_test.go index 7e6a9dbb606c1..d30c1f19ba8d2 100644 --- a/executor/write_test.go +++ b/executor/write_test.go @@ -18,7 +18,6 @@ import ( "context" "errors" "fmt" - "github.com/stretchr/testify/require" "strconv" "testing"