Skip to content

Commit

Permalink
test: add fail over ddl test case L (pingcap#795)
Browse files Browse the repository at this point in the history
  • Loading branch information
hongyunyan authored Jan 7, 2025
1 parent 07103f7 commit a1a38ba
Show file tree
Hide file tree
Showing 17 changed files with 550 additions and 369 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/integration_test_mysql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -225,3 +225,33 @@ jobs:
uses: ./.github/actions/upload-test-logs
with:
log-name: failover_e2e_test3

# Failover end-to-end job for the "L-O" group of cases; it currently runs only
# the fail_over_ddl_L integration test.
failover_e2e_test4:
runs-on: ubuntu-latest
name: Failover E2E Test[L-O]
steps:
- name: Check out code
uses: actions/checkout@v2

- name: Setup Go environment
uses: actions/setup-go@v3
with:
go-version: '1.23'

# Download the integration-test binaries, build failpoint-ctl into tools/bin,
# build the integration-test target, then list both bin directories in the log.
- name: Integration Build
run: |
tests/scripts/download-integration-test-binaries.sh master true
go build -o ./tools/bin/failpoint-ctl github.com/pingcap/failpoint/failpoint-ctl
make integration_test_build
ls -l bin/ && ls -l tools/bin/
# Runs the single case with TICDC_NEWARCH=true exported.
# NOTE(review): presumably this selects the new-architecture code path - confirm
# against the Makefile / test scripts.
- name: Test fail_over_ddl_L
run: |
pwd && ls -l bin/ && ls -l tools/bin/
export TICDC_NEWARCH=true && make integration_test CASE=fail_over_ddl_L
# `if: always()` makes the log upload run even when an earlier step failed.
- name: Upload test logs
if: always()
uses: ./.github/actions/upload-test-logs
with:
log-name: failover_e2e_test4
14 changes: 3 additions & 11 deletions downstreamadapter/dispatcher/dispatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,7 @@ func (d *Dispatcher) HandleDispatcherStatus(dispatcherStatus *heartbeatpb.Dispat
if pendingEvent != nil && action.CommitTs == pendingEvent.GetCommitTs() && blockStatus == heartbeatpb.BlockStage_WAITING {
d.blockEventStatus.updateBlockStage(heartbeatpb.BlockStage_WRITING)
if action.Action == heartbeatpb.Action_Write {
failpoint.Inject("WaitBeforeWrite", func() {
// We use this failpoint to ensure the DDL event is not written downstream before the other node finishes restarting.
time.Sleep(30 * time.Second)
})
failpoint.Inject("BlockBeforeWrite", nil)
failpoint.Inject("BlockOrWaitBeforeWrite", nil)
err := d.AddBlockEventToSink(pendingEvent)
if err != nil {
select {
Expand All @@ -247,13 +243,9 @@ func (d *Dispatcher) HandleDispatcherStatus(dispatcherStatus *heartbeatpb.Dispat
}
return
}
failpoint.Inject("BlockReportAfterWrite", nil)
failpoint.Inject("WaitBeforeReport", func() {
time.Sleep(30 * time.Second)
})
failpoint.Inject("BlockOrWaitReportAfterWrite", nil)
} else {
failpoint.Inject("WaitBeforePass", nil)
failpoint.Inject("BlockBeforePass", nil)
failpoint.Inject("BlockOrWaitBeforePass", nil)
d.PassBlockEventToSink(pendingEvent)
failpoint.Inject("BlockAfterPass", nil)
}
Expand Down
38 changes: 38 additions & 0 deletions tests/integration_tests/_utils/move_table_with_retry
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
# Move a table to the capture listening on the given address, retrying until
# the move-table call reports success or the retry budget is used up.
#
# parameter 1: ip addr with port of the target capture
# parameter 2: table id
# parameter 3: changefeed id
# parameter 4: retry count (defaults to 10 when omitted)
#
# Exit status: 0 once a move-table call reports success, 1 after all attempts fail.

set -e

usage="usage: move_table_with_retry <ip:port> <table id> <changefeed id> [retry count]"
ipAddr=${1:?$usage}
tableID=${2:?$usage}
changefeedID=${3:?$usage}
retryCount=${4:-10}

# Reads the output of run_cdc_cli on stdin and strips the text wrapped around
# the JSON payload: the "=== Command to ticdc(new arch)." prefix and everything
# from " PASS" onwards.
function strip_cli_wrapper() {
	sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //'
}

echo "move table with retry"
attempt=0

while [[ $attempt -lt $retryCount ]]; do
	# Pause between attempts (not before the first one) so a capture that is
	# still starting up has time to register.
	if [[ $attempt -gt 0 ]]; then
		sleep 1
	fi
	# Count every attempt up front so that no early `continue` can skip the
	# increment and turn the loop into an endless spin.
	attempt=$((attempt + 1))

	# `|| true` keeps `set -e` from aborting the whole script when a single
	# CLI or jq call fails; a failed attempt must fall through to a retry.
	ans=$(run_cdc_cli capture list) || true
	# $ans is expanded unquoted on purpose: word splitting joins the output
	# onto a single line, which the sed expressions in strip_cli_wrapper rely on.
	node2ID=$(echo $ans | strip_cli_wrapper | jq -r --arg addr "$ipAddr" '.[] | select(.address == $addr) | .id') || true
	if [ -z "$node2ID" ]; then
		echo "Failed to extract node2 ID"
		continue
	fi

	# Ask the cluster to move the table to the capture found above.
	result=$(run_cdc_cli changefeed move-table -c "$changefeedID" -t "$tableID" -d "$node2ID") || true
	echo $result
	success=$(echo $result | strip_cli_wrapper | jq -r '.success') || true

	if [ "$success" == "true" ]; then
		exit 0
	fi
done

echo "move table $tableID to $ipAddr failed after $retryCount retries"
exit 1
8 changes: 4 additions & 4 deletions tests/integration_tests/fail_over_ddl_A/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ function failOverCaseA-1() {
# restart cdc server to enable failpoint
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
kill_cdc_pid $cdc_pid_1
export GO_FAILPOINTS='github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockReportAfterWrite=pause'
export GO_FAILPOINTS='github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=pause'
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"
Expand Down Expand Up @@ -119,7 +119,7 @@ function failOverCaseA-2() {
# restart cdc server to enable failpoint
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
kill_cdc_pid $cdc_pid_1
export GO_FAILPOINTS='github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockReportAfterWrite=pause'
export GO_FAILPOINTS='github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=pause'
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"
Expand Down Expand Up @@ -174,7 +174,7 @@ function failOverCaseA-3() {
# restart cdc server to enable failpoint
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
kill_cdc_pid $cdc_pid_1
export GO_FAILPOINTS='github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockReportAfterWrite=pause'
export GO_FAILPOINTS='github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=pause'
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"
Expand Down Expand Up @@ -231,7 +231,7 @@ function failOverCaseA-5() {
# restart cdc server to enable failpoint
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
kill_cdc_pid $cdc_pid_1
export GO_FAILPOINTS='github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockReportAfterWrite=pause'
export GO_FAILPOINTS='github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=pause'
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"
Expand Down
60 changes: 12 additions & 48 deletions tests/integration_tests/fail_over_ddl_B/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ function failOverCaseB-1() {

sleep 10

export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockReportAfterWrite=pause'
export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=pause'

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
Expand All @@ -86,17 +86,8 @@ function failOverCaseB-1() {

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"

ans=$(run_cdc_cli capture list)
node2ID=`echo $ans | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.[] | select(.address == "127.0.0.1:8301") | .id'`

# move table 1 to node2
result=$(run_cdc_cli changefeed move-table -c "test" -t 106 -d "$node2ID")
echo $result
success=$(echo $result | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.success')
if [ "$success" != "true" ]; then
echo "move table 1 to node2 failed"
exit 1
fi
# move table 1 to node 2
move_table_with_retry "127.0.0.1:8301" 106 "test" 10

run_sql "drop database fail_over_ddl_test;" ${UP_TIDB_HOST} ${UP_TIDB_PORT}

Expand Down Expand Up @@ -139,7 +130,7 @@ function failOverCaseB-2() {

sleep 10

export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockReportAfterWrite=pause'
export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=pause'

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
Expand All @@ -149,17 +140,8 @@ function failOverCaseB-2() {

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"

ans=$(run_cdc_cli capture list)
node2ID=`echo $ans | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.[] | select(.address == "127.0.0.1:8301") | .id'`

# move table 1 to node2
result=$(run_cdc_cli changefeed move-table -c "test" -t 106 -d "$node2ID")
echo $result
success=$(echo $result | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.success')
if [ "$success" != "true" ]; then
echo "move table 1 to node2 failed"
exit 1
fi
# move table 1 to node 2
move_table_with_retry "127.0.0.1:8301" 106 "test" 10

run_sql "drop table fail_over_ddl_test.test1;" ${UP_TIDB_HOST} ${UP_TIDB_PORT}

Expand Down Expand Up @@ -210,7 +192,7 @@ function failOverCaseB-3() {

sleep 10

export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockReportAfterWrite=pause'
export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=pause'

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
Expand All @@ -220,17 +202,8 @@ function failOverCaseB-3() {

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"

ans=$(run_cdc_cli capture list)
node2ID=`echo $ans | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.[] | select(.address == "127.0.0.1:8301") | .id'`

# move table 1 to node2
result=$(run_cdc_cli changefeed move-table -c "test" -t 106 -d "$node2ID")
echo $result
success=$(echo $result | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.success')
if [ "$success" != "true" ]; then
echo "move table 1 to node2 failed"
exit 1
fi
# move table 1 to node 2
move_table_with_retry "127.0.0.1:8301" 106 "test" 10

run_sql "rename table fail_over_ddl_test.test1 to fail_over_ddl_test.test4;" ${UP_TIDB_HOST} ${UP_TIDB_PORT}

Expand Down Expand Up @@ -282,7 +255,7 @@ function failOverCaseB-5() {

sleep 10

export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockReportAfterWrite=pause'
export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=pause'

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"
cdc_pid_1=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
Expand All @@ -292,17 +265,8 @@ function failOverCaseB-5() {

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"

ans=$(run_cdc_cli capture list)
node2ID=`echo $ans | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.[] | select(.address == "127.0.0.1:8301") | .id'`

# move table 1 to node2
result=$(run_cdc_cli changefeed move-table -c "test" -t 106 -d "$node2ID")
echo $result
success=$(echo $result | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.success')
if [ "$success" != "true" ]; then
echo "move table 1 to node2 failed"
exit 1
fi
# move table 1 to node 2
move_table_with_retry "127.0.0.1:8301" 106 "test" 10

run_sql "insert into fail_over_ddl_test.test1 values (2, 2);" ${UP_TIDB_HOST} ${UP_TIDB_PORT}
ensure 10 "run_sql 'select id from fail_over_ddl_test.test1;' ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} && check_contains '2'"
Expand Down
60 changes: 12 additions & 48 deletions tests/integration_tests/fail_over_ddl_C/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ function failOverCaseC-1() {

sleep 10

export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/WaitBeforeReport=return(true)'
export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=sleep(30)'

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"

Expand All @@ -88,17 +88,8 @@ function failOverCaseC-1() {
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"
cdc_pid_2=$(ps aux | grep cdc | grep 8301 | awk '{print $2}')

ans=$(run_cdc_cli capture list)
node2ID=`echo $ans | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.[] | select(.address == "127.0.0.1:8301") | .id'`

# move table 1 to node2
result=$(run_cdc_cli changefeed move-table -c "test" -t 106 -d "$node2ID")
echo $result
success=$(echo $result | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.success')
if [ "$success" != "true" ]; then
echo "move table 1 to node2 failed"
exit 1
fi
# move table 1 to node 2
move_table_with_retry "127.0.0.1:8301" 106 "test" 10

run_sql "drop database fail_over_ddl_test;" ${UP_TIDB_HOST} ${UP_TIDB_PORT}

Expand Down Expand Up @@ -141,7 +132,7 @@ function failOverCaseC-2() {

sleep 10

export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/WaitBeforeReport=return(true)'
export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=sleep(30)'

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"

Expand All @@ -151,17 +142,8 @@ function failOverCaseC-2() {
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"
cdc_pid_2=$(ps aux | grep cdc | grep 8301 | awk '{print $2}')

ans=$(run_cdc_cli capture list)
node2ID=`echo $ans | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.[] | select(.address == "127.0.0.1:8301") | .id'`

# move table 1 to node2
result=$(run_cdc_cli changefeed move-table -c "test" -t 106 -d "$node2ID")
echo $result
success=$(echo $result | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.success')
if [ "$success" != "true" ]; then
echo "move table 1 to node2 failed"
exit 1
fi
# move table 1 to node 2
move_table_with_retry "127.0.0.1:8301" 106 "test" 10

run_sql "drop table fail_over_ddl_test.test1;" ${UP_TIDB_HOST} ${UP_TIDB_PORT}

Expand Down Expand Up @@ -212,7 +194,7 @@ function failOverCaseC-3() {

sleep 10

export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/WaitBeforeReport=return(true)'
export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=sleep(30)'

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"

Expand All @@ -222,17 +204,8 @@ function failOverCaseC-3() {
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"
cdc_pid_2=$(ps aux | grep cdc | grep 8301 | awk '{print $2}')

ans=$(run_cdc_cli capture list)
node2ID=`echo $ans | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.[] | select(.address == "127.0.0.1:8301") | .id'`

# move table 1 to node2
result=$(run_cdc_cli changefeed move-table -c "test" -t 106 -d "$node2ID")
echo $result
success=$(echo $result | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.success')
if [ "$success" != "true" ]; then
echo "move table 1 to node2 failed"
exit 1
fi
# move table 1 to node 2
move_table_with_retry "127.0.0.1:8301" 106 "test" 10

run_sql "rename table fail_over_ddl_test.test1 to fail_over_ddl_test.test4;" ${UP_TIDB_HOST} ${UP_TIDB_PORT}

Expand Down Expand Up @@ -284,7 +257,7 @@ function failOverCaseC-5() {

sleep 10

export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/WaitBeforeReport=return(true)'
export GO_FAILPOINTS='github.com/pingcap/ticdc/pkg/scheduler/StopBalanceScheduler=return(true);github.com/pingcap/ticdc/downstreamadapter/dispatcher/BlockOrWaitReportAfterWrite=sleep(30)'

run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "0-1" --addr "127.0.0.1:8300"

Expand All @@ -294,17 +267,8 @@ function failOverCaseC-5() {
run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix "1-1" --addr "127.0.0.1:8301"
cdc_pid_2=$(ps aux | grep cdc | grep 8301 | awk '{print $2}')

ans=$(run_cdc_cli capture list)
node2ID=`echo $ans | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.[] | select(.address == "127.0.0.1:8301") | .id'`

# move table 1 to node2
result=$(run_cdc_cli changefeed move-table -c "test" -t 106 -d "$node2ID")
echo $result
success=$(echo $result | sed 's/ PASS.*//' | sed 's/^=== Command to ticdc(new arch). //' | jq -r '.success')
if [ "$success" != "true" ]; then
echo "move table 1 to node2 failed"
exit 1
fi
# move table 1 to node 2
move_table_with_retry "127.0.0.1:8301" 106 "test" 10

run_sql "insert into fail_over_ddl_test.test1 values (2, 2);" ${UP_TIDB_HOST} ${UP_TIDB_PORT}
ensure 10 "run_sql 'select id from fail_over_ddl_test.test1;' ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} && check_contains '2'"
Expand Down
Loading

0 comments on commit a1a38ba

Please sign in to comment.