-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Retry statements and transactions that fail due to `lock_timeout` errors. DDL operations and backfills are run in a session in which `SET lock_timeout TO 'xms'` has been set (`x` defaults to `500` but can be specified with the `--lock-timeout` parameter). This ensures that a long-running query can't cause other queries to queue up behind a DDL operation as it waits to acquire its lock. The current behaviour if a DDL operation or backfill batch times out when requesting a lock is to fail, forcing the user to retry the migration operation (start, rollback, or complete). This PR retries individual statements (like the DDL operations run by migration operations) and transactions (used by backfills) if they fail due to a `lock_timeout` error. The retry uses an exponential backoff with jitter. Fixes #171
- Loading branch information
1 parent
4f0a715
commit 5c1aef2
Showing
33 changed files
with
365 additions
and
166 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package db | ||
|
||
import ( | ||
"context" | ||
"database/sql" | ||
"errors" | ||
"time" | ||
|
||
"github.com/cloudflare/backoff" | ||
"github.com/lib/pq" | ||
) | ||
|
||
const ( | ||
lockNotAvailableErrorCode pq.ErrorCode = "55P03" | ||
maxBackoffDuration = 1 * time.Minute | ||
backoffInterval = 1 * time.Second | ||
) | ||
|
||
// DB is the set of database operations exposed by this package: executing a
// single statement, running a function inside a retryable transaction, and
// closing the underlying handle.
type DB interface {
	// ExecContext executes query with the given args and returns its result.
	ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error)

	// WithRetryableTransaction runs f inside a transaction.
	WithRetryableTransaction(ctx context.Context, f func(context.Context, *sql.Tx) error) error

	// Close releases the underlying database handle.
	Close() error
}
|
||
// RDB is a thin wrapper around a *sql.DB whose methods retry statements and
// transactions that fail with a lock_timeout error, waiting between attempts
// according to an exponential backoff (with jitter).
type RDB struct {
	// DB is the wrapped database handle that every method delegates to.
	DB *sql.DB
}
|
||
// ExecContext wraps sql.DB.ExecContext, retrying queries on lock_timeout errors. | ||
func (db *RDB) ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) { | ||
b := backoff.New(maxBackoffDuration, backoffInterval) | ||
|
||
for { | ||
res, err := db.DB.ExecContext(ctx, query, args...) | ||
if err == nil { | ||
return res, nil | ||
} | ||
|
||
pqErr := &pq.Error{} | ||
if errors.As(err, &pqErr) && pqErr.Code == lockNotAvailableErrorCode { | ||
<-time.After(b.Duration()) | ||
} else { | ||
return nil, err | ||
} | ||
} | ||
} | ||
|
||
// WithRetryableTransaction runs `f` in a transaction, retrying on lock_timeout errors. | ||
func (db *RDB) WithRetryableTransaction(ctx context.Context, f func(context.Context, *sql.Tx) error) error { | ||
b := backoff.New(maxBackoffDuration, backoffInterval) | ||
|
||
for { | ||
tx, err := db.DB.BeginTx(ctx, nil) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
err = f(ctx, tx) | ||
if err == nil { | ||
return tx.Commit() | ||
} | ||
|
||
if errRollback := tx.Rollback(); errRollback != nil { | ||
return errRollback | ||
} | ||
|
||
pqErr := &pq.Error{} | ||
if errors.As(err, &pqErr) && pqErr.Code == lockNotAvailableErrorCode { | ||
<-time.After(b.Duration()) | ||
} else { | ||
return err | ||
} | ||
} | ||
} | ||
|
||
func (db *RDB) Close() error { | ||
return db.DB.Close() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package db_test | ||
|
||
import ( | ||
"context" | ||
"database/sql" | ||
"fmt" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/require" | ||
"github.com/xataio/pgroll/pkg/db" | ||
"github.com/xataio/pgroll/pkg/testutils" | ||
) | ||
|
||
func TestMain(m *testing.M) { | ||
testutils.SharedTestMain(m) | ||
} | ||
|
||
func TestExecContext(t *testing.T) { | ||
t.Parallel() | ||
|
||
testutils.WithConnectionToContainer(t, func(conn *sql.DB, connStr string) { | ||
ctx := context.Background() | ||
// create a table on which an exclusive lock is held for 2 seconds | ||
setupTableLock(t, connStr, 2*time.Second) | ||
|
||
// set the lock timeout to 100ms | ||
ensureLockTimeout(t, conn, 100) | ||
|
||
// execute a query that should retry until the lock is released | ||
rdb := &db.RDB{DB: conn} | ||
_, err := rdb.ExecContext(ctx, "INSERT INTO test(id) VALUES (1)") | ||
require.NoError(t, err) | ||
}) | ||
} | ||
|
||
func TestWithRetryableTransaction(t *testing.T) { | ||
t.Parallel() | ||
|
||
testutils.WithConnectionToContainer(t, func(conn *sql.DB, connStr string) { | ||
ctx := context.Background() | ||
|
||
// create a table on which an exclusive lock is held for 2 seconds | ||
setupTableLock(t, connStr, 2*time.Second) | ||
|
||
// set the lock timeout to 100ms | ||
ensureLockTimeout(t, conn, 100) | ||
|
||
// run a transaction that should retry until the lock is released | ||
rdb := &db.RDB{DB: conn} | ||
err := rdb.WithRetryableTransaction(ctx, func(ctx context.Context, tx *sql.Tx) error { | ||
return tx.QueryRowContext(ctx, "SELECT 1 FROM test").Err() | ||
}) | ||
require.NoError(t, err) | ||
}) | ||
} | ||
|
||
// setupTableLock: | ||
// * connects to the database | ||
// * creates a table in the database | ||
// * starts a transaction that temporarily locks the table | ||
func setupTableLock(t *testing.T, connStr string, d time.Duration) { | ||
t.Helper() | ||
ctx := context.Background() | ||
|
||
// connect to the database | ||
conn2, err := sql.Open("postgres", connStr) | ||
require.NoError(t, err) | ||
|
||
// create a table in the database | ||
_, err = conn2.ExecContext(ctx, "CREATE TABLE test (id INT PRIMARY KEY)") | ||
require.NoError(t, err) | ||
|
||
// start a transaction that takes a temporary lock on the table | ||
errCh := make(chan error) | ||
go func() { | ||
// begin a transaction | ||
tx, err := conn2.Begin() | ||
if err != nil { | ||
errCh <- err | ||
return | ||
} | ||
|
||
// lock the table | ||
_, err = tx.ExecContext(ctx, "LOCK TABLE test IN ACCESS EXCLUSIVE MODE") | ||
if err != nil { | ||
errCh <- err | ||
return | ||
} | ||
|
||
// signal that the lock is obtained | ||
errCh <- nil | ||
|
||
// temporarily hold the lock | ||
time.Sleep(d) | ||
|
||
// commit the transaction | ||
tx.Commit() | ||
}() | ||
|
||
// wait for the lock to be obtained | ||
err = <-errCh | ||
require.NoError(t, err) | ||
} | ||
|
||
func ensureLockTimeout(t *testing.T, conn *sql.DB, ms int) { | ||
t.Helper() | ||
|
||
// Set the lock timeout | ||
query := fmt.Sprintf("SET lock_timeout = '%dms'", ms) | ||
_, err := conn.ExecContext(context.Background(), query) | ||
require.NoError(t, err) | ||
|
||
// Ensure the lock timeout is set | ||
var lockTimeout string | ||
err = conn.QueryRowContext(context.Background(), "SHOW lock_timeout").Scan(&lockTimeout) | ||
require.NoError(t, err) | ||
require.Equal(t, fmt.Sprintf("%dms", ms), lockTimeout) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.