From 863eb59c24f769d395359d11725ffd3a2cfdd281 Mon Sep 17 00:00:00 2001 From: Andriy Svyryd Date: Wed, 1 Sep 2021 14:45:10 -0700 Subject: [PATCH] Update SQL Server transient error list Fixes #25050 Fixes #15644 --- .../Internal/SqlServerDatabaseCreator.cs | 5 +- .../SqlServerTransientExceptionDetector.cs | 81 +++++++++++++++++++ .../TestSqlServerRetryingExecutionStrategy.cs | 1 - 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/EFCore.SqlServer/Storage/Internal/SqlServerDatabaseCreator.cs b/src/EFCore.SqlServer/Storage/Internal/SqlServerDatabaseCreator.cs index efd009a958b..0aacc0d8c30 100644 --- a/src/EFCore.SqlServer/Storage/Internal/SqlServerDatabaseCreator.cs +++ b/src/EFCore.SqlServer/Storage/Internal/SqlServerDatabaseCreator.cs @@ -337,11 +337,14 @@ private bool RetryOnExistsFailure(SqlException exception) // Microsoft.Data.SqlClient.SqlException: Unable to Attach database file as database xxxxxxx. // And (Number 5120) // Microsoft.Data.SqlClient.SqlException: Unable to open the physical file xxxxxxx. + // And (Number 18456) + // Microsoft.Data.SqlClient.SqlException: Login failed for user 'xxxxxxx'. if (exception.Number == 233 || exception.Number == -2 || exception.Number == 4060 || exception.Number == 1832 - || exception.Number == 5120) + || exception.Number == 5120 + || exception.Number == 18456) { ClearPool(); return true; diff --git a/src/EFCore.SqlServer/Storage/Internal/SqlServerTransientExceptionDetector.cs b/src/EFCore.SqlServer/Storage/Internal/SqlServerTransientExceptionDetector.cs index d9d0a0360ad..a9b49e21043 100644 --- a/src/EFCore.SqlServer/Storage/Internal/SqlServerTransientExceptionDetector.cs +++ b/src/EFCore.SqlServer/Storage/Internal/SqlServerTransientExceptionDetector.cs @@ -67,6 +67,16 @@ public static bool ShouldRetryOn(Exception? ex) // SQL Error Code: 40197 // The service has encountered an error processing your request. Please try again. case 40197: + // SQL Error Code: 20041 + // Transaction rolled back. Could not execute trigger. Retry your transaction. + case 20041: + // SQL Error Code: 17197 + // Login failed due to timeout; the connection has been closed. This error may indicate heavy server load. + // Reduce the load on the server and retry login. + case 17197: + // SQL Error Code: 14355 + // The MSSQLServerADHelper service is busy. Retry this operation later. + case 14355: // SQL Error Code: 10936 // Resource ID : %d. The request limit for the elastic pool is %d and has been reached. // See 'http://go.microsoft.com/fwlink/?LinkId=267637' for assistance. @@ -80,6 +90,9 @@ public static bool ShouldRetryOn(Exception? ex) // Resource ID: %d. The %s limit for the database is %d and has been reached. For more information, // see http://go.microsoft.com/fwlink/?LinkId=267637. case 10928: + // SQL Error Code: 10922 + // %ls failed. Rerun the statement. + case 10922: // SQL Error Code: 10060 // A network-related or instance-specific error occurred while establishing a connection to SQL Server. // The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server @@ -95,9 +108,77 @@ public static bool ShouldRetryOn(Exception? ex) // A transport-level error has occurred when receiving results from the server. // An established connection was aborted by the software in your host machine. case 10053: + // SQL Error Code: 9515 + // An XML schema has been altered or dropped, and the query plan is no longer valid. Please rerun the query batch. + case 9515: + // SQL Error Code: 8651 + // Could not perform the operation because the requested memory grant was not available in resource pool '%ls' (%ld). + // Rerun the query, reduce the query load, or check resource governor configuration setting. + case 8651: + // SQL Error Code: 8645 + // A timeout occurred while waiting for memory resources to execute the query in resource pool '%ls' (%ld). Rerun the query. + case 8645: + // SQL Error Code: 8628 + // A time out occurred while waiting to optimize the query. Rerun the query. + case 8628: + // SQL Error Code: 4221 + // Login to read-secondary failed due to long wait on 'HADR_DATABASE_WAIT_FOR_TRANSITION_TO_VERSIONING'. + // The replica is not available for login because row versions are missing for transactions that were in-flight + // when the replica was recycled. The issue can be resolved by rolling back or committing the active transactions + // on the primary replica. Occurrences of this condition can be minimized by avoiding long write transactions on the primary. + case 4221: + // SQL Error Code: 4060 + // Cannot open database "%.*ls" requested by the login. The login failed. + case 4060: + // SQL Error Code: 3966 + // Transaction is rolled back when accessing version store. It was earlier marked as victim when the version store + // was shrunk due to insufficient space in tempdb. This transaction was marked as a victim earlier because it may need + // the row version(s) that have already been removed to make space in tempdb. Retry the transaction + case 3966: + // SQL Error Code: 3960 + // Snapshot isolation transaction aborted due to update conflict. You cannot use snapshot isolation to access table '%.*ls' + // directly or indirectly in database '%.*ls' to update, delete, or insert the row that has been modified or deleted + // by another transaction. Retry the transaction or change the isolation level for the update/delete statement. + case 3960: + // SQL Error Code: 3935 + // A FILESTREAM transaction context could not be initialized. This might be caused by a resource shortage. Retry the operation. + case 3935: + // SQL Error Code: 1807 + // Could not obtain exclusive lock on database 'model'. Retry the operation later. + case 1807: + // SQL Error Code: 1221 + // The Database Engine is attempting to release a group of locks that are not currently held by the transaction. + // Retry the transaction. If the problem persists, contact your support provider. + case 1221: // SQL Error Code: 1205 // Deadlock case 1205: + // SQL Error Code: 1204 + // The instance of the SQL Server Database Engine cannot obtain a LOCK resource at this time. Rerun your statement + // when there are fewer active users. Ask the database administrator to check the lock and memory configuration for + // this instance, or to check for long-running transactions. + case 1204: + // SQL Error Code: 1203 + // Process ID %d attempted to unlock a resource it does not own: %.*ls. Retry the transaction, because this error + // may be caused by a timing condition. If the problem persists, contact the database administrator. + case 1203: + // SQL Error Code: 997 + // A connection was successfully established with the server, but then an error occurred during the login process. + // (provider: Named Pipes Provider, error: 0 - Overlapped I/O operation is in progress) + case 997: + // SQL Error Code: 921 + // Database '%.*ls' has not been recovered yet. Wait and try again. + case 921: + // SQL Error Code: 669 + // The row object is inconsistent. Please rerun the query. + case 669: + // SQL Error Code: 617 + // Descriptor for object ID %ld in database ID %d not found in the hash table during attempt to unhash it. + // A work table is missing an entry. Rerun the query. If a cursor is involved, close and reopen the cursor. + case 617: + // SQL Error Code: 601 + // Could not continue scan with NOLOCK due to data movement. + case 601: // SQL Error Code: 233 // The client was unable to establish a connection because of an error during connection initialization process before login. // Possible causes include the following: the client tried to connect to an unsupported version of SQL Server; diff --git a/test/EFCore.SqlServer.FunctionalTests/TestUtilities/TestSqlServerRetryingExecutionStrategy.cs b/test/EFCore.SqlServer.FunctionalTests/TestUtilities/TestSqlServerRetryingExecutionStrategy.cs index 3fd9d55eadb..8161cc1a836 100644 --- a/test/EFCore.SqlServer.FunctionalTests/TestUtilities/TestSqlServerRetryingExecutionStrategy.cs +++ b/test/EFCore.SqlServer.FunctionalTests/TestUtilities/TestSqlServerRetryingExecutionStrategy.cs @@ -15,7 +15,6 @@ public class TestSqlServerRetryingExecutionStrategy : SqlServerRetryingExecution { -1, // Physical connection is not usable -2, // Timeout - 1807, // Could not obtain exclusive lock on database 'model' 42008, // Mirroring (Only when a database is deleted and another one is created in fast succession) 42019 // CREATE DATABASE operation failed };