Skip to content

Commit

Permalink
Implemented split datashard status (wrong shard state) (#11955)
Browse files — browse the repository at this point in the history
  • Loading branch information
dahbka-lis authored Nov 26, 2024
1 parent 6ed5294 commit 2a7eb6b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 7 deletions.
37 changes: 31 additions & 6 deletions ydb/core/kqp/runtime/kqp_write_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -535,25 +535,38 @@ class TKqpTableWriteActor : public TActorBootstrapped<TKqpTableWriteActor> {
getIssues());
return;
}
case NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR: {
CA_LOG_E("Got INTERNAL ERROR for table `"
case NKikimrDataEvents::TEvWriteResult::STATUS_WRONG_SHARD_STATE:
CA_LOG_E("Got WRONG SHARD STATE for table `"
<< SchemeEntry->TableId.PathId.ToString() << "`."
<< " ShardID=" << ev->Get()->Record.GetOrigin() << ","
<< " Sink=" << this->SelfId() << "."
<< getIssues().ToOneLineString());
// TODO: Add new status for splits in datashard. This is tmp solution.
if (getIssues().ToOneLineString().Contains("in a pre/offline state assuming this is due to a finished split (wrong shard state)")) {

if (InconsistentTx) {
ResetShardRetries(ev->Get()->Record.GetOrigin(), ev->Cookie);
RetryResolveTable();
} else {
RuntimeError(
TStringBuilder() << "Internal error for table `"
TStringBuilder() << "Wrong shard state for table `"
<< TablePath << "`. "
<< getIssues().ToOneLineString(),
NYql::NDqProto::StatusIds::INTERNAL_ERROR,
NYql::NDqProto::StatusIds::PRECONDITION_FAILED,
getIssues());
}
return;
case NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR: {
CA_LOG_E("Got INTERNAL ERROR for table `"
<< SchemeEntry->TableId.PathId.ToString() << "`."
<< " ShardID=" << ev->Get()->Record.GetOrigin() << ","
<< " Sink=" << this->SelfId() << "."
<< getIssues().ToOneLineString());
RuntimeError(
TStringBuilder() << "Internal error for table `"
<< TablePath << "`. "
<< getIssues().ToOneLineString(),
NYql::NDqProto::StatusIds::INTERNAL_ERROR,
getIssues());
return;
}
case NKikimrDataEvents::TEvWriteResult::STATUS_DISK_SPACE_EXHAUSTED: {
CA_LOG_E("Got DISK_SPACE_EXHAUSTED for table `"
Expand Down Expand Up @@ -1798,6 +1811,18 @@ class TKqpBufferWriteActor :public TActorBootstrapped<TKqpBufferWriteActor>, pub
getIssues());
return;
}
case NKikimrDataEvents::TEvWriteResult::STATUS_WRONG_SHARD_STATE: {
CA_LOG_E("Got WRONG SHARD STATE for table."
<< " ShardID=" << ev->Get()->Record.GetOrigin() << ","
<< " Sink=" << this->SelfId() << "."
<< getIssues().ToOneLineString());
ReplyErrorAndDie(
TStringBuilder() << "Wrong shard state for table. "
<< getIssues().ToOneLineString(),
NYql::NDqProto::StatusIds::INTERNAL_ERROR,
getIssues());
return;
}
case NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR: {
CA_LOG_E("Got INTERNAL ERROR for table."
<< " ShardID=" << ev->Get()->Record.GetOrigin() << ","
Expand Down
1 change: 1 addition & 0 deletions ydb/core/protos/data_events.proto
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ message TEvWriteResult {
STATUS_SCHEME_CHANGED = 8;
STATUS_LOCKS_BROKEN = 9;
STATUS_DISK_SPACE_EXHAUSTED = 10;
STATUS_WRONG_SHARD_STATE = 11;
}

// Status
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/tx/data_events/common/error_codes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ TConclusion<NErrorCodes::TOperator::TYdbStatusInfo> TOperator::GetStatusInfo(
case NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN: {
return TYdbStatusInfo(Ydb::StatusIds::ABORTED, NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, "Transaction locks invalidated.");
}
case NKikimrDataEvents::TEvWriteResult::STATUS_WRONG_SHARD_STATE:
return TYdbStatusInfo(Ydb::StatusIds::PRECONDITION_FAILED, NYql::TIssuesIds::KIKIMR_PRECONDITION_FAILED, "Wrong shard state");
}
}

Expand Down
6 changes: 5 additions & 1 deletion ydb/core/tx/datashard/datashard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3054,7 +3054,11 @@ bool TDataShard::CheckDataTxRejectAndReply(const NEvents::TDataEvents::TEvWrite:
status = NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED;
break;
case NKikimrTxDataShard::TEvProposeTransactionResult::ERROR:
status = NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR;
if ((rejectReasons & ERejectReasons::WrongState) != ERejectReasons::None) {
status = NKikimrDataEvents::TEvWriteResult::STATUS_WRONG_SHARD_STATE;
} else {
status = NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR;
}
break;
default:
Y_FAIL_S("Unexpected rejectStatus " << rejectStatus);
Expand Down

0 comments on commit 2a7eb6b

Please sign in to comment.