From 7ee0148d6c7f22037939a858d0faeadf95553e91 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Wed, 25 Jan 2023 18:16:12 +0530 Subject: [PATCH 01/20] Removes used field --- ent/src/yb/cdc/cdc_service.cc | 6 +--- .../yb/integration-tests/twodc_ysql-test.cc | 4 +-- src/yb/client/ql-stress-test.cc | 4 +-- src/yb/consensus/log-dump.cc | 6 ++-- src/yb/consensus/log-test.cc | 20 +++-------- src/yb/consensus/log.cc | 4 +-- src/yb/consensus/log.proto | 4 +-- src/yb/consensus/log_cache.cc | 36 +------------------ src/yb/consensus/log_cache.h | 2 -- src/yb/consensus/log_reader.cc | 7 ---- src/yb/consensus/log_reader.h | 4 +-- src/yb/tools/data-patcher.cc | 2 +- 12 files changed, 18 insertions(+), 81 deletions(-) diff --git a/ent/src/yb/cdc/cdc_service.cc b/ent/src/yb/cdc/cdc_service.cc index b8bc2659c516..a9f4a0f7aa32 100644 --- a/ent/src/yb/cdc/cdc_service.cc +++ b/ent/src/yb/cdc/cdc_service.cc @@ -4031,17 +4031,13 @@ void CDCServiceImpl::IsBootstrapRequired( int64_t next_index = op_id.index + 1; consensus::ReplicateMsgs replicates; int64_t starting_op_segment_seq_num; - yb::SchemaPB schema; - uint32_t schema_version; auto log_result = log->GetLogReader()->ReadReplicatesInRange( next_index, next_index, 0, &replicates, - &starting_op_segment_seq_num, - &schema, - &schema_version); + &starting_op_segment_seq_num); // TODO: We should limit this to the specific Status error associated with missing logs. bool missing_logs = !log_result.ok(); diff --git a/ent/src/yb/integration-tests/twodc_ysql-test.cc b/ent/src/yb/integration-tests/twodc_ysql-test.cc index fd0cd8e1415b..ee88a966ab45 100644 --- a/ent/src/yb/integration-tests/twodc_ysql-test.cc +++ b/ent/src/yb/integration-tests/twodc_ysql-test.cc @@ -2378,10 +2378,8 @@ TEST_P(TwoDCYsqlTest, IsBootstrapRequiredFlushed) { // Check that first log was garbage collected, so remote bootstrap will be required. consensus::ReplicateMsgs replicates; int64_t starting_op; - yb::SchemaPB schema; - uint32_t schema_version; return !tablet_peer->log()->GetLogReader()->ReadReplicatesInRange( - 1, 2, 0, &replicates, &starting_op, &schema, &schema_version).ok(); + 1, 2, 0, &replicates, &starting_op).ok(); }, MonoDelta::FromSeconds(30), "Logs cleaned")); auto leaders = ListTabletPeers(producer_cluster(), ListPeersFilter::kLeaders); diff --git a/src/yb/client/ql-stress-test.cc b/src/yb/client/ql-stress-test.cc index 5bec9827924c..ddc8ca8a99a9 100644 --- a/src/yb/client/ql-stress-test.cc +++ b/src/yb/client/ql-stress-test.cc @@ -987,10 +987,8 @@ TEST_F_EX(QLStressTest, LongRemoteBootstrap, QLStressTestLongRemoteBootstrap) { // Check that first log was garbage collected, so remote bootstrap will be required. consensus::ReplicateMsgs replicates; int64_t starting_op_segment_seq_num; - yb::SchemaPB schema; - uint32_t schema_version; return !leaders.front()->log()->GetLogReader()->ReadReplicatesInRange( - 100, 101, 0, &replicates, &starting_op_segment_seq_num, &schema, &schema_version).ok(); + 100, 101, 0, &replicates, &starting_op_segment_seq_num).ok(); }, 30s, "Logs cleaned")); LOG(INFO) << "Bring replica back, keys written: " << key.load(std::memory_order_acquire); diff --git a/src/yb/consensus/log-dump.cc b/src/yb/consensus/log-dump.cc index 3df98a6fd1d5..9f0a6673e274 100644 --- a/src/yb/consensus/log-dump.cc +++ b/src/yb/consensus/log-dump.cc @@ -281,7 +281,7 @@ Status PrintSegment(const scoped_refptr& segment) { if (print_type == DONT_PRINT) return Status::OK(); Schema tablet_schema; - RETURN_NOT_OK(SchemaFromPB(segment->header().schema(), &tablet_schema)); + RETURN_NOT_OK(SchemaFromPB(segment->header().unused_schema(), &tablet_schema)); for (const auto& lw_entry : read_entries.entries) { auto entry = lw_entry->ToGoogleProtobuf(); @@ -357,7 +357,7 @@ Status FilterLogSegment(const string& segment_path) { Schema tablet_schema; const auto& segment_header = segment->header(); - RETURN_NOT_OK(SchemaFromPB(segment->header().schema(), &tablet_schema)); + RETURN_NOT_OK(SchemaFromPB(segment->header().unused_schema(), &tablet_schema)); auto log_options = LogOptions(); log_options.env = env; @@ -420,7 +420,7 @@ Status FilterLogSegment(const string& segment_path) { output_wal_dir, "log-dump-tool", tablet_schema, - segment_header.schema_version(), + segment_header.unused_schema_version(), /* table_metric_entity */ nullptr, /* tablet_metric_entity */ nullptr, log_thread_pool.get(), diff --git a/src/yb/consensus/log-test.cc b/src/yb/consensus/log-test.cc index 1144003e6fce..f2fe1e918b9f 100644 --- a/src/yb/consensus/log-test.cc +++ b/src/yb/consensus/log-test.cc @@ -132,7 +132,7 @@ class LogTest : public LogTestBase { header.set_major_version(0); header.set_minor_version(0); header.set_unused_tablet_id(kTestTablet); - SchemaToPB(GetSimpleTestSchema(), header.mutable_schema()); + SchemaToPB(GetSimpleTestSchema(), header.mutable_unused_schema()); LogSegmentFooterPB footer; footer.set_num_entries(10); @@ -717,11 +717,8 @@ TEST_F(LogTest, TestGCWithLogRunning) { { ReplicateMsgs repls; int64_t starting_op_segment_seq_num; - yb::SchemaPB schema; - uint32_t schema_version; Status s = log_->GetLogReader()->ReadReplicatesInRange( - 1, 2, LogReader::kNoSizeLimit, &repls, &starting_op_segment_seq_num, - &schema, &schema_version); + 1, 2, LogReader::kNoSizeLimit, &repls, &starting_op_segment_seq_num); ASSERT_TRUE(s.IsNotFound()) << s.ToString(); } @@ -1115,15 +1112,12 @@ TEST_F(LogTest, TestReadLogWithReplacedReplicates) { auto start_index = RandomUniformInt(gc_index, max_repl_index - 1); auto end_index = RandomUniformInt(start_index, max_repl_index); int64_t starting_op_segment_seq_num; - yb::SchemaPB schema; - uint32_t schema_version; { SCOPED_TRACE(Substitute("Reading $0-$1", start_index, end_index)); consensus::ReplicateMsgs repls; ASSERT_OK(reader->ReadReplicatesInRange(start_index, end_index, LogReader::kNoSizeLimit, &repls, - &starting_op_segment_seq_num, - &schema, &schema_version)); + &starting_op_segment_seq_num)); ASSERT_EQ(end_index - start_index + 1, repls.size()); auto expected_index = start_index; for (const auto& repl : repls) { @@ -1147,8 +1141,7 @@ TEST_F(LogTest, TestReadLogWithReplacedReplicates) { start_index, end_index, size_limit)); ReplicateMsgs repls; ASSERT_OK(reader->ReadReplicatesInRange(start_index, end_index, size_limit, &repls, - &starting_op_segment_seq_num, - &schema, &schema_version)); + &starting_op_segment_seq_num)); ASSERT_LE(repls.size(), end_index - start_index + 1); int total_size = 0; auto expected_index = start_index; @@ -1191,12 +1184,9 @@ TEST_F(LogTest, TestReadReplicatesHighIndex) { auto* reader = log_->GetLogReader(); ReplicateMsgs repls; int64_t starting_op_segment_seq_num; - yb::SchemaPB schema; - uint32_t schema_version; ASSERT_OK(reader->ReadReplicatesInRange(first_log_index, first_log_index + kSequenceLength - 1, LogReader::kNoSizeLimit, &repls, - &starting_op_segment_seq_num, - &schema, &schema_version)); + &starting_op_segment_seq_num)); ASSERT_EQ(kSequenceLength, repls.size()); } diff --git a/src/yb/consensus/log.cc b/src/yb/consensus/log.cc index 866232f74942..47b4f28b79a8 100644 --- a/src/yb/consensus/log.cc +++ b/src/yb/consensus/log.cc @@ -1736,8 +1736,8 @@ Status Log::SwitchToAllocatedSegment() { // Set the new segment's schema. { SharedLock l(schema_lock_); - SchemaToPB(*schema_, header.mutable_schema()); - header.set_schema_version(schema_version_); + SchemaToPB(*schema_, header.mutable_unused_schema()); + header.set_unused_schema_version(schema_version_); } RETURN_NOT_OK(new_segment->WriteHeaderAndOpen(header)); diff --git a/src/yb/consensus/log.proto b/src/yb/consensus/log.proto index bf7617b3c4e2..dfe25b79f895 100644 --- a/src/yb/consensus/log.proto +++ b/src/yb/consensus/log.proto @@ -88,8 +88,8 @@ message LogSegmentHeaderPB { required int64 sequence_number = 6; // Schema used when appending entries to this log, and its version. - required SchemaPB schema = 7; - optional uint32 schema_version = 8; + required SchemaPB unused_schema = 7; + optional uint32 unused_schema_version = 8; } // A header for a log index block that are stored inside WAL segment file. diff --git a/src/yb/consensus/log_cache.cc b/src/yb/consensus/log_cache.cc index 0af528e04c04..8a74cb37a2f3 100644 --- a/src/yb/consensus/log_cache.cc +++ b/src/yb/consensus/log_cache.cc @@ -328,25 +328,6 @@ int64_t TotalByteSizeForMessage(const LWReplicateMsg& msg) { return msg_size; } -Status UpdateResultHeaderSchemaFromSegment( - log::LogReader* log_reader, const int64_t segment_seq_num, ReadOpsResult* result) { - const auto segment_result = log_reader->GetSegmentBySequenceNumber(segment_seq_num); - if (!segment_result.ok()) { - if (segment_result.status().IsNotFound()) { - // Just ignore that. - return Status::OK(); - } - // Return error. - return segment_result.status(); - } - const auto& header = (*segment_result)->header(); - if (header.has_schema()) { - result->header_schema.CopyFrom(header.schema()); - result->header_schema_version = header.schema_version(); - } - return Status::OK(); -} - } // anonymous namespace Result LogCache::ReadOps(int64_t after_op_index, size_t max_size_bytes) { @@ -420,14 +401,9 @@ Result LogCache::ReadOps( RETURN_NOT_OK_PREPEND( log_->GetLogReader()->ReadReplicatesInRange( next_index, up_to, remaining_space, &raw_replicate_ptrs, &starting_op_segment_seq_num, - &result.header_schema, &(result.header_schema_version), deadline), + deadline), Substitute("Failed to read ops $0..$1", next_index, up_to)); - if ((starting_op_segment_seq_num != -1) && !result.header_schema.IsInitialized()) { - RETURN_NOT_OK(UpdateResultHeaderSchemaFromSegment( - log_->GetLogReader(), starting_op_segment_seq_num, &result)); - } - metrics_.disk_reads->IncrementBy(raw_replicate_ptrs.size()); LOG_WITH_PREFIX(INFO) << "Successfully read " << raw_replicate_ptrs.size() << " ops from disk."; @@ -442,10 +418,6 @@ Result LogCache::ReadOps( break; } result.messages.push_back(msg); - if (msg->op_type() == consensus::OperationType::CHANGE_METADATA_OP) { - msg->change_metadata_request().schema().ToGoogleProtobuf(&result.header_schema); - result.header_schema_version = msg->change_metadata_request().schema_version(); - } result.read_from_disk_size += current_message_size; next_index++; } @@ -453,8 +425,6 @@ Result LogCache::ReadOps( const auto seg_num_result = log_->GetLogReader()->LookupOpWalSegmentNumber(next_index); if (seg_num_result.ok()) { starting_op_segment_seq_num = *seg_num_result; - RETURN_NOT_OK(UpdateResultHeaderSchemaFromSegment( - log_->GetLogReader(), starting_op_segment_seq_num, &result)); } else if (!seg_num_result.status().IsNotFound()) { // Unexpected error - to be handled by the caller. return seg_num_result.status(); @@ -478,10 +448,6 @@ Result LogCache::ReadOps( } result.messages.push_back(msg); - if (msg->op_type() == consensus::OperationType::CHANGE_METADATA_OP) { - msg->change_metadata_request().schema().ToGoogleProtobuf(&result.header_schema); - result.header_schema_version = msg->change_metadata_request().schema_version(); - } next_index++; } } diff --git a/src/yb/consensus/log_cache.h b/src/yb/consensus/log_cache.h index e1f98cc25d6f..adebf7e408a7 100644 --- a/src/yb/consensus/log_cache.h +++ b/src/yb/consensus/log_cache.h @@ -78,8 +78,6 @@ class ReplicateMsg; struct ReadOpsResult { ReplicateMsgs messages; OpId preceding_op; - SchemaPB header_schema; - uint32_t header_schema_version; HaveMoreMessages have_more_messages = HaveMoreMessages::kFalse; int64_t read_from_disk_size = 0; }; diff --git a/src/yb/consensus/log_reader.cc b/src/yb/consensus/log_reader.cc index 357d2c3cb199..9d2b346fde76 100644 --- a/src/yb/consensus/log_reader.cc +++ b/src/yb/consensus/log_reader.cc @@ -392,8 +392,6 @@ Status LogReader::ReadReplicatesInRange( int64_t max_bytes_to_read, ReplicateMsgs* replicates, int64_t* starting_op_segment_seq_num, - yb::SchemaPB* modified_schema, - uint32_t* modified_schema_version, CoarseTimePoint deadline) const { DCHECK_GT(starting_at, 0); DCHECK_GE(up_to, starting_at); @@ -460,11 +458,6 @@ Status LogReader::ReadReplicatesInRange( max_bytes_to_read <= 0 || total_size + space_required < max_bytes_to_read) { total_size += space_required; - if (entry.replicate().op_type() == consensus::OperationType::CHANGE_METADATA_OP && - modified_schema != nullptr && modified_schema_version != nullptr) { - entry.replicate().change_metadata_request().schema().ToGoogleProtobuf(modified_schema); - *modified_schema_version = entry.replicate().change_metadata_request().schema_version(); - } replicates_tmp.emplace_back(batch, entry.mutable_replicate()); } else { limit_exceeded = true; diff --git a/src/yb/consensus/log_reader.h b/src/yb/consensus/log_reader.h index d551ca9e1d57..f14d4ae1ec32 100644 --- a/src/yb/consensus/log_reader.h +++ b/src/yb/consensus/log_reader.h @@ -119,9 +119,7 @@ class LogReader { const int64_t up_to, int64_t max_bytes_to_read, consensus::ReplicateMsgs* replicates, - int64_t *starting_op_segment_seq_num, - yb::SchemaPB* modified_schema, - uint32_t *schema_version, + int64_t* starting_op_segment_seq_num, CoarseTimePoint deadline = CoarseTimePoint::max()) const; static const int64_t kNoSizeLimit; diff --git a/src/yb/tools/data-patcher.cc b/src/yb/tools/data-patcher.cc index ae674d5260a2..ad3fb4b02fa5 100644 --- a/src/yb/tools/data-patcher.cc +++ b/src/yb/tools/data-patcher.cc @@ -607,7 +607,7 @@ Status ChangeTimeInWalDir( header.set_minor_version(log::kLogMinorVersion); header.set_sequence_number(1); header.set_unused_tablet_id("TABLET ID"); - header.mutable_schema(); + header.mutable_unused_schema(); RETURN_NOT_OK(new_segment.WriteHeaderAndOpen(header)); From ee6b4670bf4da0a049d72034924b064625a58ae4 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Wed, 25 Jan 2023 19:56:27 +0530 Subject: [PATCH 02/20] WIP: Decoupling bootstrap and primary table fields --- src/yb/consensus/consensus_peers-test.cc | 4 +- src/yb/consensus/consensus_queue-test.cc | 4 +- src/yb/consensus/log-dump.cc | 4 +- src/yb/consensus/log-test-base.h | 4 +- src/yb/consensus/log.cc | 28 ++++--- src/yb/consensus/log.h | 14 ++-- src/yb/consensus/log_cache-test.cc | 4 +- src/yb/consensus/raft_consensus-test.cc | 4 +- .../consensus/raft_consensus_quorum-test.cc | 4 +- src/yb/master/sys_catalog.cc | 2 + src/yb/tablet/metadata.proto | 12 ++- src/yb/tablet/tablet.cc | 77 +++++++++++++------ src/yb/tablet/tablet.h | 10 ++- src/yb/tablet/tablet_bootstrap.cc | 4 +- src/yb/tablet/tablet_metadata.cc | 60 +++++++++++---- src/yb/tablet/tablet_metadata.h | 22 +++++- src/yb/tablet/tablet_peer-test.cc | 2 +- src/yb/tserver/remote_bootstrap_client.cc | 7 +- .../tserver/remote_bootstrap_session-test.cc | 4 +- src/yb/tserver/ts_tablet_manager.cc | 42 +++++++--- src/yb/tserver/ts_tablet_manager.h | 6 +- 21 files changed, 217 insertions(+), 101 deletions(-) diff --git a/src/yb/consensus/consensus_peers-test.cc b/src/yb/consensus/consensus_peers-test.cc index 1707d59156f6..cbed90442c2c 100644 --- a/src/yb/consensus/consensus_peers-test.cc +++ b/src/yb/consensus/consensus_peers-test.cc @@ -98,8 +98,8 @@ class ConsensusPeersTest : public YBTest { kTabletId, fs_manager_->GetFirstTabletWalDirOrDie(kTableId, kTabletId), fs_manager_->uuid(), - schema_, - 0, // schema_version + // schema_, + // 0, // schema_version nullptr, // table_metric_entity nullptr, // tablet_metric_entity log_thread_pool_.get(), diff --git a/src/yb/consensus/consensus_queue-test.cc b/src/yb/consensus/consensus_queue-test.cc index 45ee90f30c19..a5bb36306f68 100644 --- a/src/yb/consensus/consensus_queue-test.cc +++ b/src/yb/consensus/consensus_queue-test.cc @@ -89,8 +89,8 @@ class ConsensusQueueTest : public YBTest { kTestTablet, fs_manager_->GetFirstTabletWalDirOrDie(kTestTable, kTestTablet), fs_manager_->uuid(), - schema_, - 0, // schema_version + // schema_, + // 0, // schema_version nullptr, nullptr, log_thread_pool_.get(), diff --git a/src/yb/consensus/log-dump.cc b/src/yb/consensus/log-dump.cc index 9f0a6673e274..e5dc35cc47e4 100644 --- a/src/yb/consensus/log-dump.cc +++ b/src/yb/consensus/log-dump.cc @@ -419,8 +419,8 @@ Status FilterLogSegment(const string& segment_path) { segment_header.unused_tablet_id(), output_wal_dir, "log-dump-tool", - tablet_schema, - segment_header.unused_schema_version(), + // tablet_schema, + // segment_header.unused_schema_version(), /* table_metric_entity */ nullptr, /* tablet_metric_entity */ nullptr, log_thread_pool.get(), diff --git a/src/yb/consensus/log-test-base.h b/src/yb/consensus/log-test-base.h index a1dbcf30260c..d1c0fffbc483 100644 --- a/src/yb/consensus/log-test-base.h +++ b/src/yb/consensus/log-test-base.h @@ -186,8 +186,8 @@ class LogTestBase : public YBTest { kTestTablet, tablet_wal_path_, fs_manager_->uuid(), - schema_with_ids, - 0, // schema_version + // schema_with_ids, + // 0, // schema_version table_metric_entity_.get(), tablet_metric_entity_.get(), log_thread_pool_.get(), diff --git a/src/yb/consensus/log.cc b/src/yb/consensus/log.cc index 47b4f28b79a8..d14a6fecf991 100644 --- a/src/yb/consensus/log.cc +++ b/src/yb/consensus/log.cc @@ -550,8 +550,8 @@ Status Log::Open(const LogOptions &options, const std::string& tablet_id, const std::string& wal_dir, const std::string& peer_uuid, - const Schema& schema, - uint32_t schema_version, + // const Schema& schema, + // uint32_t schema_version, const scoped_refptr& table_metric_entity, const scoped_refptr& tablet_metric_entity, ThreadPool* append_thread_pool, @@ -571,8 +571,8 @@ Status Log::Open(const LogOptions &options, wal_dir, tablet_id, peer_uuid, - schema, - schema_version, + // schema, + // schema_version, table_metric_entity, tablet_metric_entity, append_thread_pool, @@ -589,8 +589,8 @@ Log::Log( string wal_dir, string tablet_id, string peer_uuid, - const Schema& schema, - uint32_t schema_version, + // const Schema& schema, + // uint32_t schema_version, const scoped_refptr& table_metric_entity, const scoped_refptr& tablet_metric_entity, ThreadPool* append_thread_pool, @@ -601,8 +601,8 @@ Log::Log( wal_dir_(std::move(wal_dir)), tablet_id_(std::move(tablet_id)), peer_uuid_(std::move(peer_uuid)), - schema_(std::make_unique(schema)), - schema_version_(schema_version), + schema_(std::make_unique()), + // schema_version_(schema_version), active_segment_sequence_number_(options.initial_active_segment_sequence_number), log_state_(kLogInitialized), max_segment_size_(options_.segment_size_bytes), @@ -1424,8 +1424,7 @@ uint64_t Log::OnDiskSize() { return ret; } -void Log::SetSchemaForNextLogSegment(const Schema& schema, - uint32_t version) { +void Log::SetSchemaForNextLogSegment(const Schema& schema, uint32_t version) { std::lock_guard l(schema_lock_); *schema_ = schema; schema_version_ = version; @@ -1736,8 +1735,13 @@ Status Log::SwitchToAllocatedSegment() { // Set the new segment's schema. { SharedLock l(schema_lock_); - SchemaToPB(*schema_, header.mutable_unused_schema()); - header.set_unused_schema_version(schema_version_); + if (schema_) { + SchemaToPB(*schema_, header.mutable_unused_schema()); + header.set_unused_schema_version(schema_version_); + } else { + header.mutable_unused_schema(); + // header.set_unused_schema_version(schema_version_); + } } RETURN_NOT_OK(new_segment->WriteHeaderAndOpen(header)); diff --git a/src/yb/consensus/log.h b/src/yb/consensus/log.h index be7ac5dc4abc..75f7f5d8c831 100644 --- a/src/yb/consensus/log.h +++ b/src/yb/consensus/log.h @@ -142,8 +142,8 @@ class Log : public RefCountedThreadSafe { const std::string& tablet_id, const std::string& wal_dir, const std::string& peer_uuid, - const Schema& schema, - uint32_t schema_version, + // const Schema& schema, + // uint32_t schema_version, const scoped_refptr& table_metric_entity, const scoped_refptr& tablet_metric_entity, ThreadPool *append_thread_pool, @@ -255,9 +255,9 @@ class Log : public RefCountedThreadSafe { // Returns 0 if the log is shut down. uint64_t OnDiskSize(); - // Set the schema for the _next_ log segment. - // - // This method is thread-safe. + // // Set the schema for the _next_ log segment. + // // + // // This method is thread-safe. void SetSchemaForNextLogSegment(const Schema& schema, uint32_t version); void set_wal_retention_secs(uint32_t wal_retention_secs); @@ -341,8 +341,8 @@ class Log : public RefCountedThreadSafe { std::string wal_dir, std::string tablet_id, std::string peer_uuid, - const Schema& schema, - uint32_t schema_version, + // const Schema& schema, + // uint32_t schema_version, const scoped_refptr& table_metric_entity, const scoped_refptr& tablet_metric_entity, ThreadPool* append_thread_pool, diff --git a/src/yb/consensus/log_cache-test.cc b/src/yb/consensus/log_cache-test.cc index a397ed3a8bcf..8172ae003138 100644 --- a/src/yb/consensus/log_cache-test.cc +++ b/src/yb/consensus/log_cache-test.cc @@ -110,8 +110,8 @@ class LogCacheTest : public YBTest { kTestTablet, fs_manager_->GetFirstTabletWalDirOrDie(kTestTable, kTestTablet), fs_manager_->uuid(), - schema_, - 0, // schema_version + // schema_, + // 0, // schema_version nullptr, // table_metrics_entity nullptr, // tablet_metrics_entity log_thread_pool_.get(), diff --git a/src/yb/consensus/raft_consensus-test.cc b/src/yb/consensus/raft_consensus-test.cc index 1cf491b0fe99..f84aaff850aa 100644 --- a/src/yb/consensus/raft_consensus-test.cc +++ b/src/yb/consensus/raft_consensus-test.cc @@ -238,8 +238,8 @@ class RaftConsensusTest : public YBTest { kTestTablet, fs_manager_->GetFirstTabletWalDirOrDie(kTestTable, kTestTablet), fs_manager_->uuid(), - schema_, - 0, // schema_version + // schema_, + // 0, // schema_version nullptr, // table_metric_entity nullptr, // tablet_metric_entity log_thread_pool_.get(), diff --git a/src/yb/consensus/raft_consensus_quorum-test.cc b/src/yb/consensus/raft_consensus_quorum-test.cc index acad4e2ae2ed..e80463979c78 100644 --- a/src/yb/consensus/raft_consensus_quorum-test.cc +++ b/src/yb/consensus/raft_consensus_quorum-test.cc @@ -139,8 +139,8 @@ class RaftConsensusQuorumTest : public YBTest { kTestTablet, fs_manager->GetFirstTabletWalDirOrDie(kTestTable, kTestTablet), fs_manager->uuid(), - schema_, - 0, // schema_version + // schema_, + // 0, // schema_version nullptr, // table_metric_entity nullptr, // tablet_metric_entity log_thread_pool_.get(), diff --git a/src/yb/master/sys_catalog.cc b/src/yb/master/sys_catalog.cc index b66e5a106d6d..8545beaeba40 100644 --- a/src/yb/master/sys_catalog.cc +++ b/src/yb/master/sys_catalog.cc @@ -599,6 +599,8 @@ Status SysCatalogTable::OpenTablet(const scoped_refptrInit(data.tablet_init_data); + log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); // TODO: Do we have a setSplittable(false) or something from the outside is // handling split in the TS? diff --git a/src/yb/tablet/metadata.proto b/src/yb/tablet/metadata.proto index 2fc58ce7cb49..8de5258f4555 100644 --- a/src/yb/tablet/metadata.proto +++ b/src/yb/tablet/metadata.proto @@ -112,9 +112,9 @@ message KvStoreInfoPB { // compacted. Defaults to 0 (i.e. HybridTime::kMin). optional uint64 last_full_compaction_time = 10; - // Initial version of the primary table. Set only when the tablet metadata is in RocksDB. - // Any subsequent version goes to RocksDB. - optional TableInfoPB initial_primary_table = 11; + // // Initial version of the primary table. Set only when the tablet metadata is in RocksDB. + // // Any subsequent version goes to RocksDB. + // optional TableInfoPB initial_primary_table = 11; // Table metadata schema. Set only when the metadata is in RocksDB. optional SchemaPB metadata_schema = 12; @@ -215,6 +215,12 @@ message RaftGroupReplicaSuperBlockPB { optional OpIdPB cdc_sdk_min_checkpoint_op_id = 33; optional fixed64 cdc_sdk_safe_time = 34; + + optional TableType primary_table_type = 35; + + optional bool transactional = 36 [default = false]; + + optional bool index_table = 37 [default = false]; } message FilePB { diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index 54deffe8efb3..6751f811bc1e 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -427,9 +427,35 @@ class Tablet::RegularRocksDbListener : public rocksdb::EventListener { const std::string log_prefix_; }; +void Tablet::Init(const TabletInitData& data) { + key_schema_ = std::make_unique(data.metadata->schema()->CreateKeyProjection()); + CHECK(schema()->has_column_ids()); + LOG_WITH_PREFIX(INFO) << "Schema version for " << metadata_->table_name() << " is " + << metadata_->schema_version(); + + auto table_info = metadata_->primary_table_info(); + table_metrics_entity_->SetAttribute("table_name", table_info->table_name); + table_metrics_entity_->SetAttribute("namespace_name", table_info->namespace_name); + + bool has_index = !table_info->index_map->empty(); + + // Create index table metadata cache for secondary index update. + if (has_index) { + CreateNewYBMetaDataCache(); + } + + // If this is a unique index tablet, set up the index primary key schema. + if (table_info->index_info && table_info->index_info->is_unique()) { + unique_index_key_schema_ = std::make_unique(); + const auto ids = table_info->index_info->index_key_column_ids(); + CHECK_OK(table_info->schema().CreateProjectionByIdsIgnoreMissing( + ids, unique_index_key_schema_.get())); + } +} + Tablet::Tablet(const TabletInitData& data) - : key_schema_(std::make_unique(data.metadata->schema()->CreateKeyProjection())), - metadata_(data.metadata), + // : key_schema_(std::make_unique(data.metadata->schema()->CreateKeyProjection())), + : metadata_(data.metadata), table_type_(data.metadata->table_type()), log_anchor_registry_(data.log_anchor_registry), mem_tracker_(MemTracker::CreateTracker( @@ -453,16 +479,16 @@ Tablet::Tablet(const TabletInitData& data) clock_, data.allowed_history_cutoff_provider, metadata_.get())), full_compaction_pool_(data.full_compaction_pool), ts_post_split_compaction_added_(std::move(data.post_split_compaction_added)) { - CHECK(schema()->has_column_ids()); - LOG_WITH_PREFIX(INFO) << "Schema version for " << metadata_->table_name() << " is " - << metadata_->schema_version(); + // CHECK(schema()->has_column_ids()); + // LOG_WITH_PREFIX(INFO) << "Schema version for " << metadata_->table_name() << " is " + // << metadata_->schema_version(); if (data.metric_registry) { MetricEntity::AttributeMap attrs; // TODO(KUDU-745): table_id is apparently not set in the metadata. attrs["table_id"] = metadata_->table_id(); - attrs["table_name"] = metadata_->table_name(); - attrs["namespace_name"] = metadata_->namespace_name(); + // attrs["table_name"] = metadata_->table_name(); + // attrs["namespace_name"] = metadata_->namespace_name(); table_metrics_entity_ = METRIC_ENTITY_table.Instantiate(data.metric_registry, metadata_->table_id(), attrs); tablet_metrics_entity_ = @@ -480,9 +506,10 @@ Tablet::Tablet(const TabletInitData& data) mem_tracker_->SetMetricEntity(tablet_metrics_entity_); } - auto table_info = metadata_->primary_table_info(); - bool has_index = !table_info->index_map->empty(); - bool transactional = data.metadata->schema()->table_properties().is_transactional(); + // auto table_info = metadata_->primary_table_info(); + // bool has_index = !table_info->index_map->empty(); + // bool transactional = data.metadata->schema()->table_properties().is_transactional(); + bool transactional = data.metadata->is_transactional(); if (transactional) { server::HybridClock::EnableClockSkewControl(); } @@ -499,21 +526,21 @@ Tablet::Tablet(const TabletInitData& data) } } - // Create index table metadata cache for secondary index update. - if (has_index) { - CreateNewYBMetaDataCache(); - } + // // Create index table metadata cache for secondary index update. + // if (has_index) { + // CreateNewYBMetaDataCache(); + // } - // If this is a unique index tablet, set up the index primary key schema. - if (table_info->index_info && table_info->index_info->is_unique()) { - unique_index_key_schema_ = std::make_unique(); - const auto ids = table_info->index_info->index_key_column_ids(); - CHECK_OK(table_info->schema().CreateProjectionByIdsIgnoreMissing( - ids, unique_index_key_schema_.get())); - } + // // If this is a unique index tablet, set up the index primary key schema. + // if (table_info->index_info && table_info->index_info->is_unique()) { + // unique_index_key_schema_ = std::make_unique(); + // const auto ids = table_info->index_info->index_key_column_ids(); + // CHECK_OK(table_info->schema().CreateProjectionByIdsIgnoreMissing( + // ids, unique_index_key_schema_.get())); + // } if (data.transaction_coordinator_context && - table_info->table_type == TableType::TRANSACTION_STATUS_TABLE_TYPE) { + table_type_ == TableType::TRANSACTION_STATUS_TABLE_TYPE) { transaction_coordinator_ = std::make_unique( metadata_->fs_manager()->uuid(), data.transaction_coordinator_context, @@ -558,7 +585,7 @@ Status Tablet::Open() { TRACE_EVENT0("tablet", "Tablet::Open"); std::lock_guard lock(component_lock_); CHECK_EQ(state_, kInitialized) << "already open"; - CHECK(schema()->has_column_ids()); + // CHECK(schema()->has_column_ids()); switch (table_type_) { case TableType::PGSQL_TABLE_TYPE: FALLTHROUGH_INTENDED; @@ -707,7 +734,7 @@ Status Tablet::OpenKeyValueTablet() { static const std::string kIntentsDB = "IntentsDB"s; rocksdb::BlockBasedTableOptions table_options; - if (!metadata()->primary_table_info()->index_info || metadata()->colocated()) { + if (!metadata()->is_index_table() || metadata()->colocated()) { // This tablet is not dedicated to the index table, so it should be effective to use // advanced key-value encoding algorithm optimized for docdb keys structure. table_options.use_delta_encoding = true; @@ -3645,7 +3672,7 @@ Result Tablet::CreateTransactionOperationContext( if (transaction_id.is_initialized()) { txn_id = transaction_id.get_ptr(); - } else if (metadata_->schema()->table_properties().is_transactional() || is_ysql_catalog_table) { + } else if (metadata_->is_transactional() || is_ysql_catalog_table) { // deadbeef-dead-beef-dead-beef00000075 static const TransactionId kArbitraryTxnIdForNonTxnReads( 17275436393656397278ULL, 8430738506459819486ULL); diff --git a/src/yb/tablet/tablet.h b/src/yb/tablet/tablet.h index aea1f15049b4..37cb26c1d534 100644 --- a/src/yb/tablet/tablet.h +++ b/src/yb/tablet/tablet.h @@ -153,6 +153,8 @@ class Tablet : public AbstractTablet, ~Tablet(); + void Init(const TabletInitData& data); + // Open the tablet. // Upon completion, the tablet enters the kBootstrapping state. Status Open(); @@ -815,6 +817,10 @@ class Tablet : public AbstractTablet, // critical failures. Status ApplyAutoFlagsConfig(const AutoFlagsConfigPB& config); + Status UpsertMetadataDocOperation( + const std::vector& table_infos, Operation* operation = nullptr, + AlreadyAppliedToRegularDB already_applied_to_regular_db = AlreadyAppliedToRegularDB::kFalse); + std::string LogPrefix() const; private: @@ -920,10 +926,6 @@ class Tablet : public AbstractTablet, Operation* operation, AlreadyAppliedToRegularDB already_applied_to_regular_db = AlreadyAppliedToRegularDB::kFalse); - Status UpsertMetadataDocOperation( - const std::vector& table_infos, Operation* operation = nullptr, - AlreadyAppliedToRegularDB already_applied_to_regular_db = AlreadyAppliedToRegularDB::kFalse); - std::unique_ptr key_schema_; RaftGroupMetadataPtr metadata_; diff --git a/src/yb/tablet/tablet_bootstrap.cc b/src/yb/tablet/tablet_bootstrap.cc index 59fc3e3c5139..74bd67c9af27 100644 --- a/src/yb/tablet/tablet_bootstrap.cc +++ b/src/yb/tablet/tablet_bootstrap.cc @@ -824,8 +824,8 @@ class TabletBootstrap { tablet_->tablet_id(), metadata.wal_dir(), metadata.fs_manager()->uuid(), - *tablet_->schema(), - metadata.schema_version(), + // *tablet_->schema(), + // metadata.schema_version(), tablet_->GetTableMetricsEntity(), tablet_->GetTabletMetricsEntity(), append_pool_, diff --git a/src/yb/tablet/tablet_metadata.cc b/src/yb/tablet/tablet_metadata.cc index 1693e45c695d..d964460343f2 100644 --- a/src/yb/tablet/tablet_metadata.cc +++ b/src/yb/tablet/tablet_metadata.cc @@ -82,7 +82,7 @@ DEPRECATE_FLAG(bool, enable_tablet_orphaned_block_deletion, "10_2022"); DEFINE_RUNTIME_AUTO_bool( - ts_tableinfo_in_rocksdb, kLocalPersisted, false, false, + ts_tableinfo_in_rocksdb, kLocalPersisted, false, true, "Stores the TableInfoPB in RocksDB for tserver tables"); using std::shared_ptr; @@ -420,12 +420,12 @@ Status KvStoreInfo::LoadFromPB(const std::string& tablet_log_prefix, snapshot_schedules.insert(VERIFY_RESULT(FullyDecodeSnapshotScheduleId(schedule_id))); } RETURN_NOT_OK(LoadTablesFromPB(tablet_log_prefix, pb.tables(), primary_table_id)); - if (pb.has_initial_primary_table()) { - initial_primary_table = VERIFY_RESULT( - TableInfo::LoadFromPB(tablet_log_prefix, primary_table_id, pb.initial_primary_table())); - tables.emplace(primary_table_id, initial_primary_table); - UpdateColocationMap(initial_primary_table); - DCHECK(pb.has_metadata_schema()); + if (pb.has_metadata_schema()) { + // initial_primary_table = VERIFY_RESULT( + // TableInfo::LoadFromPB(tablet_log_prefix, primary_table_id, pb.initial_primary_table())); + // tables.emplace(primary_table_id, initial_primary_table); + // UpdateColocationMap(initial_primary_table); + // DCHECK(pb.has_metadata_schema()); RETURN_NOT_OK(SchemaFromPB(pb.metadata_schema(), &metadata_schema)); } return Status::OK(); @@ -487,7 +487,7 @@ void KvStoreInfo::ToPB(const TableId& primary_table_id, KvStoreInfoPB* pb) const pb->set_last_full_compaction_time(last_full_compaction_time); if (IsTableMetadataInRocksDB()) { - initial_primary_table->ToPB(pb->mutable_initial_primary_table()); + // initial_primary_table->ToPB(pb->mutable_initial_primary_table()); SchemaToPB(metadata_schema, pb->mutable_metadata_schema()); } else { // Putting primary table first, then all other tables. @@ -778,6 +778,9 @@ RaftGroupMetadata::RaftGroupMetadata( raft_group_id_(data.raft_group_id), partition_(std::make_shared(data.partition)), primary_table_id_(data.table_info->table_id), + primary_table_type_(data.table_info->table_type), + is_transactional_(data.table_info->schema().table_properties().is_transactional()), + is_index_table_(data.table_info->index_info), kv_store_(KvStoreId(raft_group_id_), data_dir, data.snapshot_schedules), fs_manager_(data.fs_manager), wal_dir_(wal_dir), @@ -787,16 +790,18 @@ RaftGroupMetadata::RaftGroupMetadata( cdc_sdk_min_checkpoint_op_id_(OpId::Invalid()), cdc_sdk_safe_time_(HybridTime::kInvalid), log_prefix_(consensus::MakeTabletLogPrefix(raft_group_id_, fs_manager_->uuid())) { - CHECK(data.table_info->schema().has_column_ids()); - CHECK_GT(data.table_info->schema().num_key_columns(), 0); - kv_store_.tables.emplace(primary_table_id_, data.table_info); - kv_store_.UpdateColocationMap(data.table_info); - bool is_ts_tablet = data.table_info->table_id != master::kSysCatalogTableId; if (is_ts_tablet && FLAGS_ts_tableinfo_in_rocksdb && data.table_info->table_type == PGSQL_TABLE_TYPE) { - kv_store_.initial_primary_table = data.table_info; + // kv_store_.initial_primary_table = data.table_info; kv_store_.metadata_schema = kv_store_.BuildMetadataSchema(); + } else { + CHECK(data.table_info->schema() + .has_column_ids()); // how to do this if metadata in rocksdb for primary table - + // probably we can check if schema is initialized + CHECK_GT(data.table_info->schema().num_key_columns(), 0); + kv_store_.tables.emplace(primary_table_id_, data.table_info); + kv_store_.UpdateColocationMap(data.table_info); } } @@ -856,6 +861,27 @@ Status RaftGroupMetadata::LoadFromSuperBlock(const RaftGroupReplicaSuperBlockPB& RETURN_NOT_OK(kv_store_.LoadFromPB( log_prefix_, superblock.kv_store(), primary_table_id_, local_superblock)); + if (superblock.has_primary_table_type()) { + primary_table_type_ = superblock.primary_table_type(); + } else { + primary_table_type_ = primary_table_info()->table_type; + } + LOG_WITH_FUNC(INFO) << "primary_table_type_ " << primary_table_type_; + + if (superblock.has_transactional()) { + is_transactional_ = superblock.transactional(); + } else { + is_transactional_ = primary_table_info()->schema().table_properties().is_transactional(); + } + LOG_WITH_FUNC(INFO) << "is_transactional_ " << is_transactional_; + + if (superblock.has_index_table()) { + is_index_table_ = superblock.index_table(); + } else { + is_index_table_ = primary_table_info()->index_info != nullptr; + } + LOG_WITH_FUNC(INFO) << "is_index_table_ " << is_index_table_; + wal_dir_ = superblock.wal_dir(); tablet_data_state_ = superblock.tablet_data_state(); @@ -998,6 +1024,9 @@ void RaftGroupMetadata::ToSuperBlockUnlocked(RaftGroupReplicaSuperBlockPB* super } pb.set_primary_table_id(primary_table_id_); + pb.set_primary_table_type(primary_table_type_); + pb.set_transactional(is_transactional_); + pb.set_index_table(is_index_table_); pb.set_colocated(colocated_); pb.set_cdc_min_replicated_index(cdc_min_replicated_index_); cdc_sdk_min_checkpoint_op_id_.ToPB(pb.mutable_cdc_sdk_min_checkpoint_op_id()); @@ -1607,7 +1636,8 @@ std::string RaftGroupMetadata::table_name( TableType RaftGroupMetadata::table_type(const TableId& table_id) const { DCHECK_NE(state_, kNotLoadedYet); if (table_id.empty()) { - return primary_table_info()->table_type; + std::lock_guard lock(data_mutex_); + return primary_table_type_; } const auto& table_info = CHECK_RESULT(GetTableInfo(table_id)); return table_info->table_type; diff --git a/src/yb/tablet/tablet_metadata.h b/src/yb/tablet/tablet_metadata.h index d3b72e3841be..387d9ba412fa 100644 --- a/src/yb/tablet/tablet_metadata.h +++ b/src/yb/tablet/tablet_metadata.h @@ -205,7 +205,7 @@ struct KvStoreInfo { // Updates colocation map with new table info. void UpdateColocationMap(const TableInfoPtr& table_info); - bool IsTableMetadataInRocksDB() const { return initial_primary_table != nullptr; } + bool IsTableMetadataInRocksDB() const { return metadata_schema.initialized(); } KvStoreId kv_store_id; @@ -225,7 +225,7 @@ struct KvStoreInfo { // See KvStoreInfoPB field with the same name. uint64_t last_full_compaction_time = kNoLastFullCompactionTime; - TableInfoPtr initial_primary_table = nullptr; + // TableInfoPtr initial_primary_table = nullptr; // Map of tables sharing this KV-store indexed by the table id. // If pieces of the same table live in the same Raft group they should be located in different @@ -316,6 +316,18 @@ class RaftGroupMetadata : public RefCountedThreadSafe, return primary_table_id_; } + bool is_transactional() const { + DCHECK_NE(state_, kNotLoadedYet); + std::lock_guard lock(data_mutex_); + return is_transactional_; + } + + bool is_index_table() const { + DCHECK_NE(state_, kNotLoadedYet); + std::lock_guard lock(data_mutex_); + return is_index_table_; + } + // Returns the name, type, schema, index map, schema, etc of the table. std::string namespace_name(const TableId& table_id = "") const; @@ -677,6 +689,12 @@ class RaftGroupMetadata : public RefCountedThreadSafe, // Additional tables can be added to this Raft group to co-locate with this table. TableId primary_table_id_ GUARDED_BY(data_mutex_); + TableType primary_table_type_ GUARDED_BY(data_mutex_); + + bool is_transactional_ GUARDED_BY(data_mutex_) = false; + + bool is_index_table_ GUARDED_BY(data_mutex_) = false; + // KV-store for this Raft group. KvStoreInfo kv_store_; diff --git a/src/yb/tablet/tablet_peer-test.cc b/src/yb/tablet/tablet_peer-test.cc index 92a7f1609f65..bb3ce1985942 100644 --- a/src/yb/tablet/tablet_peer-test.cc +++ b/src/yb/tablet/tablet_peer-test.cc @@ -182,7 +182,7 @@ class TabletPeerTest : public YBTabletTest { scoped_refptr log; ASSERT_OK(Log::Open(LogOptions(), tablet()->tablet_id(), tablet()->metadata()->wal_dir(), tablet()->metadata()->fs_manager()->uuid(), - *tablet()->schema(), tablet()->metadata()->schema_version(), + // *tablet()->schema(), tablet()->metadata()->schema_version(), table_metric_entity_.get(), tablet_metric_entity_.get(), log_thread_pool_.get(), log_thread_pool_.get(), log_thread_pool_.get(), tablet()->metadata()->cdc_min_replicated_index(), &log)); diff --git a/src/yb/tserver/remote_bootstrap_client.cc b/src/yb/tserver/remote_bootstrap_client.cc index 9d4399bf3b47..52d958d10bbd 100644 --- a/src/yb/tserver/remote_bootstrap_client.cc +++ b/src/yb/tserver/remote_bootstrap_client.cc @@ -264,9 +264,10 @@ Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid, break; } } - if (!table_ptr && kv_store->has_initial_primary_table()) { - table_ptr = &kv_store->initial_primary_table(); - } + // TODO + // if (!table_ptr && kv_store->has_initial_primary_table()) { + // table_ptr = &kv_store->initial_primary_table(); + // } if (!table_ptr) { return STATUS(InvalidArgument, Format( "Tablet $0: Superblock's KV-store doesn't contain primary table $1", tablet_id_, diff --git a/src/yb/tserver/remote_bootstrap_session-test.cc b/src/yb/tserver/remote_bootstrap_session-test.cc index 737602641819..41929c31989e 100644 --- a/src/yb/tserver/remote_bootstrap_session-test.cc +++ b/src/yb/tserver/remote_bootstrap_session-test.cc @@ -68,8 +68,8 @@ void RemoteBootstrapSessionTest::SetUpTabletPeer() { fs_manager()->GetFirstTabletWalDirOrDie(tablet_ptr->metadata()->table_id(), tablet_id), fs_manager()->uuid(), - *tablet()->schema(), - 0, // schema_version + // *tablet()->schema(), + // 0, // schema_version nullptr, // table_metric_entity nullptr, // tablet_metric_entity log_thread_pool_.get(), diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index b062e138c3bb..79b0a93e43cf 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -563,7 +563,7 @@ Status TSTabletManager::Init() { TabletPeerPtr tablet_peer = VERIFY_RESULT(CreateAndRegisterTabletPeer(meta, NEW_PEER)); RETURN_NOT_OK(open_tablet_pool_->SubmitFunc( - std::bind(&TSTabletManager::OpenTablet, this, meta, deleter))); + std::bind(&TSTabletManager::OpenTablet, this, meta, deleter, nullptr))); } // Background task initiation. @@ -774,8 +774,11 @@ Result TSTabletManager::CreateNewTablet( TabletPeerPtr new_peer = VERIFY_RESULT(CreateAndRegisterTabletPeer(meta, NEW_PEER)); // We can run this synchronously since there is nothing to bootstrap. - RETURN_NOT_OK( - open_tablet_pool_->SubmitFunc(std::bind(&TSTabletManager::OpenTablet, this, meta, deleter))); + // RETURN_NOT_OK( + // open_tablet_pool_->SubmitFunc(std::bind(&TSTabletManager::OpenTablet, this, meta, + // deleter))); + + OpenTablet(meta, deleter, table_info); return new_peer; } @@ -891,7 +894,8 @@ void TSTabletManager::CreatePeerAndOpenTablet( } return; } - s = open_tablet_pool_->SubmitFunc(std::bind(&TSTabletManager::OpenTablet, this, meta, deleter)); + s = open_tablet_pool_->SubmitFunc( + std::bind(&TSTabletManager::OpenTablet, this, meta, deleter, nullptr)); if (!s.ok()) { LOG(DFATAL) << Format("Failed to schedule opening tablet $0: $1", meta->table_id(), s); return; @@ -1266,7 +1270,7 @@ Status TSTabletManager::StartRemoteBootstrap(const StartRemoteBootstrapRequestPB // to check what happens when this server receives raft consensus requests since at this point, // this tablet server could be a voter (if the ChangeRole request in Finish succeeded and its // initial role was PRE_VOTER). - OpenTablet(meta, nullptr); + OpenTablet(meta, nullptr, nullptr); // If OpenTablet fails, tablet_peer->error() will be set. RETURN_NOT_OK(ShutdownAndTombstoneTabletPeerNotOk( tablet_peer->error(), tablet_peer, meta, fs_manager_->uuid(), @@ -1464,8 +1468,10 @@ Status TSTabletManager::OpenTabletMeta(const string& tablet_id, return Status::OK(); } -void TSTabletManager::OpenTablet(const RaftGroupMetadataPtr& meta, - const scoped_refptr& deleter) { +void TSTabletManager::OpenTablet( + const RaftGroupMetadataPtr& meta, + const scoped_refptr& deleter, + const tablet::TableInfoPtr& table_info) { string tablet_id = meta->raft_group_id(); TRACE_EVENT1("tserver", "TSTabletManager::OpenTablet", "tablet_id", tablet_id); @@ -1568,12 +1574,28 @@ void TSTabletManager::OpenTablet(const RaftGroupMetadataPtr& meta, tablet_peer->SetFailed(s); return; } + if (table_info && tablet->metadata()->IsTableMetadataInRocksDB()) { + s = tablet->UpsertMetadataDocOperation({table_info}); + if (!s.ok()) { + LOG(ERROR) << kLogPrefix << "Failed to insert metadata in RocksDB: " << s; + tablet_peer->SetFailed(s); + return; + } + s = tablet->Flush(tablet::FlushMode::kSync, tablet::FlushFlags::kRegular); + if (!s.ok()) { + LOG(ERROR) << kLogPrefix << "Failed to flush regulardb: " << s; + tablet_peer->SetFailed(s); + return; + } + } s = tablet->metadata()->LoadTablesFromRocksDB(tablet); if (!s.ok()) { LOG(ERROR) << kLogPrefix << "Failed to load table metadata from RocksDB: " << s; tablet_peer->SetFailed(s); return; } + tablet->Init(data.tablet_init_data); + log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); } MonoTime start(MonoTime::Now()); @@ -2078,13 +2100,15 @@ void TSTabletManager::CreateReportedTabletPB(const TabletPeerPtr& tablet_peer, AppStatusPB* error_status = reported_tablet->mutable_error(); StatusToPB(tablet_peer->error(), error_status); } - reported_tablet->set_schema_version(tablet_peer->tablet_metadata()->schema_version()); + // reported_tablet->set_schema_version(tablet_peer->tablet_metadata()->schema_version()); + reported_tablet->set_schema_version(0); auto& id_to_version = *reported_tablet->mutable_table_to_version(); // Attach schema versions of all tables including the colocated ones. for (const auto& table_id : tablet_peer->tablet_metadata()->GetAllColocatedTables()) { if (id_to_version.find(table_id) == id_to_version.end()) { - id_to_version[table_id] = tablet_peer->tablet_metadata()->schema_version(table_id); + // id_to_version[table_id] = tablet_peer->tablet_metadata()->schema_version(table_id); + id_to_version[table_id] = 0; } } diff --git a/src/yb/tserver/ts_tablet_manager.h b/src/yb/tserver/ts_tablet_manager.h index 32be39b5b437..79475f7bb772 100644 --- a/src/yb/tserver/ts_tablet_manager.h +++ b/src/yb/tserver/ts_tablet_manager.h @@ -420,8 +420,10 @@ class TSTabletManager : public tserver::TabletPeerLookupIf, public tablet::Table // method. A TransitionInProgressDeleter must be passed as 'deleter' into // this method in order to remove that transition-in-progress entry when // opening the tablet is complete (in either a success or a failure case). - void OpenTablet(const scoped_refptr& meta, - const scoped_refptr& deleter); + void OpenTablet( + const scoped_refptr& meta, + const scoped_refptr& deleter, + const tablet::TableInfoPtr& table_info); // Open a tablet whose metadata has already been loaded. void BootstrapAndInitTablet(const scoped_refptr& meta, From 35b30eb64a23b246e82c3d78bdddeabe7a190437 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Thu, 26 Jan 2023 19:48:31 +0530 Subject: [PATCH 03/20] Primary table in rocksdb: basic funtionality working --- src/yb/tablet/tablet_metadata.cc | 26 ++++++++++++------ src/yb/tserver/ts_tablet_manager.cc | 41 ++++++++++++++++------------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/src/yb/tablet/tablet_metadata.cc b/src/yb/tablet/tablet_metadata.cc index d964460343f2..b0654fdc986b 100644 --- a/src/yb/tablet/tablet_metadata.cc +++ b/src/yb/tablet/tablet_metadata.cc @@ -367,6 +367,7 @@ Status KvStoreInfo::LoadTablesFromPB( Status KvStoreInfo::LoadTablesFromRocksDB( const std::string& tablet_log_prefix, const TabletPtr& tablet, const TableId& primary_table_id) { + LOG(INFO) << "Inside KvStoreInfo::LoadTablesFromRocksDB"; ColumnId metadata_col_id = VERIFY_RESULT(metadata_schema.ColumnIdByName(kSysCatalogTableColMetadata)); const docdb::DocReadContext doc_read_context(tablet_log_prefix, metadata_schema, 0); @@ -386,7 +387,7 @@ Status KvStoreInfo::LoadTablesFromRocksDB( docdb::DocPgsqlScanSpec spec(metadata_schema, rocksdb::kDefaultQueryId, doc_key); RETURN_NOT_OK(doc_iter->Init(spec)); } - + LOG(INFO) << "Starting while loop"; while (VERIFY_RESULT(iter->HasNext())) { QLTableRow row; RETURN_NOT_OK(iter->NextRow(&row)); @@ -397,9 +398,11 @@ Status KvStoreInfo::LoadTablesFromRocksDB( const string& serialized_table_info = table_info_ql_value->binary_value(); TableInfoPtr table_info = VERIFY_RESULT( TableInfo::LoadFromString(tablet_log_prefix, primary_table_id, serialized_table_info)); + LOG(INFO) << "Loaded table from rocksdb " << table_info->ShortDebugString(); tables.emplace(table_info->table_id, table_info); UpdateColocationMap(table_info); } + LOG(INFO) << "Outside while loop"; return Status::OK(); } @@ -790,19 +793,26 @@ RaftGroupMetadata::RaftGroupMetadata( cdc_sdk_min_checkpoint_op_id_(OpId::Invalid()), cdc_sdk_safe_time_(HybridTime::kInvalid), log_prefix_(consensus::MakeTabletLogPrefix(raft_group_id_, fs_manager_->uuid())) { + CHECK(data.table_info->schema() + .has_column_ids()); // how to do this if metadata in rocksdb for primary table - + // probably we can check if schema is initialized + CHECK_GT(data.table_info->schema().num_key_columns(), 0); + kv_store_.tables.emplace(primary_table_id_, data.table_info); + kv_store_.UpdateColocationMap(data.table_info); bool is_ts_tablet = data.table_info->table_id != master::kSysCatalogTableId; if (is_ts_tablet && FLAGS_ts_tableinfo_in_rocksdb && data.table_info->table_type == PGSQL_TABLE_TYPE) { // kv_store_.initial_primary_table = data.table_info; kv_store_.metadata_schema = kv_store_.BuildMetadataSchema(); - } else { - CHECK(data.table_info->schema() - .has_column_ids()); // how to do this if metadata in rocksdb for primary table - - // probably we can check if schema is initialized - CHECK_GT(data.table_info->schema().num_key_columns(), 0); - kv_store_.tables.emplace(primary_table_id_, data.table_info); - kv_store_.UpdateColocationMap(data.table_info); } + // else { + // CHECK(data.table_info->schema() + // .has_column_ids()); // how to do this if metadata in rocksdb for primary table - + // // probably we can check if schema is initialized + // CHECK_GT(data.table_info->schema().num_key_columns(), 0); + // kv_store_.tables.emplace(primary_table_id_, data.table_info); + // kv_store_.UpdateColocationMap(data.table_info); + // } } RaftGroupMetadata::~RaftGroupMetadata() { diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index 79b0a93e43cf..e084f4c7eaba 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -1471,7 +1471,7 @@ Status TSTabletManager::OpenTabletMeta(const string& tablet_id, void TSTabletManager::OpenTablet( const RaftGroupMetadataPtr& meta, const scoped_refptr& deleter, - const tablet::TableInfoPtr& table_info) { + const tablet::TableInfoPtr& new_table_info) { string tablet_id = meta->raft_group_id(); TRACE_EVENT1("tserver", "TSTabletManager::OpenTablet", "tablet_id", tablet_id); @@ -1574,26 +1574,29 @@ void TSTabletManager::OpenTablet( tablet_peer->SetFailed(s); return; } - if (table_info && tablet->metadata()->IsTableMetadataInRocksDB()) { - s = tablet->UpsertMetadataDocOperation({table_info}); - if (!s.ok()) { - LOG(ERROR) << kLogPrefix << "Failed to insert metadata in RocksDB: " << s; - tablet_peer->SetFailed(s); - return; - } - s = tablet->Flush(tablet::FlushMode::kSync, tablet::FlushFlags::kRegular); - if (!s.ok()) { - LOG(ERROR) << kLogPrefix << "Failed to flush regulardb: " << s; - tablet_peer->SetFailed(s); - return; + if (tablet->metadata()->IsTableMetadataInRocksDB()) { + if (new_table_info) { + s = tablet->UpsertMetadataDocOperation({new_table_info}); + if (!s.ok()) { + LOG(ERROR) << kLogPrefix << "Failed to insert metadata in RocksDB: " << s; + tablet_peer->SetFailed(s); + return; + } + s = tablet->Flush(tablet::FlushMode::kSync, tablet::FlushFlags::kRegular); + if (!s.ok()) { + LOG(ERROR) << kLogPrefix << "Failed to flush regular to prefist metadata: " << s; + tablet_peer->SetFailed(s); + return; + } + } else { + s = tablet->metadata()->LoadTablesFromRocksDB(tablet); + if (!s.ok()) { + LOG(ERROR) << kLogPrefix << "Failed to load table metadata from RocksDB: " << s; + tablet_peer->SetFailed(s); + return; + } } } - s = tablet->metadata()->LoadTablesFromRocksDB(tablet); - if (!s.ok()) { - LOG(ERROR) << kLogPrefix << "Failed to load table metadata from RocksDB: " << s; - tablet_peer->SetFailed(s); - return; - } tablet->Init(data.tablet_init_data); log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); } From 4f8b8ba2deb620afe243556afa47f5f2271e9b16 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Thu, 26 Jan 2023 19:51:24 +0530 Subject: [PATCH 04/20] Prototype: Primary table metadata in rocksdb Test Plan: NA Subscribers: bogdan Differential Revision: https://phabricator.dev.yugabyte.com/D22537 --- src/yb/tablet/metadata.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yb/tablet/metadata.proto b/src/yb/tablet/metadata.proto index 8de5258f4555..75059cd59c00 100644 --- a/src/yb/tablet/metadata.proto +++ b/src/yb/tablet/metadata.proto @@ -217,7 +217,7 @@ message RaftGroupReplicaSuperBlockPB { optional fixed64 cdc_sdk_safe_time = 34; optional TableType primary_table_type = 35; - + optional bool transactional = 36 [default = false]; optional bool index_table = 37 [default = false]; From 26f7fb06e54c400640a61950a677e78c9da70578 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Thu, 26 Jan 2023 20:25:26 +0530 Subject: [PATCH 05/20] Reply bug fixed: shouldn't check for table's existence --- src/yb/tablet/tablet_bootstrap.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/yb/tablet/tablet_bootstrap.cc b/src/yb/tablet/tablet_bootstrap.cc index 74bd67c9af27..e7e948c4c5d2 100644 --- a/src/yb/tablet/tablet_bootstrap.cc +++ b/src/yb/tablet/tablet_bootstrap.cc @@ -1487,20 +1487,21 @@ class TabletBootstrap { consensus::LWReplicateMsg* replicate_msg, AlreadyAppliedToRegularDB already_applied_to_regular_db) { auto* request = replicate_msg->mutable_change_metadata_request(); - + LOG_WITH_FUNC(INFO) << "Inside PlayChangeMetadataRequest"; ChangeMetadataOperation operation(tablet_, log_.get(), request); // If table id isn't in metadata, ignore the replay as the table might've been dropped. - auto table_info = meta_->GetTableInfo(operation.table_id().ToBuffer()); - if (!table_info.ok()) { - LOG_WITH_PREFIX(WARNING) << "Table ID " << operation.table_id() - << " not found in metadata, skipping this ChangeMetadataRequest"; - return Status::OK(); - } + // auto table_info = meta_->GetTableInfo(operation.table_id().ToBuffer()); + // if (!table_info.ok()) { + // LOG_WITH_PREFIX(WARNING) << "Table ID " << operation.table_id() + // << " not found in metadata, skipping this ChangeMetadataRequest"; + // return Status::OK(); + // } RETURN_NOT_OK(operation.Prepare(IsLeaderSide::kTrue)); if (tablet_->metadata()->IsTableMetadataInRocksDB()) { + LOG_WITH_FUNC(INFO) << "Playing ChangeMetadataRequest"; operation.set_op_id(OpId::FromPB(replicate_msg->id())); HybridTime hybrid_time(replicate_msg->hybrid_time()); operation.set_hybrid_time(hybrid_time); From 341ccf979622f14135be3dc44d0b2c6b6f7fa47b Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Fri, 27 Jan 2023 16:07:59 +0530 Subject: [PATCH 06/20] Moves metadata load instead tablet bootstrap --- src/yb/master/sys_catalog.cc | 2 +- src/yb/tablet/tablet.cc | 4 ++-- src/yb/tablet/tablet.h | 2 +- src/yb/tablet/tablet_bootstrap.cc | 8 +++++--- src/yb/tablet/tablet_metadata.cc | 14 +++++++++----- src/yb/tserver/ts_tablet_manager.cc | 16 ++++++++++------ 6 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/yb/master/sys_catalog.cc b/src/yb/master/sys_catalog.cc index 8545beaeba40..e1e7112b7bcc 100644 --- a/src/yb/master/sys_catalog.cc +++ b/src/yb/master/sys_catalog.cc @@ -599,7 +599,7 @@ Status SysCatalogTable::OpenTablet(const scoped_refptrInit(data.tablet_init_data); + // tablet->Init(); log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); // TODO: Do we have a setSplittable(false) or something from the outside is diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index 769bd238c4f5..0f0b3ddead87 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -436,8 +436,8 @@ class Tablet::RegularRocksDbListener : public rocksdb::EventListener { const std::string log_prefix_; }; -void Tablet::Init(const TabletInitData& data) { - key_schema_ = std::make_unique(data.metadata->schema()->CreateKeyProjection()); +void Tablet::Init() { + key_schema_ = std::make_unique(metadata_->schema()->CreateKeyProjection()); CHECK(schema()->has_column_ids()); LOG_WITH_PREFIX(INFO) << "Schema version for " << metadata_->table_name() << " is " << metadata_->schema_version(); diff --git a/src/yb/tablet/tablet.h b/src/yb/tablet/tablet.h index d621740ddc77..54e1b0e40c70 100644 --- a/src/yb/tablet/tablet.h +++ b/src/yb/tablet/tablet.h @@ -153,7 +153,7 @@ class Tablet : public AbstractTablet, ~Tablet(); - void Init(const TabletInitData& data); + void Init(); // Open the tablet. // Upon completion, the tablet enters the kBootstrapping state. diff --git a/src/yb/tablet/tablet_bootstrap.cc b/src/yb/tablet/tablet_bootstrap.cc index e7e948c4c5d2..d182241c5746 100644 --- a/src/yb/tablet/tablet_bootstrap.cc +++ b/src/yb/tablet/tablet_bootstrap.cc @@ -521,6 +521,7 @@ class TabletBootstrap { TabletPtr* rebuilt_tablet, scoped_refptr* rebuilt_log, consensus::ConsensusBootstrapInfo* consensus_info) { + LOG_WITH_FUNC(INFO) << "Starting bootstrap function"; const string tablet_id = meta_->raft_group_id(); // Replay requires a valid Consensus metadata file to exist in order to compare the committed @@ -588,6 +589,7 @@ class TabletBootstrap { tablet_id)); } + RETURN_NOT_OK(tablet_->metadata()->LoadTablesFromRocksDB(tablet_)); RETURN_NOT_OK_PREPEND(PlaySegments(consensus_info), "Failed log replay. Reason"); if (cmeta_->current_term() < consensus_info->last_id.term()) { @@ -613,9 +615,9 @@ class TabletBootstrap { RETURN_NOT_OK(tablet_->ModifyFlushedFrontier( new_consensus_frontier, rocksdb::FrontierModificationMode::kForce)); } - + tablet_->Init(); RETURN_NOT_OK(FinishBootstrap("Bootstrap complete.", rebuilt_log, rebuilt_tablet)); - + LOG_WITH_FUNC(INFO) << "Ending bootstrap function"; return Status::OK(); } @@ -1501,7 +1503,7 @@ class TabletBootstrap { RETURN_NOT_OK(operation.Prepare(IsLeaderSide::kTrue)); if (tablet_->metadata()->IsTableMetadataInRocksDB()) { - LOG_WITH_FUNC(INFO) << "Playing ChangeMetadataRequest"; + LOG_WITH_FUNC(INFO) << "Playing ChangeMetadataRequest "; operation.set_op_id(OpId::FromPB(replicate_msg->id())); HybridTime hybrid_time(replicate_msg->hybrid_time()); operation.set_hybrid_time(hybrid_time); diff --git a/src/yb/tablet/tablet_metadata.cc b/src/yb/tablet/tablet_metadata.cc index b0654fdc986b..e6f56bb6a62e 100644 --- a/src/yb/tablet/tablet_metadata.cc +++ b/src/yb/tablet/tablet_metadata.cc @@ -79,6 +79,8 @@ #include "yb/util/status_log.h" #include "yb/util/trace.h" +#include "yb/util/logging.h" + DEPRECATE_FLAG(bool, enable_tablet_orphaned_block_deletion, "10_2022"); DEFINE_RUNTIME_AUTO_bool( @@ -367,12 +369,13 @@ Status KvStoreInfo::LoadTablesFromPB( Status KvStoreInfo::LoadTablesFromRocksDB( const std::string& tablet_log_prefix, const TabletPtr& tablet, const TableId& primary_table_id) { - LOG(INFO) << "Inside KvStoreInfo::LoadTablesFromRocksDB"; + LOG_WITH_FUNC(INFO) << tablet->tablet_id() << " Starting"; ColumnId metadata_col_id = VERIFY_RESULT(metadata_schema.ColumnIdByName(kSysCatalogTableColMetadata)); const docdb::DocReadContext doc_read_context(tablet_log_prefix, metadata_schema, 0); auto iter = VERIFY_RESULT(tablet->NewRowIterator( - metadata_schema.CopyWithoutColumnIds(), metadata_schema, doc_read_context)); + metadata_schema.CopyWithoutColumnIds(), metadata_schema, doc_read_context, + ReadHybridTime::Max(), CoarseTimePoint::max(), AllowBootstrappingState::kTrue)); { auto doc_iter = down_cast(iter.get()); @@ -387,7 +390,7 @@ Status KvStoreInfo::LoadTablesFromRocksDB( docdb::DocPgsqlScanSpec spec(metadata_schema, rocksdb::kDefaultQueryId, doc_key); RETURN_NOT_OK(doc_iter->Init(spec)); } - LOG(INFO) << "Starting while loop"; + LOG_WITH_FUNC(INFO) << tablet->tablet_id() << " Starting while loop"; while (VERIFY_RESULT(iter->HasNext())) { QLTableRow row; RETURN_NOT_OK(iter->NextRow(&row)); @@ -398,11 +401,12 @@ Status KvStoreInfo::LoadTablesFromRocksDB( const string& serialized_table_info = table_info_ql_value->binary_value(); TableInfoPtr table_info = VERIFY_RESULT( TableInfo::LoadFromString(tablet_log_prefix, primary_table_id, serialized_table_info)); - LOG(INFO) << "Loaded table from rocksdb " << table_info->ShortDebugString(); + LOG_WITH_FUNC(INFO) << tablet->tablet_id() << " Loaded table from rocksdb " + << table_info->ShortDebugString(); tables.emplace(table_info->table_id, table_info); UpdateColocationMap(table_info); } - LOG(INFO) << "Outside while loop"; + LOG_WITH_FUNC(INFO) << tablet->tablet_id() << " Outside while loop"; return Status::OK(); } diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index e084f4c7eaba..7f3e85d37fb9 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -113,6 +113,8 @@ #include "yb/util/stopwatch.h" #include "yb/util/trace.h" +#include "yb/util/logging.h" + using namespace std::literals; using namespace std::placeholders; @@ -1597,8 +1599,9 @@ void TSTabletManager::OpenTablet( } } } - tablet->Init(data.tablet_init_data); - log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); + // tablet->Init(); + // TODO: Undo the changes to log class + // log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); } MonoTime start(MonoTime::Now()); @@ -2103,15 +2106,15 @@ void TSTabletManager::CreateReportedTabletPB(const TabletPeerPtr& tablet_peer, AppStatusPB* error_status = reported_tablet->mutable_error(); StatusToPB(tablet_peer->error(), error_status); } - // reported_tablet->set_schema_version(tablet_peer->tablet_metadata()->schema_version()); - reported_tablet->set_schema_version(0); + reported_tablet->set_schema_version(tablet_peer->tablet_metadata()->schema_version()); + // reported_tablet->set_schema_version(0); auto& id_to_version = *reported_tablet->mutable_table_to_version(); // Attach schema versions of all tables including the colocated ones. for (const auto& table_id : tablet_peer->tablet_metadata()->GetAllColocatedTables()) { if (id_to_version.find(table_id) == id_to_version.end()) { - // id_to_version[table_id] = tablet_peer->tablet_metadata()->schema_version(table_id); - id_to_version[table_id] = 0; + id_to_version[table_id] = tablet_peer->tablet_metadata()->schema_version(table_id); + // id_to_version[table_id] = 0; } } @@ -2132,6 +2135,7 @@ void TSTabletManager::CreateReportedTabletPB(const TabletPeerPtr& tablet_peer, // Set the hide status of the tablet. reported_tablet->set_is_hidden(tablet_peer->tablet_metadata()->hidden()); + // LOG_WITH_FUNC(INFO) << "reported_tablet " << reported_tablet->ShortDebugString(); } void TSTabletManager::GenerateTabletReport(TabletReportPB* report, bool include_bootstrap) { From be751a073262935b6125b4c2746066913801c976 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Fri, 27 Jan 2023 16:19:03 +0530 Subject: [PATCH 07/20] Minor fixes --- src/yb/tablet/tablet_metadata.cc | 2 -- src/yb/tserver/ts_tablet_manager.cc | 36 ++++++++++------------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/src/yb/tablet/tablet_metadata.cc b/src/yb/tablet/tablet_metadata.cc index e6f56bb6a62e..cc2f122b51d8 100644 --- a/src/yb/tablet/tablet_metadata.cc +++ b/src/yb/tablet/tablet_metadata.cc @@ -79,8 +79,6 @@ #include "yb/util/status_log.h" #include "yb/util/trace.h" -#include "yb/util/logging.h" - DEPRECATE_FLAG(bool, enable_tablet_orphaned_block_deletion, "10_2022"); DEFINE_RUNTIME_AUTO_bool( diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index 7f3e85d37fb9..5cec113532af 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -113,8 +113,6 @@ #include "yb/util/stopwatch.h" #include "yb/util/trace.h" -#include "yb/util/logging.h" - using namespace std::literals; using namespace std::placeholders; @@ -1576,30 +1574,20 @@ void TSTabletManager::OpenTablet( tablet_peer->SetFailed(s); return; } - if (tablet->metadata()->IsTableMetadataInRocksDB()) { - if (new_table_info) { - s = tablet->UpsertMetadataDocOperation({new_table_info}); - if (!s.ok()) { - LOG(ERROR) << kLogPrefix << "Failed to insert metadata in RocksDB: " << s; - tablet_peer->SetFailed(s); - return; - } - s = tablet->Flush(tablet::FlushMode::kSync, tablet::FlushFlags::kRegular); - if (!s.ok()) { - LOG(ERROR) << kLogPrefix << "Failed to flush regular to prefist metadata: " << s; - tablet_peer->SetFailed(s); - return; - } - } else { - s = tablet->metadata()->LoadTablesFromRocksDB(tablet); - if (!s.ok()) { - LOG(ERROR) << kLogPrefix << "Failed to load table metadata from RocksDB: " << s; - tablet_peer->SetFailed(s); - return; - } + if (tablet->metadata()->IsTableMetadataInRocksDB() && new_table_info) { + s = tablet->UpsertMetadataDocOperation({new_table_info}); + if (!s.ok()) { + LOG(ERROR) << kLogPrefix << "Failed to insert metadata in RocksDB: " << s; + tablet_peer->SetFailed(s); + return; + } + s = tablet->Flush(tablet::FlushMode::kSync, tablet::FlushFlags::kRegular); + if (!s.ok()) { + LOG(ERROR) << kLogPrefix << "Failed to flush regular to prefist metadata: " << s; + tablet_peer->SetFailed(s); + return; } } - // tablet->Init(); // TODO: Undo the changes to log class // log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); } From 98113d4ea47bbbc81e1d3868103d723f25ba4a13 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Fri, 27 Jan 2023 16:35:51 +0530 Subject: [PATCH 08/20] Undoes log class changes --- src/yb/consensus/consensus_peers-test.cc | 4 +-- src/yb/consensus/consensus_queue-test.cc | 4 +-- src/yb/consensus/log-dump.cc | 4 +-- src/yb/consensus/log-test-base.h | 4 +-- src/yb/consensus/log.cc | 25 ++++++++----------- src/yb/consensus/log.h | 8 +++--- src/yb/consensus/log_cache-test.cc | 4 +-- src/yb/consensus/raft_consensus-test.cc | 4 +-- .../consensus/raft_consensus_quorum-test.cc | 4 +-- src/yb/master/sys_catalog.cc | 2 -- src/yb/tablet/tablet_bootstrap.cc | 4 +-- src/yb/tablet/tablet_peer-test.cc | 2 +- .../tserver/remote_bootstrap_session-test.cc | 4 +-- src/yb/tserver/ts_tablet_manager.cc | 2 -- 14 files changed, 33 insertions(+), 42 deletions(-) diff --git a/src/yb/consensus/consensus_peers-test.cc b/src/yb/consensus/consensus_peers-test.cc index cbed90442c2c..1707d59156f6 100644 --- a/src/yb/consensus/consensus_peers-test.cc +++ b/src/yb/consensus/consensus_peers-test.cc @@ -98,8 +98,8 @@ class ConsensusPeersTest : public YBTest { kTabletId, fs_manager_->GetFirstTabletWalDirOrDie(kTableId, kTabletId), fs_manager_->uuid(), - // schema_, - // 0, // schema_version + schema_, + 0, // schema_version nullptr, // table_metric_entity nullptr, // tablet_metric_entity log_thread_pool_.get(), diff --git a/src/yb/consensus/consensus_queue-test.cc b/src/yb/consensus/consensus_queue-test.cc index a5bb36306f68..45ee90f30c19 100644 --- a/src/yb/consensus/consensus_queue-test.cc +++ b/src/yb/consensus/consensus_queue-test.cc @@ -89,8 +89,8 @@ class ConsensusQueueTest : public YBTest { kTestTablet, fs_manager_->GetFirstTabletWalDirOrDie(kTestTable, kTestTablet), fs_manager_->uuid(), - // schema_, - // 0, // schema_version + schema_, + 0, // schema_version nullptr, nullptr, log_thread_pool_.get(), diff --git a/src/yb/consensus/log-dump.cc b/src/yb/consensus/log-dump.cc index 344b3c647bb6..c4f4da67053f 100644 --- a/src/yb/consensus/log-dump.cc +++ b/src/yb/consensus/log-dump.cc @@ -413,8 +413,8 @@ Status FilterLogSegment(const string& segment_path) { segment_header.unused_tablet_id(), output_wal_dir, "log-dump-tool", - // tablet_schema, - // segment_header.deprecated_schema_version(), + tablet_schema, + segment_header.deprecated_schema_version(), /* table_metric_entity */ nullptr, /* tablet_metric_entity */ nullptr, log_thread_pool.get(), diff --git a/src/yb/consensus/log-test-base.h b/src/yb/consensus/log-test-base.h index d1c0fffbc483..a1dbcf30260c 100644 --- a/src/yb/consensus/log-test-base.h +++ b/src/yb/consensus/log-test-base.h @@ -186,8 +186,8 @@ class LogTestBase : public YBTest { kTestTablet, tablet_wal_path_, fs_manager_->uuid(), - // schema_with_ids, - // 0, // schema_version + schema_with_ids, + 0, // schema_version table_metric_entity_.get(), tablet_metric_entity_.get(), log_thread_pool_.get(), diff --git a/src/yb/consensus/log.cc b/src/yb/consensus/log.cc index 4c8ecff5ae2d..ffb2659b3043 100644 --- a/src/yb/consensus/log.cc +++ b/src/yb/consensus/log.cc @@ -550,8 +550,8 @@ Status Log::Open(const LogOptions &options, const std::string& tablet_id, const std::string& wal_dir, const std::string& peer_uuid, - // const Schema& schema, - // uint32_t schema_version, + const Schema& schema, + uint32_t schema_version, const scoped_refptr& table_metric_entity, const scoped_refptr& tablet_metric_entity, ThreadPool* append_thread_pool, @@ -571,8 +571,8 @@ Status Log::Open(const LogOptions &options, wal_dir, tablet_id, peer_uuid, - // schema, - // schema_version, + schema, + schema_version, table_metric_entity, tablet_metric_entity, append_thread_pool, @@ -589,8 +589,8 @@ Log::Log( string wal_dir, string tablet_id, string peer_uuid, - // const Schema& schema, - // uint32_t schema_version, + const Schema& schema, + uint32_t schema_version, const scoped_refptr& table_metric_entity, const scoped_refptr& tablet_metric_entity, ThreadPool* append_thread_pool, @@ -601,8 +601,8 @@ Log::Log( wal_dir_(std::move(wal_dir)), tablet_id_(std::move(tablet_id)), peer_uuid_(std::move(peer_uuid)), - schema_(std::make_unique()), - // schema_version_(schema_version), + schema_(std::make_unique(schema)), + schema_version_(schema_version), active_segment_sequence_number_(options.initial_active_segment_sequence_number), log_state_(kLogInitialized), max_segment_size_(options_.segment_size_bytes), @@ -1735,13 +1735,8 @@ Status Log::SwitchToAllocatedSegment() { // Set the new segment's schema. { SharedLock l(schema_lock_); - if (schema_) { - SchemaToPB(*schema_, header.mutable_deprecated_schema()); - header.set_deprecated_schema_version(schema_version_); - } else { - header.mutable_deprecated_schema(); - // header.set_unused_schema_version(schema_version_); - } + SchemaToPB(*schema_, header.mutable_deprecated_schema()); + header.set_deprecated_schema_version(schema_version_); } RETURN_NOT_OK(new_segment->WriteHeaderAndOpen(header)); diff --git a/src/yb/consensus/log.h b/src/yb/consensus/log.h index 75f7f5d8c831..dfdbabcde1a7 100644 --- a/src/yb/consensus/log.h +++ b/src/yb/consensus/log.h @@ -142,8 +142,8 @@ class Log : public RefCountedThreadSafe { const std::string& tablet_id, const std::string& wal_dir, const std::string& peer_uuid, - // const Schema& schema, - // uint32_t schema_version, + const Schema& schema, + uint32_t schema_version, const scoped_refptr& table_metric_entity, const scoped_refptr& tablet_metric_entity, ThreadPool *append_thread_pool, @@ -341,8 +341,8 @@ class Log : public RefCountedThreadSafe { std::string wal_dir, std::string tablet_id, std::string peer_uuid, - // const Schema& schema, - // uint32_t schema_version, + const Schema& schema, + uint32_t schema_version, const scoped_refptr& table_metric_entity, const scoped_refptr& tablet_metric_entity, ThreadPool* append_thread_pool, diff --git a/src/yb/consensus/log_cache-test.cc b/src/yb/consensus/log_cache-test.cc index 8172ae003138..a397ed3a8bcf 100644 --- a/src/yb/consensus/log_cache-test.cc +++ b/src/yb/consensus/log_cache-test.cc @@ -110,8 +110,8 @@ class LogCacheTest : public YBTest { kTestTablet, fs_manager_->GetFirstTabletWalDirOrDie(kTestTable, kTestTablet), fs_manager_->uuid(), - // schema_, - // 0, // schema_version + schema_, + 0, // schema_version nullptr, // table_metrics_entity nullptr, // tablet_metrics_entity log_thread_pool_.get(), diff --git a/src/yb/consensus/raft_consensus-test.cc b/src/yb/consensus/raft_consensus-test.cc index f84aaff850aa..1cf491b0fe99 100644 --- a/src/yb/consensus/raft_consensus-test.cc +++ b/src/yb/consensus/raft_consensus-test.cc @@ -238,8 +238,8 @@ class RaftConsensusTest : public YBTest { kTestTablet, fs_manager_->GetFirstTabletWalDirOrDie(kTestTable, kTestTablet), fs_manager_->uuid(), - // schema_, - // 0, // schema_version + schema_, + 0, // schema_version nullptr, // table_metric_entity nullptr, // tablet_metric_entity log_thread_pool_.get(), diff --git a/src/yb/consensus/raft_consensus_quorum-test.cc b/src/yb/consensus/raft_consensus_quorum-test.cc index e80463979c78..acad4e2ae2ed 100644 --- a/src/yb/consensus/raft_consensus_quorum-test.cc +++ b/src/yb/consensus/raft_consensus_quorum-test.cc @@ -139,8 +139,8 @@ class RaftConsensusQuorumTest : public YBTest { kTestTablet, fs_manager->GetFirstTabletWalDirOrDie(kTestTable, kTestTablet), fs_manager->uuid(), - // schema_, - // 0, // schema_version + schema_, + 0, // schema_version nullptr, // table_metric_entity nullptr, // tablet_metric_entity log_thread_pool_.get(), diff --git a/src/yb/master/sys_catalog.cc b/src/yb/master/sys_catalog.cc index e1e7112b7bcc..b66e5a106d6d 100644 --- a/src/yb/master/sys_catalog.cc +++ b/src/yb/master/sys_catalog.cc @@ -599,8 +599,6 @@ Status SysCatalogTable::OpenTablet(const scoped_refptrInit(); - log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); // TODO: Do we have a setSplittable(false) or something from the outside is // handling split in the TS? diff --git a/src/yb/tablet/tablet_bootstrap.cc b/src/yb/tablet/tablet_bootstrap.cc index d182241c5746..43ef760337e3 100644 --- a/src/yb/tablet/tablet_bootstrap.cc +++ b/src/yb/tablet/tablet_bootstrap.cc @@ -826,8 +826,8 @@ class TabletBootstrap { tablet_->tablet_id(), metadata.wal_dir(), metadata.fs_manager()->uuid(), - // *tablet_->schema(), - // metadata.schema_version(), + *tablet_->schema(), + metadata.schema_version(), tablet_->GetTableMetricsEntity(), tablet_->GetTabletMetricsEntity(), append_pool_, diff --git a/src/yb/tablet/tablet_peer-test.cc b/src/yb/tablet/tablet_peer-test.cc index bb3ce1985942..92a7f1609f65 100644 --- a/src/yb/tablet/tablet_peer-test.cc +++ b/src/yb/tablet/tablet_peer-test.cc @@ -182,7 +182,7 @@ class TabletPeerTest : public YBTabletTest { scoped_refptr log; ASSERT_OK(Log::Open(LogOptions(), tablet()->tablet_id(), tablet()->metadata()->wal_dir(), tablet()->metadata()->fs_manager()->uuid(), - // *tablet()->schema(), tablet()->metadata()->schema_version(), + *tablet()->schema(), tablet()->metadata()->schema_version(), table_metric_entity_.get(), tablet_metric_entity_.get(), log_thread_pool_.get(), log_thread_pool_.get(), log_thread_pool_.get(), tablet()->metadata()->cdc_min_replicated_index(), &log)); diff --git a/src/yb/tserver/remote_bootstrap_session-test.cc b/src/yb/tserver/remote_bootstrap_session-test.cc index 41929c31989e..737602641819 100644 --- a/src/yb/tserver/remote_bootstrap_session-test.cc +++ b/src/yb/tserver/remote_bootstrap_session-test.cc @@ -68,8 +68,8 @@ void RemoteBootstrapSessionTest::SetUpTabletPeer() { fs_manager()->GetFirstTabletWalDirOrDie(tablet_ptr->metadata()->table_id(), tablet_id), fs_manager()->uuid(), - // *tablet()->schema(), - // 0, // schema_version + *tablet()->schema(), + 0, // schema_version nullptr, // table_metric_entity nullptr, // tablet_metric_entity log_thread_pool_.get(), diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index 5cec113532af..d8e01c5794bb 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -1588,8 +1588,6 @@ void TSTabletManager::OpenTablet( return; } } - // TODO: Undo the changes to log class - // log->SetSchemaForNextLogSegment(*tablet->schema(), tablet->metadata()->schema_version()); } MonoTime start(MonoTime::Now()); From c52597c18adb00c1091286875e9e0bf9031e78b9 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Fri, 27 Jan 2023 16:38:26 +0530 Subject: [PATCH 09/20] Undoes unnecessary changes --- src/yb/consensus/log.cc | 3 ++- src/yb/consensus/log.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/yb/consensus/log.cc b/src/yb/consensus/log.cc index ffb2659b3043..ec18ceccfe57 100644 --- a/src/yb/consensus/log.cc +++ b/src/yb/consensus/log.cc @@ -1424,7 +1424,8 @@ uint64_t Log::OnDiskSize() { return ret; } -void Log::SetSchemaForNextLogSegment(const Schema& schema, uint32_t version) { +void Log::SetSchemaForNextLogSegment(const Schema& schema, + uint32_t version) { std::lock_guard l(schema_lock_); *schema_ = schema; schema_version_ = version; diff --git a/src/yb/consensus/log.h b/src/yb/consensus/log.h index dfdbabcde1a7..be7ac5dc4abc 100644 --- a/src/yb/consensus/log.h +++ b/src/yb/consensus/log.h @@ -255,9 +255,9 @@ class Log : public RefCountedThreadSafe { // Returns 0 if the log is shut down. uint64_t OnDiskSize(); - // // Set the schema for the _next_ log segment. - // // - // // This method is thread-safe. + // Set the schema for the _next_ log segment. + // + // This method is thread-safe. void SetSchemaForNextLogSegment(const Schema& schema, uint32_t version); void set_wal_retention_secs(uint32_t wal_retention_secs); From 205d1c1b6b19684bf70f02fdc861c56c18247619 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Fri, 27 Jan 2023 23:23:57 +0530 Subject: [PATCH 10/20] Handles remote bootstrap --- src/yb/master/sys_catalog.cc | 11 ++- src/yb/tablet/metadata.proto | 2 +- src/yb/tablet/tablet-harness.cc | 11 ++- src/yb/tablet/tablet_bootstrap-test.cc | 11 ++- src/yb/tablet/tablet_metadata.cc | 76 ++++++++++++++------- src/yb/tablet/tablet_metadata.h | 17 +++++ src/yb/tserver/remote_bootstrap_client.cc | 82 +++++++++++++---------- src/yb/tserver/ts_tablet_manager.cc | 14 ++-- 8 files changed, 138 insertions(+), 86 deletions(-) diff --git a/src/yb/master/sys_catalog.cc b/src/yb/master/sys_catalog.cc index b66e5a106d6d..f225501045d9 100644 --- a/src/yb/master/sys_catalog.cc +++ b/src/yb/master/sys_catalog.cc @@ -343,12 +343,11 @@ Status SysCatalogTable::CreateNew(FsManager *fs_manager) { string data_root_dir = fs_manager->GetDataRootDirs()[0]; fs_manager->SetTabletPathByDataPath(kSysCatalogTabletId, data_root_dir); auto metadata = VERIFY_RESULT(tablet::RaftGroupMetadata::CreateNew(tablet::RaftGroupMetadataData { - .fs_manager = fs_manager, - .table_info = table_info, - .raft_group_id = kSysCatalogTabletId, - .partition = partitions[0], - .tablet_data_state = tablet::TABLET_DATA_READY, - .snapshot_schedules = {}, + fs_manager, + table_info, + kSysCatalogTabletId, + partitions[0], + tablet::TABLET_DATA_READY }, data_root_dir)); RaftConfigPB config; diff --git a/src/yb/tablet/metadata.proto b/src/yb/tablet/metadata.proto index 75059cd59c00..f6a598f41339 100644 --- a/src/yb/tablet/metadata.proto +++ b/src/yb/tablet/metadata.proto @@ -117,7 +117,7 @@ message KvStoreInfoPB { // optional TableInfoPB initial_primary_table = 11; // Table metadata schema. Set only when the metadata is in RocksDB. - optional SchemaPB metadata_schema = 12; + optional SchemaPB metadata_schema = 11; } // The super-block keeps track of the Raft group. diff --git a/src/yb/tablet/tablet-harness.cc b/src/yb/tablet/tablet-harness.cc index 0100b2357bad..62c7ca2cca9c 100644 --- a/src/yb/tablet/tablet-harness.cc +++ b/src/yb/tablet/tablet-harness.cc @@ -56,12 +56,11 @@ Status TabletHarness::Create(bool first_time) { "test-tablet", Primary::kTrue, "YBTableTest", "test", "YBTableTest", options_.table_type, schema_, IndexMap(), boost::none, 0 /* schema_version */, partition.first); auto metadata = VERIFY_RESULT(RaftGroupMetadata::TEST_LoadOrCreate(RaftGroupMetadataData { - .fs_manager = fs_manager_.get(), - .table_info = table_info, - .raft_group_id = options_.tablet_id, - .partition = partition.second, - .tablet_data_state = TABLET_DATA_READY, - .snapshot_schedules = {}, + fs_manager_.get(), + table_info, + options_.tablet_id, + partition.second, + TABLET_DATA_READY })); if (options_.enable_metrics) { metrics_registry_.reset(new MetricRegistry()); diff --git a/src/yb/tablet/tablet_bootstrap-test.cc b/src/yb/tablet/tablet_bootstrap-test.cc index 77ee742ae4e1..5c53e72bbafc 100644 --- a/src/yb/tablet/tablet_bootstrap-test.cc +++ b/src/yb/tablet/tablet_bootstrap-test.cc @@ -182,12 +182,11 @@ class BootstrapTest : public LogTestBase { "TEST: ", Primary::kTrue, log::kTestTable, log::kTestNamespace, log::kTestTable, kTableType, schema, IndexMap(), boost::none /* index_info */, 0 /* schema_version */, partition.first); auto result = VERIFY_RESULT(RaftGroupMetadata::TEST_LoadOrCreate(RaftGroupMetadataData { - .fs_manager = fs_manager_.get(), - .table_info = table_info, - .raft_group_id = log::kTestTablet, - .partition = partition.second, - .tablet_data_state = TABLET_DATA_READY, - .snapshot_schedules = {}, + fs_manager_.get(), + table_info, + log::kTestTablet, + partition.second, + TABLET_DATA_READY })); RETURN_NOT_OK(result->Flush()); return result; diff --git a/src/yb/tablet/tablet_metadata.cc b/src/yb/tablet/tablet_metadata.cc index cc2f122b51d8..0ead160d7a74 100644 --- a/src/yb/tablet/tablet_metadata.cc +++ b/src/yb/tablet/tablet_metadata.cc @@ -544,6 +544,39 @@ std::string MakeTabletDirName(const TabletId& tablet_id) { // ============================================================================ +RaftGroupMetadataData::RaftGroupMetadataData( + FsManager* fs_manager_, TableInfoPtr table_info_, RaftGroupId raft_group_id_, + Partition partition_, TabletDataState tablet_data_state_, bool colocated_, + std::vector snapshot_schedules_) + : fs_manager(DCHECK_NOTNULL(fs_manager_)), + table_info(DCHECK_NOTNULL(table_info_)), + raft_group_id(raft_group_id_), + partition(partition_), + tablet_data_state(tablet_data_state_), + colocated(colocated_), + snapshot_schedules(snapshot_schedules_) { + primary_table_id = table_info->table_id; + primary_table_type = table_info->table_type; + is_transactional = table_info->schema().table_properties().is_transactional(); + is_index_table = table_info->index_info != nullptr; +} + +RaftGroupMetadataData::RaftGroupMetadataData( + FsManager* fs_manager_, TableId primary_table_id_, TableType primary_table_type_, + bool is_transactional_, bool is_index_table_, RaftGroupId raft_group_id_, Partition partition_, + TabletDataState tablet_data_state_, bool colocated_, + std::vector snapshot_schedules_) + : fs_manager(DCHECK_NOTNULL(fs_manager_)), + primary_table_id(primary_table_id_), + primary_table_type(primary_table_type_), + is_transactional(is_transactional_), + is_index_table(is_index_table_), + raft_group_id(raft_group_id_), + partition(partition_), + tablet_data_state(tablet_data_state_), + colocated(colocated_), + snapshot_schedules(snapshot_schedules_) {} + Result RaftGroupMetadata::CreateNew( const RaftGroupMetadataData& data, const std::string& data_root_dir, const std::string& wal_root_dir) { @@ -569,11 +602,11 @@ Result RaftGroupMetadata::CreateNew( wal_top_dir = wal_root_dirs[0]; } - const string table_dir_name = Substitute("table-$0", data.table_info->table_id); + const string table_dir_name = Substitute("table-$0", data.primary_table_id); const string tablet_dir_name = MakeTabletDirName(data.raft_group_id); const string wal_dir = JoinPathSegments(wal_top_dir, table_dir_name, tablet_dir_name); - const string rocksdb_dir = JoinPathSegments( - data_top_dir, FsManager::kRocksDBDirName, table_dir_name, tablet_dir_name); + const string rocksdb_dir = + JoinPathSegments(data_top_dir, FsManager::kRocksDBDirName, table_dir_name, tablet_dir_name); RaftGroupMetadataPtr ret(new RaftGroupMetadata(data, rocksdb_dir, wal_dir)); RETURN_NOT_OK(ret->Flush()); @@ -782,10 +815,14 @@ RaftGroupMetadata::RaftGroupMetadata( : state_(kNotWrittenYet), raft_group_id_(data.raft_group_id), partition_(std::make_shared(data.partition)), - primary_table_id_(data.table_info->table_id), - primary_table_type_(data.table_info->table_type), - is_transactional_(data.table_info->schema().table_properties().is_transactional()), - is_index_table_(data.table_info->index_info), + // primary_table_id_(data.table_info->table_id), + // primary_table_type_(data.table_info->table_type), + // is_transactional_(data.table_info->schema().table_properties().is_transactional()), + // is_index_table_(data.table_info->index_info), + primary_table_id_(data.primary_table_id), + primary_table_type_(data.primary_table_type), + is_transactional_(data.is_transactional), + is_index_table_(data.is_index_table), kv_store_(KvStoreId(raft_group_id_), data_dir, data.snapshot_schedules), fs_manager_(data.fs_manager), wal_dir_(wal_dir), @@ -795,26 +832,17 @@ RaftGroupMetadata::RaftGroupMetadata( cdc_sdk_min_checkpoint_op_id_(OpId::Invalid()), cdc_sdk_safe_time_(HybridTime::kInvalid), log_prefix_(consensus::MakeTabletLogPrefix(raft_group_id_, fs_manager_->uuid())) { - CHECK(data.table_info->schema() - .has_column_ids()); // how to do this if metadata in rocksdb for primary table - - // probably we can check if schema is initialized - CHECK_GT(data.table_info->schema().num_key_columns(), 0); - kv_store_.tables.emplace(primary_table_id_, data.table_info); - kv_store_.UpdateColocationMap(data.table_info); - bool is_ts_tablet = data.table_info->table_id != master::kSysCatalogTableId; - if (is_ts_tablet && FLAGS_ts_tableinfo_in_rocksdb && - data.table_info->table_type == PGSQL_TABLE_TYPE) { + if (data.table_info) { + CHECK(data.table_info->schema().has_column_ids()); + CHECK_GT(data.table_info->schema().num_key_columns(), 0); + kv_store_.tables.emplace(primary_table_id_, data.table_info); + kv_store_.UpdateColocationMap(data.table_info); + } + bool is_ts_tablet = primary_table_id_ != master::kSysCatalogTableId; + if (is_ts_tablet && FLAGS_ts_tableinfo_in_rocksdb && primary_table_type_ == PGSQL_TABLE_TYPE) { // kv_store_.initial_primary_table = data.table_info; kv_store_.metadata_schema = kv_store_.BuildMetadataSchema(); } - // else { - // CHECK(data.table_info->schema() - // .has_column_ids()); // how to do this if metadata in rocksdb for primary table - - // // probably we can check if schema is initialized - // CHECK_GT(data.table_info->schema().num_key_columns(), 0); - // kv_store_.tables.emplace(primary_table_id_, data.table_info); - // kv_store_.UpdateColocationMap(data.table_info); - // } } RaftGroupMetadata::~RaftGroupMetadata() { diff --git a/src/yb/tablet/tablet_metadata.h b/src/yb/tablet/tablet_metadata.h index 387d9ba412fa..19e6228e8491 100644 --- a/src/yb/tablet/tablet_metadata.h +++ b/src/yb/tablet/tablet_metadata.h @@ -255,11 +255,28 @@ struct KvStoreInfo { struct RaftGroupMetadataData { FsManager* fs_manager; TableInfoPtr table_info; + TableId primary_table_id; + TableType primary_table_type; + bool is_transactional; + bool is_index_table; RaftGroupId raft_group_id; Partition partition; TabletDataState tablet_data_state; bool colocated = false; std::vector snapshot_schedules; + + RaftGroupMetadataData( + FsManager* fs_manager_, TableInfoPtr table_info_, RaftGroupId raft_group_id_, + Partition partition_, TabletDataState tablet_data_state_, bool colocated_ = false, + std::vector snapshot_schedules_ = {}); + + RaftGroupMetadataData( + FsManager* fs_manager_, TableId primary_table_id_, TableType primary_table_type_, + bool is_transactional_, bool is_index_table_, RaftGroupId raft_group_id_, + Partition partition_, TabletDataState tablet_data_state_, bool colocated_ = false, + std::vector snapshot_schedules_ = {}); + + RaftGroupMetadataData(){} }; // At startup, the TSTabletManager will load a RaftGroupMetadata for each diff --git a/src/yb/tserver/remote_bootstrap_client.cc b/src/yb/tserver/remote_bootstrap_client.cc index 52d958d10bbd..7ec49120672e 100644 --- a/src/yb/tserver/remote_bootstrap_client.cc +++ b/src/yb/tserver/remote_bootstrap_client.cc @@ -257,6 +257,7 @@ Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid, const TableId table_id = resp.superblock().primary_table_id(); const bool colocated = resp.superblock().colocated(); + const bool has_metadata_schema = resp.superblock().kv_store().has_metadata_schema(); const tablet::TableInfoPB* table_ptr = nullptr; for (auto& table_pb : kv_store->tables()) { if (table_pb.table_id() == table_id) { @@ -264,16 +265,11 @@ Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid, break; } } - // TODO - // if (!table_ptr && kv_store->has_initial_primary_table()) { - // table_ptr = &kv_store->initial_primary_table(); - // } - if (!table_ptr) { + if (!table_ptr && !has_metadata_schema) { return STATUS(InvalidArgument, Format( "Tablet $0: Superblock's KV-store doesn't contain primary table $1", tablet_id_, table_id)); } - const auto& table = *table_ptr; downloader_.Start( proxy_, resp.session_id(), MonoDelta::FromMilliseconds(resp.session_idle_timeout_millis())); @@ -296,9 +292,6 @@ Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid, } remote_committed_cstate_.reset(resp.release_initial_committed_cstate()); - Schema schema; - RETURN_NOT_OK_PREPEND(SchemaFromPB( - table.schema(), &schema), "Cannot deserialize schema from remote superblock"); string data_root_dir; string wal_root_dir; if (replace_tombstoned_tablet_) { @@ -341,8 +334,6 @@ Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid, } else { Partition partition; Partition::FromPB(superblock_->partition(), &partition); - PartitionSchema partition_schema; - RETURN_NOT_OK(PartitionSchema::FromPB(table.partition_schema(), schema, &partition_schema)); // Create the superblock on disk. if (ts_manager != nullptr) { ts_manager->GetAndRegisterDataAndWalDir(&fs_manager(), @@ -351,24 +342,39 @@ Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid, &data_root_dir, &wal_root_dir); } - auto table_info = std::make_shared( - consensus::MakeTabletLogPrefix(tablet_id_, fs_manager().uuid()), - tablet::Primary::kTrue, table_id, table.namespace_name(), table.table_name(), - table.table_type(), schema, IndexMap(table.indexes()), - table.has_index_info() ? boost::optional(table.index_info()) : boost::none, - table.schema_version(), partition_schema); + fs_manager().SetTabletPathByDataPath(tablet_id_, data_root_dir); - auto create_result = RaftGroupMetadata::CreateNew( - tablet::RaftGroupMetadataData { - .fs_manager = &fs_manager(), - .table_info = table_info, - .raft_group_id = tablet_id_, - .partition = partition, - .tablet_data_state = tablet::TABLET_DATA_COPYING, - .colocated = colocated, - .snapshot_schedules = {}, - }, - data_root_dir, wal_root_dir); + + tablet::RaftGroupMetadataData metadata_data; + if (table_ptr) { + const auto& table = *table_ptr; + Schema schema; + RETURN_NOT_OK_PREPEND( + SchemaFromPB(table.schema(), &schema), + "Cannot deserialize schema from remote superblock"); + PartitionSchema partition_schema; + RETURN_NOT_OK(PartitionSchema::FromPB(table.partition_schema(), schema, &partition_schema)); + auto table_info = std::make_shared( + consensus::MakeTabletLogPrefix(tablet_id_, fs_manager().uuid()), tablet::Primary::kTrue, + table_id, table.namespace_name(), table.table_name(), table.table_type(), schema, + IndexMap(table.indexes()), + table.has_index_info() ? boost::optional(table.index_info()) : boost::none, + table.schema_version(), partition_schema); + metadata_data = tablet::RaftGroupMetadataData{ + &fs_manager(), table_info, tablet_id_, partition, tablet::TABLET_DATA_COPYING, colocated}; + } else { + DCHECK(resp.superblock().has_primary_table_type()); + DCHECK(resp.superblock().has_transactional()); + DCHECK(resp.superblock().has_index_table()); + auto primary_table_type = resp.superblock().primary_table_type(); + auto transactional = resp.superblock().transactional(); + auto index_table = resp.superblock().index_table(); + metadata_data = tablet::RaftGroupMetadataData{ + &fs_manager(), table_id, primary_table_type, transactional, index_table, + tablet_id_, partition, tablet::TABLET_DATA_COPYING, colocated}; + } + + auto create_result = RaftGroupMetadata::CreateNew(metadata_data, data_root_dir, wal_root_dir); if (ts_manager != nullptr && !create_result.ok()) { ts_manager->UnregisterDataWalDir(table_id, tablet_id_, data_root_dir, wal_root_dir); } @@ -376,15 +382,19 @@ Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid, meta_ = std::move(*create_result); vector deleted_cols; - for (const DeletedColumnPB& col_pb : table.deleted_cols()) { - DeletedColumn col; - RETURN_NOT_OK(DeletedColumn::FromPB(col_pb, &col)); - deleted_cols.push_back(col); + if (table_ptr) { + const auto& table = *table_ptr; + Schema schema; + RETURN_NOT_OK_PREPEND( + SchemaFromPB(table.schema(), &schema), + "Cannot deserialize schema from remote superblock"); + for (const DeletedColumnPB& col_pb : table.deleted_cols()) { + DeletedColumn col; + RETURN_NOT_OK(DeletedColumn::FromPB(col_pb, &col)); + deleted_cols.push_back(col); + } + meta_->SetSchema(schema, IndexMap(table.indexes()), deleted_cols, table.schema_version()); } - meta_->SetSchema(schema, - IndexMap(table.indexes()), - deleted_cols, - table.schema_version()); // Replace rocksdb_dir in the received superblock with our rocksdb_dir. kv_store->set_rocksdb_dir(meta_->rocksdb_dir()); diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index d8e01c5794bb..919c1a2c4b9c 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -749,13 +749,13 @@ Result TSTabletManager::CreateNewTablet( fs_manager_, table_info->table_id, tablet_id, &data_root_dir, &wal_root_dir); fs_manager_->SetTabletPathByDataPath(tablet_id, data_root_dir); auto create_result = RaftGroupMetadata::CreateNew(tablet::RaftGroupMetadataData { - .fs_manager = fs_manager_, - .table_info = table_info, - .raft_group_id = tablet_id, - .partition = partition, - .tablet_data_state = TABLET_DATA_READY, - .colocated = colocated, - .snapshot_schedules = snapshot_schedules, + fs_manager_, + table_info, + tablet_id, + partition, + TABLET_DATA_READY, + colocated, + snapshot_schedules, }, data_root_dir, wal_root_dir); if (!create_result.ok()) { UnregisterDataWalDir(table_info->table_id, tablet_id, data_root_dir, wal_root_dir); From 5bcee656ed1933ec8925320e6c4c3de1be4e256c Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Sat, 28 Jan 2023 09:37:00 +0530 Subject: [PATCH 11/20] Null pointer bug fix --- src/yb/tablet/tablet.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index 0f0b3ddead87..4cee08dddcb6 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -443,8 +443,10 @@ void Tablet::Init() { << metadata_->schema_version(); auto table_info = metadata_->primary_table_info(); - table_metrics_entity_->SetAttribute("table_name", table_info->table_name); - table_metrics_entity_->SetAttribute("namespace_name", table_info->namespace_name); + if (table_metrics_entity_) { + table_metrics_entity_->SetAttribute("table_name", table_info->table_name); + table_metrics_entity_->SetAttribute("namespace_name", table_info->namespace_name); + } bool has_index = !table_info->index_map->empty(); From 5cbead961a504d010947edd0607630d38dd3ca8d Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Sat, 28 Jan 2023 13:52:01 +0530 Subject: [PATCH 12/20] Skip schema version in heartbeat till table metadata is loaded --- src/yb/tablet/tablet_metadata.h | 7 +++++++ src/yb/tserver/ts_tablet_manager.cc | 9 ++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/yb/tablet/tablet_metadata.h b/src/yb/tablet/tablet_metadata.h index 19e6228e8491..297f7be88595 100644 --- a/src/yb/tablet/tablet_metadata.h +++ b/src/yb/tablet/tablet_metadata.h @@ -591,6 +591,13 @@ class RaftGroupMetadata : public RefCountedThreadSafe, return primary_table_info_unlocked(); } + bool has_primary_table_info() const { + std::lock_guard lock(data_mutex_); + const auto& tables = kv_store_.tables; + const auto itr = tables.find(primary_table_id_); + return itr != tables.end(); + } + bool IsTableMetadataInRocksDB() const { std::lock_guard lock(data_mutex_); return kv_store_.IsTableMetadataInRocksDB(); diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index 919c1a2c4b9c..77b0a420b634 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -2092,15 +2092,18 @@ void TSTabletManager::CreateReportedTabletPB(const TabletPeerPtr& tablet_peer, AppStatusPB* error_status = reported_tablet->mutable_error(); StatusToPB(tablet_peer->error(), error_status); } - reported_tablet->set_schema_version(tablet_peer->tablet_metadata()->schema_version()); - // reported_tablet->set_schema_version(0); + if (tablet_peer->tablet_metadata()->has_primary_table_info()) { + reported_tablet->set_schema_version(tablet_peer->tablet_metadata()->schema_version()); + } else { + LOG(INFO) + << "Primary table metadata not loaded yet, not setting the schema version in heartbeat"; + } auto& id_to_version = *reported_tablet->mutable_table_to_version(); // Attach schema versions of all tables including the colocated ones. for (const auto& table_id : tablet_peer->tablet_metadata()->GetAllColocatedTables()) { if (id_to_version.find(table_id) == id_to_version.end()) { id_to_version[table_id] = tablet_peer->tablet_metadata()->schema_version(table_id); - // id_to_version[table_id] = 0; } } From 73cda9d54ba91ba0d72e97bc0e770e992267b764 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Sat, 28 Jan 2023 19:51:06 +0530 Subject: [PATCH 13/20] Minor bug fix --- src/yb/tablet/tablet_bootstrap.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/yb/tablet/tablet_bootstrap.cc b/src/yb/tablet/tablet_bootstrap.cc index 43ef760337e3..2c78dfd73bb6 100644 --- a/src/yb/tablet/tablet_bootstrap.cc +++ b/src/yb/tablet/tablet_bootstrap.cc @@ -567,6 +567,7 @@ class TabletBootstrap { if (!has_blocks && !needs_recovery) { LOG_WITH_PREFIX(INFO) << "No blocks or log segments found. Creating new log."; RETURN_NOT_OK_PREPEND(OpenLog(CreateNewSegment::kTrue), "Failed to open new log"); + tablet_->Init(); RETURN_NOT_OK(FinishBootstrap("No bootstrap required, opened a new log", rebuilt_log, rebuilt_tablet)); From 8752172dffff164f50012a44e6a148009a1ecd8b Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Sat, 28 Jan 2023 19:57:23 +0530 Subject: [PATCH 14/20] Set table & namespace name attribute if already available --- src/yb/tablet/tablet.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index 4cee08dddcb6..b26005974daf 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -437,6 +437,7 @@ class Tablet::RegularRocksDbListener : public rocksdb::EventListener { }; void Tablet::Init() { + LOG_WITH_FUNC(INFO) << "Inside init"; key_schema_ = std::make_unique(metadata_->schema()->CreateKeyProjection()); CHECK(schema()->has_column_ids()); LOG_WITH_PREFIX(INFO) << "Schema version for " << metadata_->table_name() << " is " @@ -444,6 +445,7 @@ void Tablet::Init() { auto table_info = metadata_->primary_table_info(); if (table_metrics_entity_) { + LOG_WITH_FUNC(INFO) << "Setting attribute table_name " << table_info->table_name; table_metrics_entity_->SetAttribute("table_name", table_info->table_name); table_metrics_entity_->SetAttribute("namespace_name", table_info->namespace_name); } @@ -498,8 +500,10 @@ Tablet::Tablet(const TabletInitData& data) MetricEntity::AttributeMap attrs; // TODO(KUDU-745): table_id is apparently not set in the metadata. attrs["table_id"] = metadata_->table_id(); - // attrs["table_name"] = metadata_->table_name(); - // attrs["namespace_name"] = metadata_->namespace_name(); + if (metadata_->has_primary_table_info()) { + attrs["table_name"] = metadata_->table_name(); + attrs["namespace_name"] = metadata_->namespace_name(); + } table_metrics_entity_ = METRIC_ENTITY_table.Instantiate(data.metric_registry, metadata_->table_id(), attrs); tablet_metrics_entity_ = From 895ac3a514fa609afdb7b69212db0880351d5c3c Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Sat, 28 Jan 2023 20:33:46 +0530 Subject: [PATCH 15/20] Sets table and namespace atts to tablet metrics entity --- src/yb/tablet/tablet.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index b26005974daf..df7b624e374c 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -445,10 +445,13 @@ void Tablet::Init() { auto table_info = metadata_->primary_table_info(); if (table_metrics_entity_) { - LOG_WITH_FUNC(INFO) << "Setting attribute table_name " << table_info->table_name; table_metrics_entity_->SetAttribute("table_name", table_info->table_name); table_metrics_entity_->SetAttribute("namespace_name", table_info->namespace_name); } + if (tablet_metrics_entity_) { + tablet_metrics_entity_->SetAttribute("table_name", table_info->table_name); + tablet_metrics_entity_->SetAttribute("namespace_name", table_info->namespace_name); + } bool has_index = !table_info->index_map->empty(); @@ -500,10 +503,6 @@ Tablet::Tablet(const TabletInitData& data) MetricEntity::AttributeMap attrs; // TODO(KUDU-745): table_id is apparently not set in the metadata. attrs["table_id"] = metadata_->table_id(); - if (metadata_->has_primary_table_info()) { - attrs["table_name"] = metadata_->table_name(); - attrs["namespace_name"] = metadata_->namespace_name(); - } table_metrics_entity_ = METRIC_ENTITY_table.Instantiate(data.metric_registry, metadata_->table_id(), attrs); tablet_metrics_entity_ = From 83cd792dcc8bd84f0ae541b9cbd9dff99a70d093 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Sat, 28 Jan 2023 23:12:23 +0530 Subject: [PATCH 16/20] Fixes packed row tests --- src/yb/tablet/tablet.cc | 6 +++++- src/yb/tablet/tablet.h | 2 +- src/yb/yql/pgwrapper/pg_packed_row-test.cc | 20 ++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index df7b624e374c..98e6eb61f52d 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -3583,7 +3583,7 @@ void Tablet::TEST_DocDBDumpToLog(IncludeIntents include_intents) { } } -size_t Tablet::TEST_CountRegularDBRecords() { +size_t Tablet::TEST_CountRegularDBRecords(bool skip_metadata_entries) { if (!regular_db_) return 0; rocksdb::ReadOptions read_opts; read_opts.query_id = rocksdb::kDefaultQueryId; @@ -3591,6 +3591,10 @@ size_t Tablet::TEST_CountRegularDBRecords() { size_t result = 0; for (iter.SeekToFirst(); iter.Valid(); iter.Next()) { + if (skip_metadata_entries && + iter.key().starts_with(docdb::KeyEntryTypeAsChar::kTabletMetadata)) { + continue; + } ++result; } return result; diff --git a/src/yb/tablet/tablet.h b/src/yb/tablet/tablet.h index 54e1b0e40c70..19bd12487460 100644 --- a/src/yb/tablet/tablet.h +++ b/src/yb/tablet/tablet.h @@ -623,7 +623,7 @@ class Tablet : public AbstractTablet, // Dumps DocDB contents to log, every record as a separate log message, with the given prefix. void TEST_DocDBDumpToLog(IncludeIntents include_intents); - size_t TEST_CountRegularDBRecords(); + size_t TEST_CountRegularDBRecords(bool skip_metadata_entries = true); Status CreateReadIntents( const TransactionMetadataPB& transaction_metadata, diff --git a/src/yb/yql/pgwrapper/pg_packed_row-test.cc b/src/yb/yql/pgwrapper/pg_packed_row-test.cc index 6b37479f0a12..63f452e376b3 100644 --- a/src/yb/yql/pgwrapper/pg_packed_row-test.cc +++ b/src/yb/yql/pgwrapper/pg_packed_row-test.cc @@ -272,7 +272,11 @@ TEST_F(PgPackedRowTest, YB_DISABLE_TEST_IN_TSAN(Random)) { for (const auto& line : sorted_values) { LOG(INFO) << "Record: " << line; } - ASSERT_EQ(values.size(), key_state.size()); + int metadata_entries = 0; + if (FLAGS_ts_tableinfo_in_rocksdb) { + metadata_entries = 1; // additional metadata entry for the table + } + ASSERT_EQ(values.size(), key_state.size() + metadata_entries); } } @@ -424,9 +428,6 @@ void PgPackedRowTest::TestColocated(int num_keys, int num_expected_records) { auto conn = ASSERT_RESULT(Connect()); ASSERT_OK(conn.Execute("CREATE DATABASE test WITH colocated = true")); TestCompaction(num_keys, "WITH (colocated = true)"); - if (FLAGS_ts_tableinfo_in_rocksdb) { - num_expected_records += 2; // an additional metadata entry per table - } CheckNumRecords(cluster_.get(), num_expected_records); } @@ -456,23 +457,18 @@ TEST_F(PgPackedRowTest, YB_DISABLE_TEST_IN_TSAN(ColocatedPackRowDisabled)) { ASSERT_OK(conn.Execute( "CREATE TABLE t1 (key INT PRIMARY KEY, value TEXT, payload TEXT) WITH (colocated = true)")); - int metadata_entries = 0; - if (FLAGS_ts_tableinfo_in_rocksdb) { - metadata_entries = 1; // an additional metadata entry for the table - } - ASSERT_OK(conn.Execute("INSERT INTO t1 (key, value, payload) VALUES (1, '', '')")); // The only row should not be packed. - CheckNumRecords(cluster_.get(), 3 + metadata_entries); + CheckNumRecords(cluster_.get(), 3); // Trigger full row update. ASSERT_OK(conn.Execute("UPDATE t1 SET value = '1', payload = '1' WHERE key = 1")); // The updated row should not be packed. - CheckNumRecords(cluster_.get(), 5 + metadata_entries); + CheckNumRecords(cluster_.get(), 5); // Enable pack row for colocated table and trigger compaction. ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_enable_packed_row_for_colocated_table) = true; ASSERT_OK(cluster_->CompactTablets()); - CheckNumRecords(cluster_.get(), 1 + metadata_entries); + CheckNumRecords(cluster_.get(), 1); } TEST_F(PgPackedRowTest, YB_DISABLE_TEST_IN_TSAN(CompactAfterTransaction)) { From 8121c8c8a2fa47c40baf55109bf5706e381ec862 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Sun, 29 Jan 2023 19:29:36 +0530 Subject: [PATCH 17/20] Fixes check failed has_primary_tablet_type() --- src/yb/tserver/remote_bootstrap_client.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/yb/tserver/remote_bootstrap_client.cc b/src/yb/tserver/remote_bootstrap_client.cc index 7ec49120672e..72ae8a7a2bab 100644 --- a/src/yb/tserver/remote_bootstrap_client.cc +++ b/src/yb/tserver/remote_bootstrap_client.cc @@ -363,12 +363,12 @@ Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid, metadata_data = tablet::RaftGroupMetadataData{ &fs_manager(), table_info, tablet_id_, partition, tablet::TABLET_DATA_COPYING, colocated}; } else { - DCHECK(resp.superblock().has_primary_table_type()); - DCHECK(resp.superblock().has_transactional()); - DCHECK(resp.superblock().has_index_table()); - auto primary_table_type = resp.superblock().primary_table_type(); - auto transactional = resp.superblock().transactional(); - auto index_table = resp.superblock().index_table(); + DCHECK(superblock_->has_primary_table_type()); + DCHECK(superblock_->has_transactional()); + DCHECK(superblock_->has_index_table()); + auto primary_table_type = superblock_->primary_table_type(); + auto transactional = superblock_->transactional(); + auto index_table = superblock_->index_table(); metadata_data = tablet::RaftGroupMetadataData{ &fs_manager(), table_id, primary_table_type, transactional, index_table, tablet_id_, partition, tablet::TABLET_DATA_COPYING, colocated}; From fd7e8b14d38f0d35eedb78ef42e786ad812389af Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Mon, 30 Jan 2023 18:00:57 +0530 Subject: [PATCH 18/20] Adds a todo --- src/yb/tserver/ts_tablet_manager.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index 77b0a420b634..f52b62d3c160 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -2092,6 +2092,7 @@ void TSTabletManager::CreateReportedTabletPB(const TabletPeerPtr& tablet_peer, AppStatusPB* error_status = reported_tablet->mutable_error(); StatusToPB(tablet_peer->error(), error_status); } + // TODO: We should wait till bootstraping is finished to avoid populating the stale schema version if (tablet_peer->tablet_metadata()->has_primary_table_info()) { reported_tablet->set_schema_version(tablet_peer->tablet_metadata()->schema_version()); } else { From ae24c140512b51cc9d4ed646b4914aa21171a0f2 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Mon, 30 Jan 2023 20:28:22 +0530 Subject: [PATCH 19/20] Adds a todo for future --- src/yb/tablet/tablet_bootstrap.cc | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/yb/tablet/tablet_bootstrap.cc b/src/yb/tablet/tablet_bootstrap.cc index 2c78dfd73bb6..30c933775ff7 100644 --- a/src/yb/tablet/tablet_bootstrap.cc +++ b/src/yb/tablet/tablet_bootstrap.cc @@ -1493,13 +1493,14 @@ class TabletBootstrap { LOG_WITH_FUNC(INFO) << "Inside PlayChangeMetadataRequest"; ChangeMetadataOperation operation(tablet_, log_.get(), request); - // If table id isn't in metadata, ignore the replay as the table might've been dropped. - // auto table_info = meta_->GetTableInfo(operation.table_id().ToBuffer()); - // if (!table_info.ok()) { - // LOG_WITH_PREFIX(WARNING) << "Table ID " << operation.table_id() - // << " not found in metadata, skipping this ChangeMetadataRequest"; - // return Status::OK(); - // } + // TODO: Once table metadata is stored in RocksDB, this check should not be required. + // If table id isn't in metadata, ignore the replay as the table might've been dropped + auto table_info = meta_->GetTableInfo(operation.table_id().ToBuffer()); + if (!table_info.ok()) { + LOG_WITH_PREFIX(WARNING) << "Table ID " << operation.table_id() + << " not found in metadata, skipping this ChangeMetadataRequest"; + return Status::OK(); + } RETURN_NOT_OK(operation.Prepare(IsLeaderSide::kTrue)); From 4c51748b47699c1fef6e267eef0f655511125ef6 Mon Sep 17 00:00:00 2001 From: Arpan Agrawal Date: Wed, 1 Feb 2023 14:33:36 +0530 Subject: [PATCH 20/20] Cleanup --- src/yb/tablet/metadata.proto | 4 ---- src/yb/tablet/tablet.cc | 22 ---------------------- src/yb/tablet/tablet_metadata.cc | 14 -------------- src/yb/tablet/tablet_metadata.h | 2 -- src/yb/tserver/ts_tablet_manager.cc | 5 ----- 5 files changed, 47 deletions(-) diff --git a/src/yb/tablet/metadata.proto b/src/yb/tablet/metadata.proto index f6a598f41339..ca98489a268d 100644 --- a/src/yb/tablet/metadata.proto +++ b/src/yb/tablet/metadata.proto @@ -112,10 +112,6 @@ message KvStoreInfoPB { // compacted. Defaults to 0 (i.e. HybridTime::kMin). optional uint64 last_full_compaction_time = 10; - // // Initial version of the primary table. Set only when the tablet metadata is in RocksDB. - // // Any subsequent version goes to RocksDB. - // optional TableInfoPB initial_primary_table = 11; - // Table metadata schema. Set only when the metadata is in RocksDB. optional SchemaPB metadata_schema = 11; } diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index 98e6eb61f52d..5b5372c5169b 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -470,7 +470,6 @@ void Tablet::Init() { } Tablet::Tablet(const TabletInitData& data) - // : key_schema_(std::make_unique(data.metadata->schema()->CreateKeyProjection())), : metadata_(data.metadata), table_type_(data.metadata->table_type()), log_anchor_registry_(data.log_anchor_registry), @@ -495,10 +494,6 @@ Tablet::Tablet(const TabletInitData& data) clock_, data.allowed_history_cutoff_provider, metadata_.get())), full_compaction_pool_(data.full_compaction_pool), ts_post_split_compaction_added_(std::move(data.post_split_compaction_added)) { - // CHECK(schema()->has_column_ids()); - // LOG_WITH_PREFIX(INFO) << "Schema version for " << metadata_->table_name() << " is " - // << metadata_->schema_version(); - if (data.metric_registry) { MetricEntity::AttributeMap attrs; // TODO(KUDU-745): table_id is apparently not set in the metadata. @@ -520,9 +515,6 @@ Tablet::Tablet(const TabletInitData& data) mem_tracker_->SetMetricEntity(tablet_metrics_entity_); } - // auto table_info = metadata_->primary_table_info(); - // bool has_index = !table_info->index_map->empty(); - // bool transactional = data.metadata->schema()->table_properties().is_transactional(); bool transactional = data.metadata->is_transactional(); if (transactional) { server::HybridClock::EnableClockSkewControl(); @@ -540,19 +532,6 @@ Tablet::Tablet(const TabletInitData& data) } } - // // Create index table metadata cache for secondary index update. - // if (has_index) { - // CreateNewYBMetaDataCache(); - // } - - // // If this is a unique index tablet, set up the index primary key schema. - // if (table_info->index_info && table_info->index_info->is_unique()) { - // unique_index_key_schema_ = std::make_unique(); - // const auto ids = table_info->index_info->index_key_column_ids(); - // CHECK_OK(table_info->schema().CreateProjectionByIdsIgnoreMissing( - // ids, unique_index_key_schema_.get())); - // } - if (data.transaction_coordinator_context && table_type_ == TableType::TRANSACTION_STATUS_TABLE_TYPE) { transaction_coordinator_ = std::make_unique( @@ -599,7 +578,6 @@ Status Tablet::Open() { TRACE_EVENT0("tablet", "Tablet::Open"); std::lock_guard lock(component_lock_); CHECK_EQ(state_, kInitialized) << "already open"; - // CHECK(schema()->has_column_ids()); switch (table_type_) { case TableType::PGSQL_TABLE_TYPE: FALLTHROUGH_INTENDED; diff --git a/src/yb/tablet/tablet_metadata.cc b/src/yb/tablet/tablet_metadata.cc index 0ead160d7a74..7ffa7482ebcd 100644 --- a/src/yb/tablet/tablet_metadata.cc +++ b/src/yb/tablet/tablet_metadata.cc @@ -426,11 +426,6 @@ Status KvStoreInfo::LoadFromPB(const std::string& tablet_log_prefix, } RETURN_NOT_OK(LoadTablesFromPB(tablet_log_prefix, pb.tables(), primary_table_id)); if (pb.has_metadata_schema()) { - // initial_primary_table = VERIFY_RESULT( - // TableInfo::LoadFromPB(tablet_log_prefix, primary_table_id, pb.initial_primary_table())); - // tables.emplace(primary_table_id, initial_primary_table); - // UpdateColocationMap(initial_primary_table); - // DCHECK(pb.has_metadata_schema()); RETURN_NOT_OK(SchemaFromPB(pb.metadata_schema(), &metadata_schema)); } return Status::OK(); @@ -492,7 +487,6 @@ void KvStoreInfo::ToPB(const TableId& primary_table_id, KvStoreInfoPB* pb) const pb->set_last_full_compaction_time(last_full_compaction_time); if (IsTableMetadataInRocksDB()) { - // initial_primary_table->ToPB(pb->mutable_initial_primary_table()); SchemaToPB(metadata_schema, pb->mutable_metadata_schema()); } else { // Putting primary table first, then all other tables. @@ -815,10 +809,6 @@ RaftGroupMetadata::RaftGroupMetadata( : state_(kNotWrittenYet), raft_group_id_(data.raft_group_id), partition_(std::make_shared(data.partition)), - // primary_table_id_(data.table_info->table_id), - // primary_table_type_(data.table_info->table_type), - // is_transactional_(data.table_info->schema().table_properties().is_transactional()), - // is_index_table_(data.table_info->index_info), primary_table_id_(data.primary_table_id), primary_table_type_(data.primary_table_type), is_transactional_(data.is_transactional), @@ -840,7 +830,6 @@ RaftGroupMetadata::RaftGroupMetadata( } bool is_ts_tablet = primary_table_id_ != master::kSysCatalogTableId; if (is_ts_tablet && FLAGS_ts_tableinfo_in_rocksdb && primary_table_type_ == PGSQL_TABLE_TYPE) { - // kv_store_.initial_primary_table = data.table_info; kv_store_.metadata_schema = kv_store_.BuildMetadataSchema(); } } @@ -906,21 +895,18 @@ Status RaftGroupMetadata::LoadFromSuperBlock(const RaftGroupReplicaSuperBlockPB& } else { primary_table_type_ = primary_table_info()->table_type; } - LOG_WITH_FUNC(INFO) << "primary_table_type_ " << primary_table_type_; if (superblock.has_transactional()) { is_transactional_ = superblock.transactional(); } else { is_transactional_ = primary_table_info()->schema().table_properties().is_transactional(); } - LOG_WITH_FUNC(INFO) << "is_transactional_ " << is_transactional_; if (superblock.has_index_table()) { is_index_table_ = superblock.index_table(); } else { is_index_table_ = primary_table_info()->index_info != nullptr; } - LOG_WITH_FUNC(INFO) << "is_index_table_ " << is_index_table_; wal_dir_ = superblock.wal_dir(); tablet_data_state_ = superblock.tablet_data_state(); diff --git a/src/yb/tablet/tablet_metadata.h b/src/yb/tablet/tablet_metadata.h index 297f7be88595..5f338a41590a 100644 --- a/src/yb/tablet/tablet_metadata.h +++ b/src/yb/tablet/tablet_metadata.h @@ -225,8 +225,6 @@ struct KvStoreInfo { // See KvStoreInfoPB field with the same name. uint64_t last_full_compaction_time = kNoLastFullCompactionTime; - // TableInfoPtr initial_primary_table = nullptr; - // Map of tables sharing this KV-store indexed by the table id. // If pieces of the same table live in the same Raft group they should be located in different // KV-stores. diff --git a/src/yb/tserver/ts_tablet_manager.cc b/src/yb/tserver/ts_tablet_manager.cc index f52b62d3c160..30c37ccc77e1 100644 --- a/src/yb/tserver/ts_tablet_manager.cc +++ b/src/yb/tserver/ts_tablet_manager.cc @@ -774,10 +774,6 @@ Result TSTabletManager::CreateNewTablet( TabletPeerPtr new_peer = VERIFY_RESULT(CreateAndRegisterTabletPeer(meta, NEW_PEER)); // We can run this synchronously since there is nothing to bootstrap. - // RETURN_NOT_OK( - // open_tablet_pool_->SubmitFunc(std::bind(&TSTabletManager::OpenTablet, this, meta, - // deleter))); - OpenTablet(meta, deleter, table_info); return new_peer; @@ -2125,7 +2121,6 @@ void TSTabletManager::CreateReportedTabletPB(const TabletPeerPtr& tablet_peer, // Set the hide status of the tablet. reported_tablet->set_is_hidden(tablet_peer->tablet_metadata()->hidden()); - // LOG_WITH_FUNC(INFO) << "reported_tablet " << reported_tablet->ShortDebugString(); } void TSTabletManager::GenerateTabletReport(TabletReportPB* report, bool include_bootstrap) {