Skip to content

Commit

Permalink
[#16109] docdb: Fix deadlock with YqlPartitionsVTable
Browse files Browse the repository at this point in the history
Summary:
Fixing lock order inversion with yql system.partitions building and table deletions:
One thread doing deletes in catalog manager is holding CM's mutex_ and blocked on getting the yqlpartitions mutex_:
```
#5  std::lock_guard<std::shared_timed_mutex>::lock_guard (__m=..., this=<synthetic pointer>) at /cases/home/yugabyte/yb-software/yugabyte-2.12.5.0/linuxbrew-xxxxxxxxxxxxxxxxxxxxxxxx/Cellar/gcc/5.5.0_4/include/c++/5.5.0/mutex:386
#6  yb::master::YQLPartitionsVTable::RemoveFromCache (this=0x182091e0, table_id=...) at ../../src/yb/master/yql_partitions_vtable.cc:262
#7  0x00007f384c0931e5 in yb::master::CatalogManager::DeleteTableInternal (this=this@entry=0x275a000, req=req@entry=0x225a2138, resp=resp@entry=0x225a2160, rpc=rpc@entry=0x7f3815561cb0) at ../../src/yb/master/catalog_manager.cc:4831
```
And another thread rebuilding the vtable has the yqlpartitions lock and is waiting on the CM mutex_:
```
#4  yb::NonRecursiveSharedLock<yb::rw_spinlock>::NonRecursiveSharedLock (this=0x7f3767405da0, mutex=...) at ../../src/yb/util/debug/lock_debug.h:41
#5  0x00007f384c04a4b1 in yb::master::CatalogManager::GetTables (this=0x275a000, mode=yb::master::GetTablesMode::kVisibleToClient) at ../../src/yb/master/catalog_manager.cc:5885
#6  0x00007f384c2148f2 in yb::master::YQLPartitionsVTable::GenerateAndCacheData (this=0x182091e0) at ../../src/yb/master/yql_partitions_vtable.cc:140
#7  0x00007f384c03c35d in yb::master::CatalogManager::RebuildYQLSystemPartitions (this=0x275a000) at ../../src/yb/master/catalog_manager.cc:10486
```

For now keeping the default of generating the partitions cache on changes, due to create table perf concerns of the purely bg thread approach

Test Plan:
```
ybd tsan --gtest_filter CppCassandraDriverTest.YQLPartitionsVtableCacheRefresh
```

Reviewers: bogdan, sergei, asrivastava

Reviewed By: asrivastava

Subscribers: kannan, ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D22943
  • Loading branch information
hulien22 committed Feb 17, 2023
1 parent 88c6a57 commit 68fae2f
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 42 deletions.
87 changes: 52 additions & 35 deletions src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -403,8 +403,10 @@ DEFINE_test_flag(int32, num_missing_tablets, 0, "Simulates missing tablets in a

DEFINE_RUNTIME_int32(partitions_vtable_cache_refresh_secs, 0,
"Amount of time to wait before refreshing the system.partitions cached vtable. "
"If generate_partitions_vtable_on_changes is set, then this background task will "
"update the cache using the internal map, but won't do any generating of the vtable.");
"If generate_partitions_vtable_on_changes is true and this flag is > 0, then this background "
"task will update the cached vtable using the internal map. "
"If generate_partitions_vtable_on_changes is false and this flag is > 0, then this background "
"task will be responsible for regenerating and updating the entire cached vtable.");

DEFINE_RUNTIME_int32(txn_table_wait_min_ts_count, 1,
"Minimum Number of TS to wait for before creating the transaction status table."
Expand Down Expand Up @@ -2061,27 +2063,28 @@ Status CatalogManager::AbortTableCreation(TableInfo* table,
// table that has failed to successfully create.
table->AbortTasksAndClose();
table->WaitTasksCompletion();
{
LockGuard lock(mutex_);

LockGuard lock(mutex_);
// Call AbortMutation() manually, as otherwise the lock won't be released.
for (const auto& tablet : tablets) {
tablet->mutable_metadata()->AbortMutation();
}
table->mutable_metadata()->AbortMutation();
auto tablet_map_checkout = tablet_map_.CheckOut();
for (const TabletId& tablet_id_to_erase : tablet_ids_to_erase) {
CHECK_EQ(tablet_map_checkout->erase(tablet_id_to_erase), 1)
<< "Unable to erase tablet " << tablet_id_to_erase << " from tablet map.";
}

// Call AbortMutation() manually, as otherwise the lock won't be released.
for (const auto& tablet : tablets) {
tablet->mutable_metadata()->AbortMutation();
}
table->mutable_metadata()->AbortMutation();
auto tablet_map_checkout = tablet_map_.CheckOut();
for (const TabletId& tablet_id_to_erase : tablet_ids_to_erase) {
CHECK_EQ(tablet_map_checkout->erase(tablet_id_to_erase), 1)
<< "Unable to erase tablet " << tablet_id_to_erase << " from tablet map.";
auto table_map_checkout = tables_.CheckOut();
table_names_map_.erase({table_namespace_id, table_name}); // Not present if PGSQL table.
CHECK_EQ(table_map_checkout->Erase(table_id), 1)
<< "Unable to erase table with id " << table_id << " from table ids map.";
}

auto table_map_checkout = tables_.CheckOut();
table_names_map_.erase({table_namespace_id, table_name}); // Not present if PGSQL table.
CHECK_EQ(table_map_checkout->Erase(table_id), 1)
<< "Unable to erase table with id " << table_id << " from table ids map.";

if (IsYcqlTable(*table)) {
GetYqlPartitionsVtable().RemoveFromCache(table->id());
// Don't process while holding on to mutex_ (#16109).
GetYqlPartitionsVtable().RemoveFromCache({table->id()});
}
return CheckIfNoLongerLeaderAndSetupError(s, resp);
}
Expand Down Expand Up @@ -5950,24 +5953,33 @@ Status CatalogManager::DeleteTableInternal(
// Also exclude hidden tables, that were already removed from this map.
if (std::any_of(tables.begin(), tables.end(), [](auto& t) { return t.remove_from_name_map; })) {
TRACE("Removing tables from by-name map");
LockGuard lock(mutex_);
for (const auto& table : tables) {
if (table.remove_from_name_map) {
TableInfoByNameMap::key_type key = {table.info->namespace_id(), table.info->name()};
if (table_names_map_.erase(key) != 1) {
LOG(WARNING) << "Could not remove table from map: " << key.first << "." << key.second;
}
vector<TableId> removed_ycql_tables;
{
LockGuard lock(mutex_);
for (const auto& table : tables) {
if (table.remove_from_name_map) {
TableInfoByNameMap::key_type key = {table.info->namespace_id(), table.info->name()};
if (table_names_map_.erase(key) != 1) {
LOG(WARNING) << "Could not remove table from map: " << key.first << "." << key.second;
}

// Also remove from the system.partitions table.
GetYqlPartitionsVtable().RemoveFromCache(table.info->id());
// Remove matviews from matview to pg table id map
matview_pg_table_ids_map_.erase(table.info->id());

// Remove matviews from matview to pg table id map
matview_pg_table_ids_map_.erase(table.info->id());
// Keep track of deleted ycql tables.
if (IsYcqlTable(*table.info)) {
removed_ycql_tables.push_back(table.info->id());
}
}
}
// We commit another map to increment its version and reset cache.
// Since table_name_map_ does not have version.
tables_.Commit();
}
if (!removed_ycql_tables.empty()) {
// Also remove from the system.partitions table.
GetYqlPartitionsVtable().RemoveFromCache(removed_ycql_tables);
}
// We commit another map to increment its version and reset cache.
// Since table_name_map_ does not have version.
tables_.Commit();
}

for (const auto& table : tables) {
Expand Down Expand Up @@ -12397,8 +12409,13 @@ Status CatalogManager::GetYQLPartitionsVTable(std::shared_ptr<SystemTablet>* tab
}

void CatalogManager::RebuildYQLSystemPartitions() {
if (YQLPartitionsVTable::GeneratePartitionsVTableWithBgTask() ||
YQLPartitionsVTable::GeneratePartitionsVTableOnChanges()) {
// This task will keep running, but only want it to do anything if
// FLAGS_partitions_vtable_cache_refresh_secs is explicitly set to some value >0.
const bool use_bg_thread_to_update_cache =
YQLPartitionsVTable::GeneratePartitionsVTableOnChanges() &&
FLAGS_partitions_vtable_cache_refresh_secs > 0;

if (YQLPartitionsVTable::GeneratePartitionsVTableWithBgTask() || use_bg_thread_to_update_cache) {
SCOPED_LEADER_SHARED_LOCK(l, this);
if (l.IsInitializedAndIsLeader()) {
if (system_partitions_tablet_ != nullptr) {
Expand Down
4 changes: 3 additions & 1 deletion src/yb/master/catalog_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -2016,7 +2016,9 @@ class CatalogManager : public tserver::TabletPeerLookupIf,

TSDescriptorVector GetAllLiveNotBlacklistedTServers() const;

const YQLPartitionsVTable& GetYqlPartitionsVtable() const;
// Get the ycql system.partitions vtable. Note that this has EXCLUDES(mutex_), in order to
// maintain lock ordering.
const YQLPartitionsVTable& GetYqlPartitionsVtable() const EXCLUDES(mutex_);

void InitializeTableLoadState(
const TableId& table_id, TSDescriptorVector ts_descs, CMPerTableLoadState* state);
Expand Down
14 changes: 10 additions & 4 deletions src/yb/master/yql_partitions_vtable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@

DECLARE_int32(partitions_vtable_cache_refresh_secs);

DEFINE_UNKNOWN_bool(use_cache_for_partitions_vtable, true,
DEFINE_RUNTIME_bool(use_cache_for_partitions_vtable, true,
"Whether we should use caching for system.partitions table.");

DEFINE_UNKNOWN_bool(generate_partitions_vtable_on_changes, true,
DEFINE_RUNTIME_bool(generate_partitions_vtable_on_changes, true,
"Whether we should generate the system.partitions vtable whenever relevant partition "
"changes occur.");

Expand Down Expand Up @@ -258,9 +258,15 @@ Status YQLPartitionsVTable::InsertTabletIntoRowUnlocked(
return Status::OK();
}

void YQLPartitionsVTable::RemoveFromCache(const TableId& table_id) const {
void YQLPartitionsVTable::RemoveFromCache(const std::vector<TableId>& table_ids) const {
if (!GeneratePartitionsVTableOnChanges() || table_ids.empty()) {
return;
}

std::lock_guard<std::shared_timed_mutex> lock(mutex_);
table_to_partition_start_to_row_map_.erase(table_id);
for (const auto& table_id : table_ids) {
table_to_partition_start_to_row_map_.erase(table_id);
}
// Need to update the cache as the map has been modified.
update_cache_ = true;
}
Expand Down
4 changes: 2 additions & 2 deletions src/yb/master/yql_partitions_vtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ class YQLPartitionsVTable : public YQLVirtualTable {
Result<std::shared_ptr<QLRowBlock>> RetrieveData(const QLReadRequestPB& request) const;
Result<std::shared_ptr<QLRowBlock>> GenerateAndCacheData() const;

// Remove a table from the system.partitions vtable.
void RemoveFromCache(const TableId& table_id) const;
// Remove tables from the system.partitions vtable.
void RemoveFromCache(const std::vector<TableId>& table_ids) const;

// Filter only tablets that have relevant system.partitions changes from a list of tablets that
// have heartbeated in and are being processed.
Expand Down

0 comments on commit 68fae2f

Please sign in to comment.