From 3c99a6a5d7223531512cd3f23103547c3877cc2b Mon Sep 17 00:00:00 2001
From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com>
Date: Tue, 12 Apr 2022 19:30:36 +0800
Subject: [PATCH 01/19] Update client-c (#4625)

close pingcap/tiflash#4624
---
 contrib/client-c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/client-c b/contrib/client-c
index bd2ea655141..4e50596db3c 160000
--- a/contrib/client-c
+++ b/contrib/client-c
@@ -1 +1 @@
-Subproject commit bd2ea65514109b8f17bd522c5d836656c2a1c6cb
+Subproject commit 4e50596db3c878f5bf8de86fe32638f09bf2c117

From 26d4f4a38ab296fb38bdb162a07912226c59402c Mon Sep 17 00:00:00 2001
From: jiaqizho
Date: Tue, 12 Apr 2022 20:04:36 +0800
Subject: [PATCH 02/19] Fix failing to find the right path during BlobStore
 restore (#4608)

ref pingcap/tiflash#3594
---
 dbms/src/Storages/Page/V3/BlobStore.cpp       | 200 +++++++-----------
 dbms/src/Storages/Page/V3/BlobStore.h         |  20 +-
 dbms/src/Storages/Page/V3/PageStorageImpl.cpp |   2 +
 .../Page/V3/tests/gtest_blob_store.cpp        |  21 +-
 .../Page/V3/tests/gtest_page_directory.cpp    |  13 +-
 5 files changed, 115 insertions(+), 141 deletions(-)

diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp
index 18f5b017e1b..24c2d0c0df9 100644
--- a/dbms/src/Storages/Page/V3/BlobStore.cpp
+++ b/dbms/src/Storages/Page/V3/BlobStore.cpp
@@ -50,12 +50,6 @@ namespace PS::V3
 {
 static constexpr bool BLOBSTORE_CHECKSUM_ON_READ = true;

-#ifndef NDEBUG
-static constexpr bool CHECK_STATS_ALL_IN_DISK = true;
-#else
-static constexpr bool CHECK_STATS_ALL_IN_DISK = false;
-#endif
-
 using BlobStat = BlobStore::BlobStats::BlobStat;
 using BlobStatPtr = BlobStore::BlobStats::BlobStatPtr;
 using ChecksumClass = Digest::CRC64;
@@ -74,6 +68,37 @@ BlobStore::BlobStore(String storage_name, const FileProviderPtr & file_provider_
 {
 }

+void BlobStore::registerPaths()
+{
+    for (const auto & path : delegator->listPaths())
+    {
+        Poco::File store_path(path);
+        if (!store_path.exists())
+        {
+            continue;
+        }
+
+        std::vector<String> file_list;
+        store_path.list(file_list);
+
+        for (const auto & blob_name : file_list)
+        {
+            const auto & [blob_id, err_msg] = BlobStats::getBlobIdFromName(blob_name);
+            auto lock_stats = blob_stats.lock();
+            if (blob_id != INVALID_BLOBFILE_ID)
+            {
+                Poco::File blob(fmt::format("{}/{}", path, blob_name));
+                delegator->addPageFileUsedSize({blob_id, 0}, blob.getSize(), path, true);
+                blob_stats.createStatNotChecking(blob_id, lock_stats);
+            }
+            else
+            {
+                LOG_FMT_INFO(log, "Ignore non-blob file [dir={}] [file={}] [err_msg={}]", path, blob_name, err_msg);
+            }
+        }
+    }
+}
+
 PageEntriesEdit BlobStore::write(DB::WriteBatch & wb, const WriteLimiterPtr & write_limiter)
 {
     ProfileEvents::increment(ProfileEvents::PSMWritePages, wb.putWriteCount());
@@ -244,7 +269,6 @@ void BlobStore::remove(const PageEntriesV3 & del_entries)
     for (const auto & blob_id : blob_updated)
     {
         const auto & stat = blob_stats.blobIdToStat(blob_id,
-                                                    /*restore_if_not_exist*/ false,
                                                     /*ignore_not_exist*/ true);
         // Some of the blobs may have been removed.
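The restore flow above now discovers BlobFiles by scanning each delegated path and parsing ids out of the file names (the full `getBlobIdFromName` appears in a later hunk). As a minimal, self-contained sketch of that parsing rule — assuming on-disk names such as `blobfile_1`, since the real prefix constant `BlobFile::BLOB_PREFIX_NAME` is defined outside this patch:

    #include <cstdint>
    #include <optional>
    #include <stdexcept>
    #include <string>

    // Illustrative only: mirrors the patch's rule for recovering a blob id
    // from an on-disk file name. `kBlobPrefix` is an assumption, not TiFlash code.
    std::optional<uint64_t> parseBlobId(const std::string & file_name)
    {
        static const std::string kBlobPrefix = "blobfile_";
        if (file_name.rfind(kBlobPrefix, 0) != 0) // must start with the prefix
            return std::nullopt;
        try
        {
            size_t pos = 0;
            const std::string digits = file_name.substr(kBlobPrefix.size());
            uint64_t id = std::stoull(digits, &pos);
            if (pos != digits.size()) // reject trailing junk like "blobfile_1.tmp"
                return std::nullopt;
            return id;
        }
        catch (const std::invalid_argument &) { return std::nullopt; }
        catch (const std::out_of_range &) { return std::nullopt; }
    }

Anything that fails to parse is deliberately skipped rather than treated as an error, which matches the "ignore non-blob file" log line above.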
@@ -315,11 +339,16 @@ std::pair<BlobFileId, BlobFileOffset> BlobStore::getPosFromStats(size_t size)

 void BlobStore::removePosFromStats(BlobFileId blob_id, BlobFileOffset offset, size_t size)
 {
+    bool need_remove_stat = false;
     const auto & stat = blob_stats.blobIdToStat(blob_id);
-    auto lock = stat->lock();
-    stat->removePosFromStat(offset, size, lock);
+    {
+        auto lock = stat->lock();
+        need_remove_stat = stat->removePosFromStat(offset, size, lock);
+    }

-    if (stat->isReadOnly() && stat->sm_valid_size == 0)
+    // We must not hold the BlobStat lock here (and don't need to):
+    // once a BlobStat has become read-only, its valid size can never increase again.
+    if (need_remove_stat)
     {
         LOG_FMT_INFO(log, "Removing BlobFile [blob_id={}]", blob_id);
         auto lock_stats = blob_stats.lock();
@@ -878,10 +907,42 @@ BlobStore::BlobStats::BlobStats(LoggerPtr log_, PSDiskDelegatorPtr delegator_, B

 void BlobStore::BlobStats::restoreByEntry(const PageEntryV3 & entry)
 {
-    auto stat = blobIdToStat(entry.file_id, /*restore_if_not_exist=*/true);
+    auto stat = blobIdToStat(entry.file_id);
     stat->restoreSpaceMap(entry.offset, entry.size);
 }

+std::pair<BlobFileId, String> BlobStore::BlobStats::getBlobIdFromName(String blob_name)
+{
+    String err_msg;
+    if (!startsWith(blob_name, BlobFile::BLOB_PREFIX_NAME))
+    {
+        return {INVALID_BLOBFILE_ID, err_msg};
+    }
+
+    Strings ss;
+    boost::split(ss, blob_name, boost::is_any_of("_"));
+
+    if (ss.size() != 2)
+    {
+        return {INVALID_BLOBFILE_ID, err_msg};
+    }
+
+    try
+    {
+        const auto & blob_id = std::stoull(ss[1]);
+        return {blob_id, err_msg};
+    }
+    catch (std::invalid_argument & e)
+    {
+        err_msg = e.what();
+    }
+    catch (std::out_of_range & e)
+    {
+        err_msg = e.what();
+    }
+    return {INVALID_BLOBFILE_ID, err_msg};
+}
+
 std::set<BlobFileId> BlobStore::BlobStats::getBlobIdsFromDisk(String path) const
 {
     std::set<BlobFileId> blob_ids_on_disk;
@@ -892,43 +953,20 @@ std::set<BlobFileId> BlobStore::BlobStats::getBlobIdsFromDisk(String path) const
         return blob_ids_on_disk;
     }

-
     std::vector<String> file_list;
     store_path.list(file_list);

     for (const auto & blob_name : file_list)
     {
-        if (!startsWith(blob_name, BlobFile::BLOB_PREFIX_NAME))
-        {
-            LOG_FMT_INFO(log, "Ignore not blob file [dir={}] [file={}]", path, blob_name);
-            continue;
-        }
-
-        Strings ss;
-        boost::split(ss, blob_name, boost::is_any_of("_"));
-
-        if (ss.size() != 2)
-        {
-            LOG_FMT_INFO(log, "Ignore unrecognized blob file [dir={}] [file={}]", path, blob_name);
-            continue;
-        }
-
-        String err_msg;
-        try
+        const auto & [blob_id, err_msg] = getBlobIdFromName(blob_name);
+        if (blob_id != INVALID_BLOBFILE_ID)
         {
-            const auto & blob_id = std::stoull(ss[1]);
             blob_ids_on_disk.insert(blob_id);
-            continue; // continue to handle next file
-        }
-        catch (std::invalid_argument & e)
-        {
-            err_msg = e.what();
         }
-        catch (std::out_of_range & e)
+        else
         {
-            err_msg = e.what();
+            LOG_FMT_INFO(log, "Ignore non-blob file [dir={}] [file={}] [err_msg={}]", path, blob_name, err_msg);
         }
-        LOG_FMT_INFO(log, "Ignore unrecognized blob file [dir={}] [file={}] [err={}]", path, blob_name, err_msg);
     }

     return blob_ids_on_disk;
@@ -940,85 +978,10 @@ void BlobStore::BlobStats::restore()

     for (auto & [path, stats] : stats_map)
     {
-        std::set<BlobFileId> blob_ids_in_stats;
         for (const auto & stat : stats)
         {
             stat->recalculateSpaceMap();
             max_restored_file_id = std::max(stat->id, max_restored_file_id);
-            blob_ids_in_stats.insert(stat->id);
-        }
-
-        // If a BlobFile on disk with a valid rate of 0 (but has not been deleted because of some reason),
-        // then it won't be restored to stats. But we should check and clean up if such files exist.
- - std::set blob_ids_on_disk = getBlobIdsFromDisk(path); - - if (blob_ids_on_disk.size() < blob_ids_in_stats.size()) - { - FmtBuffer fmt_buf; - fmt_buf.fmtAppend( - "Some of Blob are missing in disk.[path={}] [stats ids: ", - path); - - fmt_buf.joinStr( - blob_ids_in_stats.begin(), - blob_ids_in_stats.end(), - [](const auto arg, FmtBuffer & fb) { - fb.fmtAppend("{}", arg); - }, - ", "); - - fmt_buf.append("]"); - - throw Exception(fmt_buf.toString(), - ErrorCodes::LOGICAL_ERROR); - } - - if constexpr (CHECK_STATS_ALL_IN_DISK) - { - std::vector blob_ids_on_disk_not_in_stats(blob_ids_in_stats.size()); - auto last_check_it = std::set_difference(blob_ids_in_stats.begin(), - blob_ids_in_stats.end(), - blob_ids_on_disk.begin(), - blob_ids_on_disk.end(), - blob_ids_on_disk_not_in_stats.begin()); - - if (last_check_it != blob_ids_on_disk_not_in_stats.begin()) - { - FmtBuffer fmt_buf; - fmt_buf.fmtAppend( - "Some of Blob are missing in disk.[path={}] [stats ids: ", - path); - - fmt_buf.joinStr( - blob_ids_in_stats.begin(), - blob_ids_in_stats.end(), - [](const auto arg, FmtBuffer & fb) { - fb.fmtAppend("{}", arg); - }, - ", "); - - fmt_buf.append("]"); - - throw Exception(fmt_buf.toString(), - ErrorCodes::LOGICAL_ERROR); - } - } - - std::vector invalid_blob_ids; - - std::set_difference(blob_ids_on_disk.begin(), - blob_ids_on_disk.end(), - blob_ids_in_stats.begin(), - blob_ids_in_stats.end(), - std::back_inserter(invalid_blob_ids)); - - for (const auto & invalid_blob_id : invalid_blob_ids) - { - const auto & invalid_blob_path = fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, invalid_blob_id); - LOG_FMT_INFO(log, "Remove invalid blob file [file={}]", invalid_blob_path); - Poco::File invalid_blob(invalid_blob_path); - invalid_blob.remove(); } } @@ -1175,7 +1138,7 @@ std::pair BlobStore::BlobStats::chooseStat(size_t buf_s return std::make_pair(stat_ptr, INVALID_BLOBFILE_ID); } -BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool restore_if_not_exist, bool ignore_not_exist) +BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool ignore_not_exist) { auto guard = lock(); for (const auto & [path, stats] : stats_map) @@ -1190,12 +1153,6 @@ BlobStatPtr BlobStore::BlobStats::blobIdToStat(BlobFileId file_id, bool restore_ } } - if (restore_if_not_exist) - { - // Restore a stat without checking file_id exist or not and won't push forward the roll_id - return createStatNotChecking(file_id, guard); - } - if (!ignore_not_exist) { throw Exception(fmt::format("Can't find BlobStat with [blob_id={}]", @@ -1246,7 +1203,7 @@ BlobFileOffset BlobStore::BlobStats::BlobStat::getPosFromStat(size_t buf_size, c return offset; } -void BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, size_t buf_size, const std::lock_guard &) +bool BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, size_t buf_size, const std::lock_guard &) { if (!smap->markFree(offset, buf_size)) { @@ -1260,6 +1217,7 @@ void BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, si sm_valid_size -= buf_size; sm_valid_rate = sm_valid_size * 1.0 / sm_total_size; + return (isReadOnly() && sm_valid_size == 0); } void BlobStore::BlobStats::BlobStat::restoreSpaceMap(BlobFileOffset offset, size_t buf_size) diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index c91ba90177e..bd25542b23b 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -114,7 +114,7 @@ class BlobStore : 
private Allocator<false>
     BlobFileOffset getPosFromStat(size_t buf_size, const std::lock_guard<std::mutex> &);
-    void removePosFromStat(BlobFileOffset offset, size_t buf_size, const std::lock_guard<std::mutex> &);
+    bool removePosFromStat(BlobFileOffset offset, size_t buf_size, const std::lock_guard<std::mutex> &);

     /**
      * This method is only used when blobstore restore
@@ -140,6 +140,16 @@ class BlobStore : private Allocator<false>
 public:
     BlobStats(LoggerPtr log_, PSDiskDelegatorPtr delegator_, BlobStore::Config config);

+    // Don't acquire the BlobStats lock when you already hold a BlobStat lock.
+    //
+    // Safe lock orders:
+    // 1. Hold the BlobStats lock, then hold one or more BlobStat locks.
+    // 2. Hold one or more BlobStat locks without holding the BlobStats lock.
+    // 3. Hold the BlobStats lock without holding any BlobStat lock.
+    //
+    // Unsafe lock order:
+    // 1. Hold one or more BlobStat locks, then acquire the BlobStats lock.
+    //
     [[nodiscard]] std::lock_guard<std::mutex> lock() const;

     BlobStatPtr createStatNotChecking(BlobFileId blob_file_id, const std::lock_guard<std::mutex> &);
@@ -166,7 +176,7 @@
      */
     std::pair<BlobStatPtr, BlobFileId> chooseStat(size_t buf_size, const std::lock_guard<std::mutex> &);

-    BlobStatPtr blobIdToStat(BlobFileId file_id, bool restore_if_not_exist = false, bool ignore_not_exist = false);
+    BlobStatPtr blobIdToStat(BlobFileId file_id, bool ignore_not_exist = false);

     std::map<String, std::list<BlobStatPtr>> getStats() const
     {
@@ -174,13 +184,15 @@ class BlobStore : private Allocator<false>
         return stats_map;
     }

+    std::set<BlobFileId> getBlobIdsFromDisk(String path) const;
+
+    static std::pair<BlobFileId, String> getBlobIdFromName(String blob_name);
 #ifndef DBMS_PUBLIC_GTEST
 private:
 #endif
     void restoreByEntry(const PageEntryV3 & entry);
     void restore();
-    std::set<BlobFileId> getBlobIdsFromDisk(String path) const;

     friend class PageDirectoryFactory;
 #ifndef DBMS_PUBLIC_GTEST
 private:
 #endif
@@ -199,6 +211,8 @@ class BlobStore : private Allocator<false>

     BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, BlobStore::Config config);

+    void registerPaths();
+
     std::vector<BlobFileId> getGCStats();

     PageEntriesEdit gc(std::map & entries_need_gc,
diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp
index cefb20a4736..9ee0616c987 100644
--- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp
+++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp
@@ -47,6 +47,8 @@ void PageStorageImpl::restore()
 {
     // TODO: clean up blobstore.
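One aside before the PageStorageImpl::restore hunk continues: the lock-order comment added to BlobStats above is the usual two-level locking discipline — a coarse lock over the container (BlobStats) and a fine lock per element (BlobStat). A hedged, generic sketch (not TiFlash code) of why the reversed order can deadlock:

    #include <mutex>

    struct Stat  { std::mutex m; };
    struct Stats { std::mutex m; Stat stat; };

    // Safe: container lock first, then element lock (order 1 above).
    void safeOrder(Stats & stats)
    {
        std::lock_guard<std::mutex> outer(stats.m);
        std::lock_guard<std::mutex> inner(stats.stat.m);
        // ... mutate the stat and the container's bookkeeping ...
    }

    // Unsafe: element lock first, then container lock. If another thread is
    // inside safeOrder() at the same time, each thread holds the lock the
    // other one is waiting for.
    void unsafeOrder(Stats & stats)
    {
        std::lock_guard<std::mutex> inner(stats.stat.m);
        std::lock_guard<std::mutex> outer(stats.m); // potential deadlock
    }

This is also why `removePosFromStat` now returns a bool: the caller finishes its work under the BlobStat lock, releases it, and only then takes the BlobStats lock to erase the stat.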
// TODO: Speedup restoring + blob_store.registerPaths(); + PageDirectoryFactory factory; page_directory = factory .setBlobStore(blob_store) diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index 3db3b53dd2b..22c81cc76f3 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -70,10 +70,11 @@ try BlobFileId file_id1 = 10; BlobFileId file_id2 = 12; - const auto & path = getTemporaryPath(); - createIfNotExist(path); - Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); - Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); + { + const auto & lock = stats.lock(); + stats.createStatNotChecking(file_id1, lock); + stats.createStatNotChecking(file_id2, lock); + } { stats.restoreByEntry(PageEntryV3{ @@ -294,6 +295,7 @@ try createIfNotExist(path); Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); + blob_store.registerPaths(); { blob_store.blob_stats.restoreByEntry(PageEntryV3{ @@ -388,6 +390,7 @@ try }; auto restore_blobs = [](BlobStore & blob_store, std::vector blob_ids) { + blob_store.registerPaths(); for (const auto & id : blob_ids) { blob_store.blob_stats.restoreByEntry(PageEntryV3{ @@ -481,15 +484,7 @@ try ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3})); auto blob_store_check = BlobStore(getCurrentTestName(), file_provider, delegator, config); - restore_blobs(blob_store_check, {4}); - ASSERT_THROW(blob_store_check.blob_stats.restore(), DB::Exception); - // Won't remove blob if exception happened. 
-    ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3}));
-
-    auto blob_store_check2 = BlobStore(getCurrentTestName(), file_provider, delegator, config);
-    restore_blobs(blob_store_check2, {1, 2, 3, 4});
-    ASSERT_THROW(blob_store_check2.blob_stats.restore(), DB::Exception);
-    ASSERT_TRUE(check_in_disk_file(test_path, {1, 2, 3}));
+    ASSERT_THROW(restore_blobs(blob_store_check, {4}), DB::Exception);
 }
 }
 CATCH
diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp
index 16c2140964b..ae149fbf69b 100644
--- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp
+++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp
@@ -1972,8 +1972,8 @@ try
 PageEntryV3 entry_5_v2{.file_id = file_id2, .size = 255, .tag = 0, .offset = 0x400, .checksum = 0x4567};
 {
     PageEntriesEdit edit;
-    edit.put(file_id1, entry_1_v1);
-    edit.put(file_id2, entry_5_v1);
+    edit.put(1, entry_1_v1);
+    edit.put(5, entry_5_v1);
     dir->apply(std::move(edit));
 }
 {
@@ -1999,6 +1999,11 @@ try
 auto path = getTemporaryPath();
 PSDiskDelegatorPtr delegator = std::make_shared(path);
 BlobStore::BlobStats stats(log, delegator, BlobStore::Config{});
+{
+    const auto & lock = stats.lock();
+    stats.createStatNotChecking(file_id1, lock);
+    stats.createStatNotChecking(file_id2, lock);
+}
 auto restored_dir = restore_from_edit(edit, stats);
 auto temp_snap = restored_dir->createSnapshot();
 EXPECT_SAME_ENTRY(entry_1_v1, restored_dir->get(2, temp_snap).second);
@@ -2006,9 +2011,9 @@ try
 EXPECT_SAME_ENTRY(entry_5_v2, restored_dir->get(5, temp_snap).second);

 // The entry_1_v1 should be restored to stats
-auto stat_for_file_1 = stats.blobIdToStat(file_id1, false, false);
+auto stat_for_file_1 = stats.blobIdToStat(file_id1, /*ignore_not_exist*/ false);
 EXPECT_TRUE(stat_for_file_1->smap->isMarkUsed(entry_1_v1.offset, entry_1_v1.size));
-auto stat_for_file_5 = stats.blobIdToStat(file_id2, false, false);
+auto stat_for_file_5 = stats.blobIdToStat(file_id2, /*ignore_not_exist*/ false);
 // entry_5_v1 should not be restored to stats
 EXPECT_FALSE(stat_for_file_5->smap->isMarkUsed(entry_5_v1.offset, entry_5_v1.size));
 EXPECT_TRUE(stat_for_file_5->smap->isMarkUsed(entry_5_v2.offset, entry_5_v2.size));
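The next patch (03/19) turns MPPInfo's const-reference members into owned values. The bug pattern it removes is easy to reproduce; here is a minimal, hedged illustration in generic C++ (not the TiFlash code itself):

    #include <iostream>
    #include <vector>

    struct Info
    {
        // Binding a member reference to a constructor argument keeps no copy:
        // if the caller passed a temporary, the reference dangles once the
        // full-expression ends.
        const std::vector<int> & ids;
        explicit Info(const std::vector<int> & ids_) : ids(ids_) {}
    };

    std::vector<int> makeIds() { return {1, 2, 3}; }

    int main()
    {
        Info info(makeIds());                  // temporary dies here; info.ids dangles
        std::cout << info.ids.size() << '\n';  // undefined behavior
    }

Storing the members by value, as the patch does, makes the copy explicit and keeps the lifetime self-contained.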
From 782506551bb82dd043ef8f0443bd8f3cc69b3aa0 Mon Sep 17 00:00:00 2001
From: yanweiqi <592838129@qq.com>
Date: Wed, 13 Apr 2022 12:42:35 +0800
Subject: [PATCH 03/19] *: Fix MPPInfo saving a reference to a temporary
 object. (#4638)

close pingcap/tiflash#4476
---
 dbms/src/Debug/astToExecutor.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h
index 4f1b262e76d..0de229bccfa 100644
--- a/dbms/src/Debug/astToExecutor.h
+++ b/dbms/src/Debug/astToExecutor.h
@@ -66,9 +66,15 @@ struct MPPInfo
 Timestamp start_ts;
 Int64 partition_id;
 Int64 task_id;
-    const std::vector<Int64> & sender_target_task_ids;
-    const std::unordered_map<String, std::vector<Int64>> & receiver_source_task_ids_map;
-    MPPInfo(Timestamp start_ts_, Int64 partition_id_, Int64 task_id_, const std::vector<Int64> & sender_target_task_ids_, const std::unordered_map<String, std::vector<Int64>> & receiver_source_task_ids_map_)
+    const std::vector<Int64> sender_target_task_ids;
+    const std::unordered_map<String, std::vector<Int64>> receiver_source_task_ids_map;
+
+    MPPInfo(
+        Timestamp start_ts_,
+        Int64 partition_id_,
+        Int64 task_id_,
+        const std::vector<Int64> & sender_target_task_ids_,
+        const std::unordered_map<String, std::vector<Int64>> & receiver_source_task_ids_map_)
         : start_ts(start_ts_)
         , partition_id(partition_id_)
         , task_id(task_id_)

From 65247ec061130f2026fe23959167c836563d4511 Mon Sep 17 00:00:00 2001
From: jinhelin
Date: Wed, 13 Apr 2022 20:36:35 +0800
Subject: [PATCH 04/19] Fix logger initialization of DTWorkload. (#4637)

close pingcap/tiflash#4636
---
 .../DeltaMerge/tools/workload/MainEntry.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp
index 14635c3feac..9730a44c5c9 100644
--- a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp
+++ b/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <sys/stat.h>
 #include
 #include

@@ -32,6 +33,18 @@ using namespace DB::DM::tests;

 std::ofstream log_ofs;

+void initWorkDirs(const std::vector<std::string> & dirs)
+{
+    for (const auto & dir : dirs)
+    {
+        int ret = ::mkdir(dir.c_str(), 0777);
+        if (ret != 0 && errno != EEXIST)
+        {
+            throw std::runtime_error(fmt::format("mkdir {} failed: {}", dir, strerror(errno)));
+        }
+    }
+}
+
 void init(WorkloadOptions & opts)
 {
     log_ofs.open(opts.log_file, std::ios_base::out | std::ios_base::app);
@@ -248,6 +261,9 @@ int DTWorkload::mainEntry(int argc, char ** argv)
         return -1;
     }

+    // The log file is created in the first directory of `opts.work_dirs` by default,
+    // so create these work_dirs before initializing the logger.
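+    // For example (illustrative values, not from this patch): with
+    // opts.work_dirs = {"/tmp/w0", "/tmp/w1"}, the log file is opened under
+    // /tmp/w0, so both directories must exist before init(opts) runs.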
+ initWorkDirs(opts.work_dirs); // need to init logger before creating global context, // or the logging in global context won't be output to // the log file From e192ce5db1e55b6f14d8a3346327b5a8b245b9e3 Mon Sep 17 00:00:00 2001 From: SeaRise Date: Thu, 14 Apr 2022 00:16:35 +0800 Subject: [PATCH 05/19] move `output_field_types` and `output_offsets` to `DAGContext` (#4626) ref pingcap/tiflash#4118 --- dbms/src/Flash/Coprocessor/DAGContext.cpp | 20 +++++++++++++++++ dbms/src/Flash/Coprocessor/DAGContext.h | 22 ++++++++++++++++--- dbms/src/Flash/Coprocessor/DAGQueryBlock.h | 4 ---- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 11 ++++------ .../Coprocessor/DAGQueryBlockInterpreter.h | 2 -- dbms/src/Flash/Coprocessor/DAGQuerySource.cpp | 21 ------------------ dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 1 - 7 files changed, 43 insertions(+), 38 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index a38eeef3145..1f6618d3170 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include #include @@ -33,6 +35,24 @@ bool strictSqlMode(UInt64 sql_mode) return sql_mode & TiDBSQLMode::STRICT_ALL_TABLES || sql_mode & TiDBSQLMode::STRICT_TRANS_TABLES; } +void DAGContext::initOutputInfo() +{ + output_field_types = collectOutputFieldTypes(*dag_request); + output_offsets.clear(); + result_field_types.clear(); + for (UInt32 i : dag_request->output_offsets()) + { + output_offsets.push_back(i); + if (unlikely(i >= output_field_types.size())) + throw TiFlashException( + fmt::format("{}: Invalid output offset(schema has {} columns, access index {}", __PRETTY_FUNCTION__, output_field_types.size(), i), + Errors::Coprocessor::BadRequest); + result_field_types.push_back(output_field_types[i]); + } + encode_type = analyzeDAGEncodeType(*this); + keep_session_timezone_info = encode_type == tipb::EncodeType::TypeChunk || encode_type == tipb::EncodeType::TypeCHBlock; +} + bool DAGContext::allowZeroInDate() const { return flags & TiDBSQLFlags::IGNORE_ZERO_IN_DATE; diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index b1c92a9035e..30397dc496a 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -25,8 +25,8 @@ #include #include +#include #include -#include #include #include #include @@ -112,6 +112,7 @@ constexpr UInt64 ALLOW_INVALID_DATES = 1ul << 32ul; class DAGContext { public: + // for non-mpp(cop/batchCop) explicit DAGContext(const tipb::DAGRequest & dag_request_) : dag_request(&dag_request_) , collect_execution_summaries(dag_request->has_collect_execution_summaries() && dag_request->collect_execution_summaries()) @@ -126,8 +127,11 @@ class DAGContext { assert(dag_request->has_root_executor() || dag_request->executors_size() > 0); return_executor_id = dag_request->root_executor().has_executor_id() || dag_request->executors(0).has_executor_id(); + + initOutputInfo(); } + // for mpp DAGContext(const tipb::DAGRequest & dag_request_, const mpp::TaskMeta & meta_, bool is_root_mpp_task_) : dag_request(&dag_request_) , collect_execution_summaries(dag_request->has_collect_execution_summaries() && dag_request->collect_execution_summaries()) @@ -144,8 +148,13 @@ class DAGContext , warning_count(0) { assert(dag_request->has_root_executor() && dag_request->root_executor().has_executor_id()); + + // only mpp task has join executor. 
+ initExecutorIdToJoinIdMap(); + initOutputInfo(); } + // for test explicit DAGContext(UInt64 max_error_count_) : dag_request(nullptr) , collect_execution_summaries(false) @@ -162,7 +171,6 @@ class DAGContext void attachBlockIO(const BlockIO & io_); std::unordered_map & getProfileStreamsMap(); - void initExecutorIdToJoinIdMap(); std::unordered_map> & getExecutorIdToJoinIdMap(); std::unordered_map & getJoinExecuteInfoMap(); @@ -291,9 +299,17 @@ class DAGContext LoggerPtr log; - bool keep_session_timezone_info = false; + // initialized in `initOutputInfo`. std::vector result_field_types; tipb::EncodeType encode_type = tipb::EncodeType::TypeDefault; + // only meaningful in final projection. + bool keep_session_timezone_info = false; + std::vector output_field_types; + std::vector output_offsets; + +private: + void initExecutorIdToJoinIdMap(); + void initOutputInfo(); private: /// Hold io for correcting the destruction order. diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h index 6ad35bc63be..486345efa03 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h @@ -68,10 +68,6 @@ class DAGQueryBlock String qb_column_prefix; std::vector> children; - // only meaningful for root query block. - std::vector output_field_types; - std::vector output_offsets; - bool isRootQueryBlock() const { return id == 1; }; bool isTableScanSource() const { return source->tp() == tipb::ExecType::TypeTableScan || source->tp() == tipb::ExecType::TypePartitionTableScan; } }; diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 6c3c6700577..51cd1bf671f 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -57,12 +57,10 @@ DAGQueryBlockInterpreter::DAGQueryBlockInterpreter( const std::vector & input_streams_vec_, const DAGQueryBlock & query_block_, size_t max_streams_, - bool keep_session_timezone_info_, std::vector & subqueries_for_sets_) : context(context_) , input_streams_vec(input_streams_vec_) , query_block(query_block_) - , keep_session_timezone_info(keep_session_timezone_info_) , max_streams(max_streams_) , subqueries_for_sets(subqueries_for_sets_) , log(Logger::get("DAGQueryBlockInterpreter", dagContext().log ? dagContext().log->identifier() : "")) @@ -118,7 +116,6 @@ AnalysisResult analyzeExpressions( Context & context, DAGExpressionAnalyzer & analyzer, const DAGQueryBlock & query_block, - bool keep_session_timezone_info, NamesWithAliases & final_project) { AnalysisResult res; @@ -174,14 +171,15 @@ AnalysisResult analyzeExpressions( res.order_columns = analyzer.appendOrderBy(chain, query_block.limit_or_topn->topn()); } + const auto & dag_context = *context.getDAGContext(); // Append final project results if needed. final_project = query_block.isRootQueryBlock() ? 
analyzer.appendFinalProjectForRootQueryBlock( chain, - query_block.output_field_types, - query_block.output_offsets, + dag_context.output_field_types, + dag_context.output_offsets, query_block.qb_column_prefix, - keep_session_timezone_info) + dag_context.keep_session_timezone_info) : analyzer.appendFinalProjectForNonRootQueryBlock( chain, query_block.qb_column_prefix); @@ -1057,7 +1055,6 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) context, *analyzer, query_block, - keep_session_timezone_info, final_project); if (res.before_where) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 5325b76eec6..35627cd19ee 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -48,7 +48,6 @@ class DAGQueryBlockInterpreter const std::vector & input_streams_vec_, const DAGQueryBlock & query_block_, size_t max_streams_, - bool keep_session_timezone_info_, std::vector & subqueries_for_sets_); ~DAGQueryBlockInterpreter() = default; @@ -110,7 +109,6 @@ class DAGQueryBlockInterpreter Context & context; std::vector input_streams_vec; const DAGQueryBlock & query_block; - const bool keep_session_timezone_info; NamesWithAliases final_project; diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index 72d93f86e85..882699e1599 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -20,11 +20,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int COP_BAD_DAG_REQUEST; -} // namespace ErrorCodes - DAGQuerySource::DAGQuerySource(Context & context_) : context(context_) { @@ -38,22 +33,6 @@ DAGQuerySource::DAGQuerySource(Context & context_) { root_query_block = std::make_shared(1, dag_request.executors()); } - - root_query_block->output_field_types = collectOutputFieldTypes(dag_request); - getDAGContext().initExecutorIdToJoinIdMap(); - - for (UInt32 i : dag_request.output_offsets()) - { - root_query_block->output_offsets.push_back(i); - if (unlikely(i >= root_query_block->output_field_types.size())) - throw TiFlashException( - fmt::format("{}: Invalid output offset(schema has {} columns, access index {}", __PRETTY_FUNCTION__, root_query_block->output_field_types.size(), i), - Errors::Coprocessor::BadRequest); - getDAGContext().result_field_types.push_back(root_query_block->output_field_types[i]); - } - auto encode_type = analyzeDAGEncodeType(getDAGContext()); - getDAGContext().encode_type = encode_type; - getDAGContext().keep_session_timezone_info = encode_type == tipb::EncodeType::TypeChunk || encode_type == tipb::EncodeType::TypeCHBlock; } std::tuple DAGQuerySource::parse(size_t) diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index d3c23fe2e16..1bfe87e5695 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -51,7 +51,6 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block, input_streams_vec, query_block, max_streams, - dagContext().keep_session_timezone_info || !query_block.isRootQueryBlock(), subqueries_for_sets); return query_block_interpreter.execute(); } From e41c545a6daf17c80010ae3baa2bab2e8b3b730a Mon Sep 17 00:00:00 2001 From: Fu Zhe Date: Thu, 14 Apr 2022 11:58:36 +0800 Subject: [PATCH 06/19] Fix potential data race in DynamicThreadPool (#4648) close pingcap/tiflash#4595 --- 
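The shape of this race, sketched with illustrative names (this is not the pool's real code): the metric guard's destructor still touches state owned by the pool, while decrementing the alive-thread counter is what lets the pool's destructor finish. If the decrement happens before the guard is destroyed, the guard's destructor can run against freed memory. Narrowing the guard's scope, as the diff below does, restores the order:

    #include <atomic>

    std::atomic<int> alive_threads{1};

    struct MetricGuard
    {
        ~MetricGuard() { /* updates a metric owned by the pool */ }
    };

    void threadBodyBuggy()
    {
        MetricGuard guard; // destroyed at the very end of the function
        // ... work loop ...
        alive_threads.fetch_sub(1); // the pool destructor may now finish and
                                    // free the metric, yet ~MetricGuard runs
                                    // after this line: use-after-free.
    }

    void threadBodyFixed()
    {
        {
            MetricGuard guard;
            // ... work loop ...
        } // guard destroyed first
        alive_threads.fetch_sub(1); // safe: nothing touches pool state afterwards
    }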
dbms/src/Common/DynamicThreadPool.cpp | 34 +++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/dbms/src/Common/DynamicThreadPool.cpp b/dbms/src/Common/DynamicThreadPool.cpp index 482761a8bb8..5b154f8e4fd 100644 --- a/dbms/src/Common/DynamicThreadPool.cpp +++ b/dbms/src/Common/DynamicThreadPool.cpp @@ -120,26 +120,30 @@ void DynamicThreadPool::fixedWork(size_t index) void DynamicThreadPool::dynamicWork(TaskPtr initial_task) { - UPDATE_CUR_AND_MAX_METRIC(tiflash_thread_count, type_total_threads_of_thdpool, type_max_threads_of_thdpool); - executeTask(initial_task); - - DynamicNode node; - while (true) { + UPDATE_CUR_AND_MAX_METRIC(tiflash_thread_count, type_total_threads_of_thdpool, type_max_threads_of_thdpool); + executeTask(initial_task); + + DynamicNode node; + while (true) { - std::unique_lock lock(dynamic_mutex); - if (in_destructing) + { + std::unique_lock lock(dynamic_mutex); + if (in_destructing) + break; + // attach to just after head to reuse hot threads so that cold threads have chance to exit + node.appendTo(&dynamic_idle_head); + node.cv.wait_for(lock, dynamic_auto_shrink_cooldown); + node.detach(); + } + + if (!node.task) // may be timeout or cancelled break; - // attach to just after head to reuse hot threads so that cold threads have chance to exit - node.appendTo(&dynamic_idle_head); - node.cv.wait_for(lock, dynamic_auto_shrink_cooldown); - node.detach(); + executeTask(node.task); } - - if (!node.task) // may be timeout or cancelled - break; - executeTask(node.task); } + // must decrease counter after scope of `UPDATE_CUR_AND_MAX_METRIC` + // to avoid potential data race (#4595) alive_dynamic_threads.fetch_sub(1); } From 51dd32f4d98b38e8bb4b4192cce3803ff18eb57f Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger <60744015+Lloyd-Pottiger@users.noreply.github.com> Date: Thu, 14 Apr 2022 13:10:35 +0800 Subject: [PATCH 07/19] Fix create table error (#4630) close pingcap/tiflash#4596 --- dbms/src/Databases/test/gtest_database.cpp | 120 ++++++++++++++++++++- dbms/src/Storages/Transaction/TiDB.cpp | 7 +- dbms/src/Storages/Transaction/TiDB.h | 24 ++--- 3 files changed, 133 insertions(+), 18 deletions(-) diff --git a/dbms/src/Databases/test/gtest_database.cpp b/dbms/src/Databases/test/gtest_database.cpp index 149420f09e5..72915b8644f 100644 --- a/dbms/src/Databases/test/gtest_database.cpp +++ b/dbms/src/Databases/test/gtest_database.cpp @@ -94,7 +94,7 @@ class DatabaseTiFlashTest : public ::testing::Test } } - void recreateMetadataPath() const + static void recreateMetadataPath() { String path = TiFlashTestEnv::getContext().getPath(); @@ -652,6 +652,118 @@ try } CATCH +TEST_F(DatabaseTiFlashTest, ISSUE4596) +try +{ + const String db_name = "db_1"; + auto ctx = TiFlashTestEnv::getContext(); + + { + // Create database + const String statement = "CREATE DATABASE " + db_name + " ENGINE=TiFlash"; + ASTPtr ast = parseCreateStatement(statement); + InterpreterCreateQuery interpreter(ast, ctx); + interpreter.setInternal(true); + interpreter.setForceRestoreData(false); + interpreter.execute(); + } + + auto db = ctx.getDatabase(db_name); + + const String tbl_name = "t_111"; + { + /// Create table + ParserCreateQuery parser; + const String stmt = fmt::format("CREATE TABLE `{}`.`{}` ", db_name, tbl_name) + + R"stmt( + (`id` Int32,`b` String) Engine = DeltaMerge((`id`), + '{ + "cols":[{ + "comment":"", + "default":null, + "default_bit":null, + "id":1, + "name":{ + "L":"id", + "O":"id" + }, + "offset":0, + "origin_default":null, + "state":5, + "type":{ + 
"Charset":"binary", + "Collate":"binary", + "Decimal":0, + "Elems":null, + "Flag":515, + "Flen":16, + "Tp":3 + } + }, + { + "comment":"", + "default":"", + "default_bit":null, + "id":15, + "name":{ + "L":"b", + "O":"b" + }, + "offset":12, + "origin_default":"", + "state":5, + "type":{ + "Charset":"binary", + "Collate":"binary", + "Decimal":0, + "Elems":null, + "Flag":4225, + "Flen":-1, + "Tp":251 + } + }], + "comment":"", + "id":330, + "index_info":[], + "is_common_handle":false, + "name":{ + "L":"test", + "O":"test" + }, + "partition":null, + "pk_is_handle":true, + "schema_version":465, + "state":5, + "update_timestamp":99999 + }' + ) + )stmt"; + ASTPtr ast = parseQuery(parser, stmt, 0); + + InterpreterCreateQuery interpreter(ast, ctx); + interpreter.setInternal(true); + interpreter.setForceRestoreData(false); + interpreter.execute(); + } + + EXPECT_FALSE(db->empty(ctx)); + EXPECT_TRUE(db->isTableExist(ctx, tbl_name)); + + { + // Get storage from database + auto storage = db->tryGetTable(ctx, tbl_name); + ASSERT_NE(storage, nullptr); + + EXPECT_EQ(storage->getName(), MutableSupport::delta_tree_storage_name); + EXPECT_EQ(storage->getTableName(), tbl_name); + + auto managed_storage = std::dynamic_pointer_cast(storage); + EXPECT_EQ(managed_storage->getDatabaseName(), db_name); + EXPECT_EQ(managed_storage->getTableInfo().name, "test"); + } +} +CATCH + TEST_F(DatabaseTiFlashTest, ISSUE1055) try { @@ -688,7 +800,7 @@ try DatabaseLoading::loadTable(ctx, *db, meta_path, db_name, db_data_path, "TiFlash", "t_45.sql", false); // Get storage from database - const auto tbl_name = "t_45"; + const auto * tbl_name = "t_45"; auto storage = db->tryGetTable(ctx, tbl_name); ASSERT_NE(storage, nullptr); EXPECT_EQ(storage->getName(), MutableSupport::delta_tree_storage_name); @@ -776,7 +888,7 @@ try auto db = ctx.getDatabase(name_mapper.mapDatabaseName(*db_info)); ASSERT_NE(db, nullptr); EXPECT_EQ(db->getEngineName(), "TiFlash"); - auto flash_db = typeid_cast(db.get()); + auto * flash_db = typeid_cast(db.get()); auto & db_info_get = flash_db->getDatabaseInfo(); ASSERT_EQ(db_info_get.name, expect_name); } @@ -841,7 +953,7 @@ try )", }; - for (auto & statement : statements) + for (const auto & statement : statements) { { // Cleanup: Drop database if exists diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index 580850de08a..763dcac39fc 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -155,10 +155,13 @@ Field ColumnInfo::defaultValueToField() const auto v = value.convert(); if (hasBinaryFlag()) { - // For binary column, we have to pad trailing zeros according to the specified type length. + // For some binary column(like varchar(20)), we have to pad trailing zeros according to the specified type length. // User may define default value `0x1234` for a `BINARY(4)` column, TiDB stores it in a string "\u12\u34" (sized 2). // But it actually means `0x12340000`. - v.append(flen - v.length(), '\0'); + // And for some binary column(like longblob), we do not need to pad trailing zeros. + // And the `Flen` is set to -1, therefore we need to check `Flen >= 0` here. 
+ if (Int32 vlen = v.length(); flen >= 0 && vlen < flen) + v.append(flen - vlen, '\0'); } return v; } diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index 3ea573c0972..f67bfb332c7 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -91,7 +91,7 @@ enum TP #ifdef M #error "Please undefine macro M first." #endif -#define M(tt, v, cf, ct, w) Type##tt = v, +#define M(tt, v, cf, ct, w) Type##tt = (v), COLUMN_TYPES(M) #undef M }; @@ -123,7 +123,7 @@ enum ColumnFlag #ifdef M #error "Please undefine macro M first." #endif -#define M(cf, v) ColumnFlag##cf = v, +#define M(cf, v) ColumnFlag##cf = (v), COLUMN_FLAGS(M) #undef M }; @@ -152,7 +152,7 @@ enum CodecFlag #ifdef M #error "Please undefine macro M first." #endif -#define M(cf, v) CodecFlag##cf = v, +#define M(cf, v) CodecFlag##cf = (v), CODEC_FLAGS(M) #undef M }; @@ -197,10 +197,10 @@ struct ColumnInfo #ifdef M #error "Please undefine macro M first." #endif -#define M(f, v) \ - inline bool has##f##Flag() const { return (flag & v) != 0; } \ - inline void set##f##Flag() { flag |= v; } \ - inline void clear##f##Flag() { flag &= (~v); } +#define M(f, v) \ + inline bool has##f##Flag() const { return (flag & (v)) != 0; } \ + inline void set##f##Flag() { flag |= (v); } \ + inline void clear##f##Flag() { flag &= (~(v)); } COLUMN_FLAGS(M) #undef M @@ -225,7 +225,7 @@ struct PartitionDefinition { PartitionDefinition() = default; - PartitionDefinition(Poco::JSON::Object::Ptr json); + explicit PartitionDefinition(Poco::JSON::Object::Ptr json); Poco::JSON::Object::Ptr getJSONObject() const; @@ -241,7 +241,7 @@ struct PartitionInfo { PartitionInfo() = default; - PartitionInfo(Poco::JSON::Object::Ptr json); + explicit PartitionInfo(Poco::JSON::Object::Ptr json); Poco::JSON::Object::Ptr getJSONObject() const; @@ -264,7 +264,7 @@ struct DBInfo SchemaState state; DBInfo() = default; - DBInfo(const String & json) { deserialize(json); } + explicit DBInfo(const String & json) { deserialize(json); } String serialize() const; @@ -375,9 +375,9 @@ struct TableInfo ::TiDB::StorageEngine engine_type = ::TiDB::StorageEngine::UNSPECIFIED; ColumnID getColumnID(const String & name) const; - String getColumnName(const ColumnID id) const; + String getColumnName(ColumnID id) const; - const ColumnInfo & getColumnInfo(const ColumnID id) const; + const ColumnInfo & getColumnInfo(ColumnID id) const; std::optional> getPKHandleColumn() const; From 402e4779df81abb316d7196b2ac297f1d671e8dc Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Thu, 14 Apr 2022 13:46:36 +0800 Subject: [PATCH 08/19] Add a pagestorage v3 controller. 
(#4320) ref pingcap/tiflash#3594 --- dbms/src/Storages/Page/V3/BlobStore.cpp | 6 +- dbms/src/Storages/Page/V3/BlobStore.h | 2 + dbms/src/Storages/Page/V3/PageDirectory.h | 2 + dbms/src/Storages/Page/V3/PageStorageImpl.h | 1 + .../Storages/Page/V3/spacemap/SpaceMap.cpp | 5 +- dbms/src/Storages/Page/V3/spacemap/SpaceMap.h | 10 +- .../Page/V3/spacemap/SpaceMapRBTree.cpp | 24 +- .../Page/V3/spacemap/SpaceMapRBTree.h | 2 +- .../Page/V3/spacemap/SpaceMapSTDMap.h | 11 +- .../src/Storages/Page/V3/tests/CMakeLists.txt | 8 +- .../Storages/Page/V3/tests/gtest_free_map.cpp | 6 +- .../Page/V3/tests/page_storage_ctl.cpp | 477 ++++++++++++++++++ dbms/src/Storages/Page/stress/PSStressEnv.cpp | 2 + dbms/src/Storages/Page/stress/PSStressEnv.h | 6 +- dbms/src/Storages/Page/stress/PSWorkload.cpp | 7 +- .../Storages/Page/stress/workload/Normal.cpp | 6 +- 16 files changed, 537 insertions(+), 38 deletions(-) create mode 100644 dbms/src/Storages/Page/V3/tests/page_storage_ctl.cpp diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 24c2d0c0df9..6919e8081bd 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -325,7 +325,7 @@ std::pair BlobStore::getPosFromStats(size_t size) // Can't insert into this spacemap if (offset == INVALID_BLOBFILE_OFFSET) { - stat->smap->logStats(); + stat->smap->logDebugString(); throw Exception(fmt::format("Get postion from BlobStat failed, it may caused by `sm_max_caps` is no correct. [size={}] [old_max_caps={}] [max_caps={}] [blob_id={}]", size, old_max_cap, @@ -1207,7 +1207,7 @@ bool BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, si { if (!smap->markFree(offset, buf_size)) { - smap->logStats(); + smap->logDebugString(); throw Exception(fmt::format("Remove postion from BlobStat failed, [offset={} , buf_size={}, blob_id={}] is invalid.", offset, buf_size, @@ -1224,7 +1224,7 @@ void BlobStore::BlobStats::BlobStat::restoreSpaceMap(BlobFileOffset offset, size { if (!smap->markUsed(offset, buf_size)) { - smap->logStats(); + smap->logDebugString(); throw Exception(fmt::format("Restore postion from BlobStat failed, [offset={}] [buf_size={}] [blob_id={}] is used or subspan is used", offset, buf_size, diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index bd25542b23b..a289081acab 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -269,6 +269,8 @@ class BlobStore : private Allocator BlobFilePtr getBlobFile(BlobFileId blob_id); friend class PageDirectoryFactory; + friend class PageStorageControl; + #ifndef DBMS_PUBLIC_GTEST private: #endif diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index d0cc6ffd313..4cdf51bbf91 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -252,6 +252,7 @@ class VersionedPageEntries being_ref_count, entries.size()); } + friend class PageStorageControl; private: mutable std::mutex m; @@ -365,6 +366,7 @@ class PageDirectory PageDirectory & operator=(PageDirectory && rhs) = delete; friend class PageDirectoryFactory; + friend class PageStorageControl; private: // Only `std::map` is allow for `MVCCMap`. 
Cause `std::map::insert` ensure that diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index 2db4ea9d8e9..eb1fc91b2e7 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -82,6 +82,7 @@ class PageStorageImpl : public DB::PageStorage #endif friend class PageDirectoryFactory; + friend class PageStorageControl; #ifndef DBMS_PUBLIC_GTEST private: #endif diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp index e203418454c..7ee9e02ce48 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp @@ -59,11 +59,12 @@ bool SpaceMap::checkSpace(UInt64 offset, size_t size) const return (offset < start) || (offset > end) || (offset + size - 1 > end); } -void SpaceMap::logStats() +void SpaceMap::logDebugString() { - smapStats(); + LOG_DEBUG(log, toDebugString()); } + bool SpaceMap::markFree(UInt64 offset, size_t length) { if (checkSpace(offset, length)) diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h index f50eeee580a..e4af33c5a81 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h @@ -118,7 +118,12 @@ class SpaceMap /** * Log the status of space map */ - void logStats(); + void logDebugString(); + + /** + * return the status of space map + */ + virtual String toDebugString() = 0; SpaceMapType getType() const { @@ -143,9 +148,6 @@ class SpaceMap virtual ~SpaceMap() = default; - /* Print space maps status */ - virtual void smapStats() = 0; - // Return true if space [offset, offset+size) are all free virtual bool isMarkUnused(UInt64 offset, size_t size) = 0; diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp index 33bb1304a19..3b4c6a28099 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp @@ -464,36 +464,28 @@ void RBTreeSpaceMap::freeSmap() } } -void RBTreeSpaceMap::smapStats() +String RBTreeSpaceMap::toDebugString() { struct rb_node * node = nullptr; struct SmapRbEntry * entry; UInt64 count = 0; - UInt64 max_size = 0; - UInt64 min_size = ULONG_MAX; + FmtBuffer fmt_buffer; if (rb_tree->root.rb_node == nullptr) { - LOG_ERROR(log, "Tree have not been inited."); - return; + fmt_buffer.append("Tree have not been inited."); + return fmt_buffer.toString(); } - LOG_DEBUG(log, "RB-Tree entries status: "); + fmt_buffer.append(" RB-Tree entries status: \n"); for (node = rb_tree_first(&rb_tree->root); node != nullptr; node = rb_tree_next(node)) { entry = node_to_entry(node); - LOG_FMT_DEBUG(log, " Space: {} start: {} size: {}", count, entry->start, entry->count); + fmt_buffer.fmtAppend(" Space: {} start: {} size: {} \n", count, entry->start, entry->count); count++; - if (entry->count > max_size) - { - max_size = entry->count; - } - - if (entry->count < min_size) - { - min_size = entry->count; - } } + + return fmt_buffer.toString(); } bool RBTreeSpaceMap::isMarkUnused(UInt64 offset, size_t length) diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h index baeb6ef20b9..8c53724be7d 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h @@ -56,7 +56,7 @@ class RBTreeSpaceMap void freeSmap(); - void 
smapStats() override; + String toDebugString() override; bool isMarkUnused(UInt64 offset, size_t length) override; diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h index ac74fdcaaed..6e57abd43f4 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h @@ -57,16 +57,21 @@ class STDMapSpaceMap free_map.insert({start, end}); } - void smapStats() override + String toDebugString() override { UInt64 count = 0; - LOG_FMT_DEBUG(log, "STD-Map entries status: "); + FmtBuffer fmt_buffer; + fmt_buffer.append(" STD-Map entries status: \n"); + + // Need use `count`,so can't use `joinStr` here. for (auto it = free_map.begin(); it != free_map.end(); it++) { - LOG_FMT_DEBUG(log, " Space: {} start: {} size : {}", count, it->first, it->second); + fmt_buffer.fmtAppend(" Space: {} start: {} size : {}\n", count, it->first, it->second); count++; } + + return fmt_buffer.toString(); } std::pair getSizes() const override diff --git a/dbms/src/Storages/Page/V3/tests/CMakeLists.txt b/dbms/src/Storages/Page/V3/tests/CMakeLists.txt index 355247c9eba..8bab6afcded 100644 --- a/dbms/src/Storages/Page/V3/tests/CMakeLists.txt +++ b/dbms/src/Storages/Page/V3/tests/CMakeLists.txt @@ -26,4 +26,10 @@ add_executable(gtests_page_storage_v3 ${ps_v3_gtest_sources} ${TiFlash_SOURCE_DI target_link_libraries(gtests_page_storage_v3 page_storage_v3 gtest_main) target_compile_options(gtests_page_storage_v3 PRIVATE -Wno-unknown-pragmas) target_compile_definitions(gtests_page_storage_v3 PRIVATE DBMS_PUBLIC_GTEST) -add_check(gtests_page_storage_v3) \ No newline at end of file +add_check(gtests_page_storage_v3) + + +add_executable(page_storage_ctl EXCLUDE_FROM_ALL page_storage_ctl.cpp) +target_compile_definitions(page_storage_ctl PUBLIC DBMS_PUBLIC_GTEST) +target_link_libraries(page_storage_ctl dbms page_storage_v3) +target_compile_options(page_storage_ctl PRIVATE -Wno-format -lc++) # turn off printf format check diff --git a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp index a4f3fdbe948..85a94ec0ac3 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp @@ -54,7 +54,7 @@ TEST_P(SpaceMapTest, InitAndDestory) { SpaceMapPtr smap = SpaceMap::createSpaceMap(test_type, 0, 100); - smap->logStats(); + smap->logDebugString(); } @@ -256,11 +256,11 @@ TEST_P(SpaceMapTest, TestMargins2) // Right margin in marked used space // Left margin contain freed space ASSERT_FALSE(smap->markFree(49, 10)); - smap->logStats(); + smap->logDebugString(); // Left margin align with marked used space left margin // But right margin contain freed space ASSERT_FALSE(smap->markFree(51, 20)); - smap->logStats(); + smap->logDebugString(); // Right margin align with marked used space right margin // But left margin contain freed space ASSERT_FALSE(smap->markUsed(40, 19)); diff --git a/dbms/src/Storages/Page/V3/tests/page_storage_ctl.cpp b/dbms/src/Storages/Page/V3/tests/page_storage_ctl.cpp new file mode 100644 index 00000000000..4f3cefa0ad7 --- /dev/null +++ b/dbms/src/Storages/Page/V3/tests/page_storage_ctl.cpp @@ -0,0 +1,477 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB::PS::V3 +{ +struct ControlOptions +{ + enum DisplayType + { + DISPLAY_SUMMARY_INFO = 1, + DISPLAY_DIRECTORY_INFO = 2, + DISPLAY_BLOBS_INFO = 3, + CHECK_ALL_DATA_CRC = 4, + }; + + std::vector paths; + int display_mode = DisplayType::DISPLAY_SUMMARY_INFO; + UInt64 query_page_id = UINT64_MAX; + UInt32 query_blob_id = UINT32_MAX; + UInt64 query_ns_id = DB::TEST_NAMESPACE_ID; + UInt64 check_page_id = UINT64_MAX; + bool enable_fo_check = true; + + static ControlOptions parse(int argc, char ** argv); +}; + + +ControlOptions ControlOptions::parse(int argc, char ** argv) +{ + namespace po = boost::program_options; + using po::value; + + po::options_description desc("Allowed options"); + desc.add_options()("help,h", "produce help message") // + ("paths,P", value>(), "store path(s)") // + ("display_mode,D", value()->default_value(1), "Display Mode: 1 is summary information,\n 2 is display all of stored page and version chaim(will be very long),\n 3 is display all blobs(in disk) data distribution. \n 4 is check every data is valid.") // + ("enable_fo_check,E", value()->default_value(true), "Also check the evert field offsets. This options only works when `display_mode` is 4.") // + ("query_ns_id,N", value()->default_value(DB::TEST_NAMESPACE_ID), "When used `check_page_id`/`query_page_id`/`query_blob_id` to query results. You can specify a namespace id.")("check_page_id,C", value()->default_value(UINT64_MAX), "Check a single Page id, display the exception if meet. And also will check the field offsets.") // + ("query_page_id,W", value()->default_value(UINT64_MAX), "Quert a single Page id, and print its version chaim.") // + ("query_blob_id,B", value()->default_value(UINT32_MAX), "Quert a single Blob id, and print its data distribution."); + + + static_assert(sizeof(DB::PageId) == sizeof(UInt64)); + static_assert(sizeof(DB::BlobFileId) == sizeof(UInt32)); + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + po::notify(options); + + if (options.count("help") > 0) + { + std::cerr << desc << std::endl; + exit(0); + } + + ControlOptions opt; + + if (options.count("paths") == 0) + { + std::cerr << "Invalid arg paths." 
+
+class PageStorageControl
+{
+public:
+    explicit PageStorageControl(const ControlOptions & options_)
+        : options(options_)
+    {
+    }
+
+    void run()
+    {
+        DB::PSDiskDelegatorPtr delegator;
+        if (options.paths.size() == 1)
+        {
+            delegator = std::make_shared(options.paths[0]);
+        }
+        else
+        {
+            delegator = std::make_shared(options.paths);
+        }
+
+        auto key_manager = std::make_shared<MockKeyManager>(false);
+        auto file_provider = std::make_shared<FileProvider>(key_manager, false);
+
+        BlobStore::Config blob_config;
+
+        PageStorage::Config config;
+        PageStorageImpl ps_v3("PageStorageControl", delegator, config, file_provider);
+        ps_v3.restore();
+        PageDirectory::MVCCMapType & mvcc_table_directory = ps_v3.page_directory->mvcc_table_directory;
+
+        switch (options.display_mode)
+        {
+        case ControlOptions::DisplayType::DISPLAY_SUMMARY_INFO:
+        {
+            std::cout << getSummaryInfo(mvcc_table_directory, ps_v3.blob_store) << std::endl;
+            break;
+        }
+        case ControlOptions::DisplayType::DISPLAY_DIRECTORY_INFO:
+        {
+            std::cout << getDirectoryInfo(mvcc_table_directory, options.query_ns_id, options.query_page_id) << std::endl;
+            break;
+        }
+        case ControlOptions::DisplayType::DISPLAY_BLOBS_INFO:
+        {
+            std::cout << getBlobsInfo(ps_v3.blob_store, options.query_blob_id) << std::endl;
+            break;
+        }
+        case ControlOptions::DisplayType::CHECK_ALL_DATA_CRC:
+        {
+            if (options.check_page_id != UINT64_MAX)
+            {
+                std::cout << checkSinglePage(mvcc_table_directory, ps_v3.blob_store, options.query_ns_id, options.check_page_id) << std::endl;
+            }
+            else
+            {
+                std::cout << checkAllDataCrc(mvcc_table_directory, ps_v3.blob_store, options.enable_fo_check) << std::endl;
+            }
+            break;
+        }
+        default:
+            std::cout << "Invalid display mode."
                      << std::endl;
+            break;
+        }
+    }
+
+private:
+    static String getBlobsInfo(BlobStore & blob_store, UInt32 blob_id)
+    {
+        auto stat_info = [](const BlobStore::BlobStats::BlobStatPtr & stat, const String & path) {
+            FmtBuffer stat_str;
+            stat_str.fmtAppend("    stat id: {}\n"
+                               "     path: {}\n"
+                               "     total size: {}\n"
+                               "     valid size: {}\n"
+                               "     valid rate: {}\n"
+                               "     max cap: {}\n", //
+                               stat->id, //
+                               path,
+                               stat->sm_total_size, //
+                               stat->sm_valid_size, //
+                               stat->sm_valid_rate, //
+                               stat->sm_max_caps);
+
+            stat_str.append(stat->smap->toDebugString());
+            stat_str.append("\n");
+            return stat_str.toString();
+        };
+
+        FmtBuffer stats_info;
+        stats_info.append("  Blobs specific info: \n\n");
+
+        for (const auto & [path, stats] : blob_store.blob_stats.getStats())
+        {
+            for (const auto & stat : stats)
+            {
+                if (blob_id != UINT32_MAX)
+                {
+                    if (stat->id == blob_id)
+                    {
+                        stats_info.append(stat_info(stat, path));
+                        return stats_info.toString();
+                    }
+                    continue;
+                }
+
+                stats_info.append(stat_info(stat, path));
+            }
+        }
+
+        if (blob_id != UINT32_MAX)
+        {
+            stats_info.fmtAppend("    blob {} not found", blob_id);
+        }
+        return stats_info.toString();
+    }
+
+    static String getDirectoryInfo(PageDirectory::MVCCMapType & mvcc_table_directory, UInt64 ns_id, UInt64 page_id)
+    {
+        auto page_info = [](UInt128 page_internal_id_, const VersionedPageEntriesPtr & versioned_entries) {
+            FmtBuffer page_str;
+            page_str.fmtAppend("    page id {}\n", page_internal_id_);
+            page_str.fmtAppend("      {}\n", versioned_entries->toDebugString());
+
+            size_t count = 0;
+            for (const auto & [version, entry_or_del] : versioned_entries->entries)
+            {
+                const auto & entry = entry_or_del.entry;
+                page_str.fmtAppend("      entry {}\n"
+                                   "       sequence: {}\n"
+                                   "       epoch: {}\n"
+                                   "       is del: {}\n"
+                                   "       blob id: {}\n"
+                                   "       offset: {}\n"
+                                   "       size: {}\n"
+                                   "       crc: {}\n"
+                                   "       field offsets: {}\n", //
+                                   count++, //
+                                   version.sequence, //
+                                   version.epoch, //
+                                   entry_or_del.isDelete(), //
+                                   entry.file_id, //
+                                   entry.offset, //
+                                   entry.size, //
+                                   entry.checksum, //
+                                   entry.field_offsets.size() //
+                );
+                if (!entry.field_offsets.empty())
+                {
+                    page_str.append("       field offsets:\n");
+                    for (const auto & [offset, crc] : entry.field_offsets)
+                    {
+                        page_str.fmtAppend("        offset: {} crc: 0x{:X}\n", offset, crc);
+                    }
+                    page_str.append("\n");
+                }
+            }
+            return page_str.toString();
+        };
+
+        FmtBuffer directory_info;
+        directory_info.append("  Directory specific info: \n\n");
+        for (const auto & [internal_id, versioned_entries] : mvcc_table_directory)
+        {
+            if (page_id != UINT64_MAX)
+            {
+                if (internal_id.low == page_id && internal_id.high == ns_id)
+                {
+                    directory_info.append(page_info(internal_id, versioned_entries));
+                    return directory_info.toString();
+                }
+                continue;
+            }
+            directory_info.append(page_info(internal_id, versioned_entries));
+        }
+
+        if (page_id != UINT64_MAX)
+        {
+            directory_info.fmtAppend("    page {} not found", page_id);
+        }
+        return directory_info.toString();
+    }
+
+    static String getSummaryInfo(PageDirectory::MVCCMapType & mvcc_table_directory, BlobStore & blob_store)
+    {
+        UInt64 longest_version_chain = 0;
+        UInt64 shortest_version_chain = UINT64_MAX;
+        FmtBuffer dir_summary_info;
+
+        dir_summary_info.append("  Directory summary info: \n");
+
+        for (const auto & [internal_id, versioned_entries] : mvcc_table_directory)
+        {
+            (void)internal_id;
+            longest_version_chain = std::max(longest_version_chain, versioned_entries->size());
+            shortest_version_chain = std::min(shortest_version_chain, versioned_entries->size());
+        }
+
+        dir_summary_info.fmtAppend("    total pages: {}, longest version chain: {}, shortest version chain: {} \n\n",
+                                   mvcc_table_directory.size(),
+                                   longest_version_chain,
+                                   shortest_version_chain);
+
+        dir_summary_info.append("  Blobs summary info: \n");
+        const auto & blob_stats = blob_store.blob_stats.getStats();
+        dir_summary_info.joinStr(
+            blob_stats.begin(),
+            blob_stats.end(),
+            [](const auto arg, FmtBuffer & fb) {
+                for (const auto & stat : arg.second)
+                {
+                    fb.fmtAppend("    stat id: {}\n"
+                                 "     path: {}\n"
+                                 "     total size: {}\n"
+                                 "     valid size: {}\n"
+                                 "     valid rate: {}\n"
+                                 "     max cap: {}\n",
+                                 stat->id,
+                                 arg.first,
+                                 stat->sm_total_size,
+                                 stat->sm_valid_size,
+                                 stat->sm_valid_rate,
+                                 stat->sm_max_caps);
+                }
+            },
+            "");
+
+        return dir_summary_info.toString();
+    }
+
+    static String checkSinglePage(PageDirectory::MVCCMapType & mvcc_table_directory, BlobStore & blob_store, UInt64 ns_id, UInt64 page_id)
+    {
+        const auto & page_internal_id = buildV3Id(ns_id, page_id);
+        const auto & it = mvcc_table_directory.find(page_internal_id);
+        if (it == mvcc_table_directory.end())
+        {
+            return fmt::format("Can't find {}", page_internal_id);
+        }
+
+        FmtBuffer error_msg;
+        size_t error_count = 0;
+        for (const auto & [version, entry_or_del] : it->second->entries)
+        {
+            if (entry_or_del.isEntry() && it->second->type == EditRecordType::VAR_ENTRY)
+            {
+                (void)blob_store;
+                try
+                {
+                    PageIDAndEntryV3 to_read_entry;
+                    const PageEntryV3 & entry = entry_or_del.entry;
+                    PageIDAndEntriesV3 to_read;
+                    to_read_entry.first = page_internal_id;
+                    to_read_entry.second = entry;
+
+                    to_read.emplace_back(to_read_entry);
+                    blob_store.read(to_read);
+
+                    if (!entry.field_offsets.empty())
+                    {
+                        DB::PageStorage::FieldIndices indices(entry.field_offsets.size());
+                        std::iota(std::begin(indices), std::end(indices), 0);
+
+                        BlobStore::FieldReadInfos infos;
+                        BlobStore::FieldReadInfo info(page_internal_id, entry, indices);
+                        infos.emplace_back(info);
+                        blob_store.read(infos);
+                    }
+                }
+                catch (DB::Exception & e)
+                {
+                    error_count++;
+                    error_msg.append(e.displayText());
+                    error_msg.append("\n");
+                }
+            }
+        }
+
+        if (error_count == 0)
+        {
+            return fmt::format("Checked {} without any error.", page_internal_id);
+        }
+
+        error_msg.fmtAppend("Checking {} met {} errors!", page_internal_id, error_count);
+        return error_msg.toString();
+    }
+
+    static String checkAllDataCrc(PageDirectory::MVCCMapType & mvcc_table_directory, BlobStore & blob_store, bool enable_fo_check)
+    {
+        size_t total_pages = mvcc_table_directory.size();
+        size_t cut_index = 0;
+        size_t index = 0;
+        std::cout << fmt::format("Begin to check the CRC of all data. 
enable_fo_check={}", static_cast(enable_fo_check)) << std::endl; + + std::list> error_versioned_pages; + for (const auto & [internal_id, versioned_entries] : mvcc_table_directory) + { + if (index == total_pages / 10 * cut_index) + { + std::cout << fmt::format("processing : {}%", cut_index * 10) << std::endl; + cut_index++; + } + + // TODO : need replace by getLastEntry(); + for (const auto & [version, entry_or_del] : versioned_entries->entries) + { + if (entry_or_del.isEntry() && versioned_entries->type == EditRecordType::VAR_ENTRY) + { + (void)blob_store; + try + { + PageIDAndEntryV3 to_read_entry; + const PageEntryV3 & entry = entry_or_del.entry; + PageIDAndEntriesV3 to_read; + to_read_entry.first = internal_id; + to_read_entry.second = entry; + + to_read.emplace_back(to_read_entry); + blob_store.read(to_read); + + if (enable_fo_check && !entry.field_offsets.empty()) + { + DB::PageStorage::FieldIndices indices(entry.field_offsets.size()); + std::iota(std::begin(indices), std::end(indices), 0); + + BlobStore::FieldReadInfos infos; + BlobStore::FieldReadInfo info(internal_id, entry, indices); + infos.emplace_back(info); + blob_store.read(infos); + } + } + catch (DB::Exception & e) + { + error_versioned_pages.emplace_back(std::make_pair(internal_id, version)); + } + } + } + index++; + } + + if (error_versioned_pages.empty()) + { + return "All of data checked. All passed."; + } + + FmtBuffer error_msg; + error_msg.append("Found error in these pages: "); + for (const auto & [internal_id, versioned] : error_versioned_pages) + { + error_msg.fmtAppend("id: {}, sequence: {}, epoch: {} \n", internal_id, versioned.sequence, versioned.epoch); + } + error_msg.append("Please use `--query_table_id` + `--check_page_id` to get the more error info."); + + return error_msg.toString(); + } + +private: + ControlOptions options; +}; + + +} // namespace DB::PS::V3 + +using namespace DB::PS::V3; +int main(int argc, char ** argv) +{ + const auto & options = ControlOptions::parse(argc, argv); + PageStorageControl(options).run(); + return 0; +} \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/PSStressEnv.cpp b/dbms/src/Storages/Page/stress/PSStressEnv.cpp index afe2ef700ea..7d680cd43c0 100644 --- a/dbms/src/Storages/Page/stress/PSStressEnv.cpp +++ b/dbms/src/Storages/Page/stress/PSStressEnv.cpp @@ -49,6 +49,7 @@ StressEnv StressEnv::parse(int argc, char ** argv) ("read_concurrency,R", value()->default_value(16), "number of read threads") // ("clean_before_run,C", value()->default_value(false), "drop data before running") // ("init_pages,I", value()->default_value(false), "init pages if not exist before running") // + ("just_init_pages,J", value()->default_value(false), "Only init pages 0 - 1000.Then quit") // ("timeout,T", value()->default_value(600), "maximum run time (seconds). 
0 means run infinitely") // ("writer_slots", value()->default_value(4), "number of PageStorage writer slots") // ("read_delay_ms", value()->default_value(0), "millionseconds of read delay") // @@ -75,6 +76,7 @@ StressEnv StressEnv::parse(int argc, char ** argv) opt.num_writers = options["write_concurrency"].as(); opt.num_readers = options["read_concurrency"].as(); opt.init_pages = options["init_pages"].as(); + opt.just_init_pages = options["just_init_pages"].as(); opt.clean_before_run = options["clean_before_run"].as(); opt.timeout_s = options["timeout"].as(); opt.read_delay_ms = options["read_delay_ms"].as(); diff --git a/dbms/src/Storages/Page/stress/PSStressEnv.h b/dbms/src/Storages/Page/stress/PSStressEnv.h index fce903d37c7..1c7d8ee761f 100644 --- a/dbms/src/Storages/Page/stress/PSStressEnv.h +++ b/dbms/src/Storages/Page/stress/PSStressEnv.h @@ -75,6 +75,7 @@ struct StressEnv size_t num_writers = 1; size_t num_readers = 4; bool init_pages = false; + bool just_init_pages = false; bool clean_before_run = false; size_t timeout_s = 0; size_t read_delay_ms = 0; @@ -92,8 +93,8 @@ struct StressEnv { return fmt::format( "{{ " - "num_writers: {}, num_readers: {}, init_pages: {}, clean_before_run: {}" - ", timeout_s: {}, read_delay_ms: {}, num_writer_slots: {}" + "num_writers: {}, num_readers: {}, init_pages: {}, just_init_pages: {}" + ", clean_before_run: {}, timeout_s: {}, read_delay_ms: {}, num_writer_slots: {}" ", avg_page_size_mb: {}, paths: [{}], failpoints: [{}]" ", status_interval: {}, situation_mask: {}, verify: {}" ", running_pagestorage_version : {}." @@ -101,6 +102,7 @@ struct StressEnv num_writers, num_readers, init_pages, + just_init_pages, clean_before_run, timeout_s, read_delay_ms, diff --git a/dbms/src/Storages/Page/stress/PSWorkload.cpp b/dbms/src/Storages/Page/stress/PSWorkload.cpp index 6159e15acac..ce1f8d92ce0 100644 --- a/dbms/src/Storages/Page/stress/PSWorkload.cpp +++ b/dbms/src/Storages/Page/stress/PSWorkload.cpp @@ -136,7 +136,7 @@ void StressWorkload::startBackgroundTimer() void StressWorkloadManger::runWorkload() { - if (options.situation_mask == NORMAL_WORKLOAD) + if (options.just_init_pages || options.situation_mask == NORMAL_WORKLOAD) { String name; WorkloadCreator func; @@ -144,7 +144,10 @@ void StressWorkloadManger::runWorkload() auto workload = std::shared_ptr(func(options)); LOG_INFO(StressEnv::logger, fmt::format("Start Running {} , {}", name, workload->desc())); workload->run(); - workload->onDumpResult(); + if (!options.just_init_pages) + { + workload->onDumpResult(); + } return; } diff --git a/dbms/src/Storages/Page/stress/workload/Normal.cpp b/dbms/src/Storages/Page/stress/workload/Normal.cpp index ec385b4b70c..0323b857613 100644 --- a/dbms/src/Storages/Page/stress/workload/Normal.cpp +++ b/dbms/src/Storages/Page/stress/workload/Normal.cpp @@ -52,10 +52,14 @@ class NormalWorkload } // init all pages in PageStorage - if (options.init_pages) + if (options.init_pages || options.just_init_pages) { PSWriter::fillAllPages(ps); LOG_INFO(StressEnv::logger, "All pages have been init."); + if (options.just_init_pages) + { + return; + } } stop_watch.start(); From 989bf8db36e73493ab9e9c780fca9cc77af0fa88 Mon Sep 17 00:00:00 2001 From: xufei Date: Thu, 14 Apr 2022 16:26:36 +0800 Subject: [PATCH 09/19] update client-c to disable grpc client keepalive (#4655) ref pingcap/tiflash#4192 --- contrib/client-c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/client-c b/contrib/client-c index 4e50596db3c..184cde7ae4f 160000 --- a/contrib/client-c +++ 
b/contrib/client-c @@ -1 +1 @@ -Subproject commit 4e50596db3c878f5bf8de86fe32638f09bf2c117 +Subproject commit 184cde7ae4f83c0e9aaaaf825f3e0e7d600e62fa From 6780525e0a2845e7da823b6fc2e7a147372d1d4b Mon Sep 17 00:00:00 2001 From: nauta Date: Fri, 15 Apr 2022 10:42:36 +0800 Subject: [PATCH 10/19] fix typo (#4671) close pingcap/tiflash#4672 --- dbms/src/Storages/Page/PageStorage.h | 2 +- dbms/src/Storages/Page/V1/PageStorage.h | 4 ++-- dbms/src/Storages/Page/V2/PageStorage.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index f863795ca1d..3b5e1a18c9d 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -62,7 +62,7 @@ struct ExternalPageCallbacks /** * A storage system stored pages. Pages are serialized objects referenced by PageID. Store Page with the same PageID - * will covered the old ones. + * will cover the old ones. * Users should call #gc() constantly to release disk space. * * This class is multi-threads safe. Support multi threads write, and multi threads read. diff --git a/dbms/src/Storages/Page/V1/PageStorage.h b/dbms/src/Storages/Page/V1/PageStorage.h index d6bc0ac85af..84691ce6d53 100644 --- a/dbms/src/Storages/Page/V1/PageStorage.h +++ b/dbms/src/Storages/Page/V1/PageStorage.h @@ -35,8 +35,8 @@ namespace DB::PS::V1 /** * A storage system stored pages. Pages are serialized objects referenced by PageId. Store Page with the same PageId - * will covered the old ones. The file used to persist the Pages called PageFile. The meta data of a Page, like the - * latest PageFile the Page is stored , the offset in file, and checksum, are cached in memory. Users should call + * will cover the old ones. The file used to persist the Pages called PageFile. The meta data of a Page, like the + * latest PageFile the Page is stored, the offset in file, and checksum, are cached in memory. Users should call * #gc() constantly to clean up the sparse PageFiles and release disk space. * * This class is multi-threads safe. Support single thread write, and multi threads read. diff --git a/dbms/src/Storages/Page/V2/PageStorage.h b/dbms/src/Storages/Page/V2/PageStorage.h index f1752545815..01633f9a052 100644 --- a/dbms/src/Storages/Page/V2/PageStorage.h +++ b/dbms/src/Storages/Page/V2/PageStorage.h @@ -37,8 +37,8 @@ namespace PS::V2 { /** * A storage system stored pages. Pages are serialized objects referenced by PageID. Store Page with the same PageID - * will covered the old ones. The file used to persist the Pages called PageFile. The meta data of a Page, like the - * latest PageFile the Page is stored , the offset in file, and checksum, are cached in memory. Users should call + * will cover the old ones. The file used to persist the Pages called PageFile. The meta data of a Page, like the + * latest PageFile the Page is stored, the offset in file, and checksum, are cached in memory. Users should call * #gc() constantly to clean up the sparse PageFiles and release disk space. * * This class is multi-threads safe. Support multi threads write, and multi threads read. From 1def5be0d300d30f8944ce31f7812b70a4ce52ec Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Fri, 15 Apr 2022 14:40:36 +0800 Subject: [PATCH 11/19] Add a fail point that can hold PS snapshot for five minute. 
(#4620) ref pingcap/tiflash#3594 --- dbms/src/Common/FailPoint.cpp | 3 ++- dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp | 10 ++++++++++ dbms/src/Storages/Page/V3/BlobStore.cpp | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index f5d9f247f06..6da54e74e69 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -79,7 +79,8 @@ std::unordered_map> FailPointHelper::f M(force_set_dtfile_exist_when_acquire_id) \ M(force_no_local_region_for_mpp_task) \ M(force_remote_read_for_batch_cop) \ - M(force_context_path) + M(force_context_path) \ + M(force_slow_page_storage_snapshot_release) #define APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) \ M(pause_after_learner_read) \ diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 11ec13f25dd..80b1d81f817 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -94,6 +94,7 @@ extern const char force_triggle_foreground_flush[]; extern const char force_set_segment_ingest_packs_fail[]; extern const char segment_merge_after_ingest_packs[]; extern const char random_exception_after_dt_write_done[]; +extern const char force_slow_page_storage_snapshot_release[]; } // namespace FailPoints namespace DM @@ -1036,11 +1037,20 @@ BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context, auto segment_snap = segment->createSnapshot(*dm_context, false, CurrentMetrics::DT_SnapshotOfReadRaw); if (unlikely(!segment_snap)) throw Exception("Failed to get segment snap", ErrorCodes::LOGICAL_ERROR); + tasks.push_back(std::make_shared(segment, segment_snap, RowKeyRanges{segment->getRowKeyRange()})); } } } + fiu_do_on(FailPoints::force_slow_page_storage_snapshot_release, { + std::thread thread_hold_snapshots([tasks]() { + std::this_thread::sleep_for(std::chrono::seconds(5 * 60)); + (void)tasks; + }); + thread_hold_snapshots.detach(); + }); + auto after_segment_read = [&](const DMContextPtr & dm_context_, const SegmentPtr & segment_) { this->checkSegmentUpdate(dm_context_, segment_, ThreadType::Read); }; diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index 6919e8081bd..0b7fb1669ff 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -978,6 +978,7 @@ void BlobStore::BlobStats::restore() for (auto & [path, stats] : stats_map) { + (void)path; for (const auto & stat : stats) { stat->recalculateSpaceMap(); From bd50b0efc629a30959c9fe908849084a891077b3 Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Fri, 15 Apr 2022 17:48:35 +0800 Subject: [PATCH 12/19] Interpreter: Hand write tipb::Executor. 
(#4632) ref pingcap/tiflash#4609 --- dbms/src/Debug/astToExecutor.cpp | 3 - dbms/src/TestUtils/InterpreterTestUtils.cpp | 82 ++++++ dbms/src/TestUtils/InterpreterTestUtils.h | 73 +++++ dbms/src/TestUtils/mockExecutor.cpp | 259 ++++++++++++++++++ dbms/src/TestUtils/mockExecutor.h | 137 +++++++++ .../TestUtils/tests/gtest_mock_executors.cpp | 164 +++++++++++ 6 files changed, 715 insertions(+), 3 deletions(-) create mode 100644 dbms/src/TestUtils/InterpreterTestUtils.cpp create mode 100644 dbms/src/TestUtils/InterpreterTestUtils.h create mode 100644 dbms/src/TestUtils/mockExecutor.cpp create mode 100644 dbms/src/TestUtils/mockExecutor.h create mode 100644 dbms/src/TestUtils/tests/gtest_mock_executors.cpp diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp index 5f7567f0eff..11b90e60fb9 100644 --- a/dbms/src/Debug/astToExecutor.cpp +++ b/dbms/src/Debug/astToExecutor.cpp @@ -29,9 +29,6 @@ #include #include -#include -#include - namespace DB { namespace diff --git a/dbms/src/TestUtils/InterpreterTestUtils.cpp b/dbms/src/TestUtils/InterpreterTestUtils.cpp new file mode 100644 index 00000000000..52ff5e1cb08 --- /dev/null +++ b/dbms/src/TestUtils/InterpreterTestUtils.cpp @@ -0,0 +1,82 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace DB::tests +{ +namespace +{ +String toTreeString(const tipb::Executor & root_executor, size_t level = 0); + +// serialize tipb::DAGRequest, print the executor name in a Tree format. 
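// Editor's note: an illustrative rendering of the tree format described above, for a
// plan built as table_scan -> selection -> limit. This matches the expected strings
// asserted later in gtest_mock_executors.cpp; indentation grows by one space per level:
//
//   limit_2
//    selection_1
//     table_scan_0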
+String toTreeString(std::shared_ptr dag_request) +{ + assert((dag_request->executors_size() > 0) != dag_request->has_root_executor()); + if (dag_request->has_root_executor()) + { + return toTreeString(dag_request->root_executor()); + } + else + { + FmtBuffer buffer; + String prefix; + traverseExecutors(dag_request.get(), [&buffer, &prefix](const tipb::Executor & executor) { + assert(executor.has_executor_id()); + buffer.fmtAppend("{}{}\n", prefix, executor.executor_id()); + prefix.append(" "); + return true; + }); + return buffer.toString(); + } +} + +String toTreeString(const tipb::Executor & root_executor, size_t level) +{ + FmtBuffer buffer; + + auto append_str = [&buffer, &level](const tipb::Executor & executor) { + assert(executor.has_executor_id()); + buffer.append(String(level, ' ')); + buffer.append(executor.executor_id()).append("\n"); + }; + + traverseExecutorTree(root_executor, [&](const tipb::Executor & executor) { + if (executor.has_join()) + { + append_str(executor); + ++level; + for (const auto & child : executor.join().children()) + buffer.append(toTreeString(child, level)); + return false; + } + else + { + append_str(executor); + ++level; + return true; + } + }); + + return buffer.toString(); +} +} // namespace + +void dagRequestEqual(String & expected_string, const std::shared_ptr & actual) +{ + String actual_string = toTreeString(actual); + ASSERT_EQ(Poco::trimInPlace(expected_string), Poco::trimInPlace(actual_string)); +} + +} // namespace DB::tests diff --git a/dbms/src/TestUtils/InterpreterTestUtils.h b/dbms/src/TestUtils/InterpreterTestUtils.h new file mode 100644 index 00000000000..e68bbe8ab47 --- /dev/null +++ b/dbms/src/TestUtils/InterpreterTestUtils.h @@ -0,0 +1,73 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +namespace DB::tests +{ +void dagRequestEqual(String & expected_string, const std::shared_ptr & actual); +class MockExecutorTest : public ::testing::Test +{ +protected: + void SetUp() override + { + initializeContext(); + } + +public: + MockExecutorTest() + : context(TiFlashTestEnv::getContext()) + {} + + static void SetUpTestCase() + { + try + { + DB::registerFunctions(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. 
+ } + } + + virtual void initializeContext() + { + dag_context_ptr = std::make_unique(1024); + context.setDAGContext(dag_context_ptr.get()); + mock_dag_request_context = MockDAGRequestContext(); + } + + DAGContext & getDAGContext() + { + assert(dag_context_ptr != nullptr); + return *dag_context_ptr; + } + +protected: + Context context; + MockDAGRequestContext mock_dag_request_context; + std::unique_ptr dag_context_ptr; +}; + +#define ASSERT_DAGREQUEST_EQAUL(str, request) dagRequestEqual(str, request); +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp new file mode 100644 index 00000000000..8295d161753 --- /dev/null +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -0,0 +1,259 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +namespace DB::tests +{ +ASTPtr buildColumn(const String & column_name) +{ + return std::make_shared(column_name); +} + +ASTPtr buildLiteral(const Field & field) +{ + return std::make_shared(field); +} + +ASTPtr buildOrderByItemList(MockOrderByItems order_by_items) +{ + std::vector vec; + for (auto item : order_by_items) + { + int direction = item.second ? 1 : -1; + ASTPtr locale_node; + auto order_by_item = std::make_shared(direction, direction, false, locale_node); + order_by_item->children.push_back(std::make_shared(item.first)); + vec.push_back(order_by_item); + } + auto exp_list = std::make_shared(); + exp_list->children.insert(exp_list->children.end(), vec.begin(), vec.end()); + return exp_list; +} + +// a mock DAGRequest should prepare its time_zone, flags, encode_type and output_schema. +void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request) +{ + dag_request.set_time_zone_name(properties.tz_name); + dag_request.set_time_zone_offset(properties.tz_offset); + dag_request.set_flags(dag_request.flags() | (1u << 1u /* TRUNCATE_AS_WARNING */) | (1u << 6u /* OVERFLOW_AS_WARNING */)); + + if (properties.encode_type == "chunk") + dag_request.set_encode_type(tipb::EncodeType::TypeChunk); + else if (properties.encode_type == "chblock") + dag_request.set_encode_type(tipb::EncodeType::TypeCHBlock); + else + dag_request.set_encode_type(tipb::EncodeType::TypeDefault); + + for (size_t i = 0; i < root->output_schema.size(); ++i) + dag_request.add_output_offsets(i); +} + +// traval the AST tree to build tipb::Executor recursively. 
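// Editor's note: a minimal, hypothetical usage sketch of build(); the table name and
// schema are placeholders, and `context` is assumed to be a usable DB::Context as in
// the tests under dbms/src/TestUtils/tests:
//
//   size_t executor_index = 0;
//   auto request = DAGRequestBuilder(executor_index)
//                      .mockTable({"test_db", "test_table"},
//                                 {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}})
//                      .filter(eq(col("s1"), col("s2")))
//                      .build(context);
//
// `request` then holds selection_1 -> table_scan_0, and the builder can be reused,
// since build() resets `root` and the executor index.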
+std::shared_ptr DAGRequestBuilder::build(Context & context) +{ + MPPInfo mpp_info(properties.start_ts, -1, -1, {}, {}); + std::shared_ptr dag_request_ptr = std::make_shared(); + tipb::DAGRequest & dag_request = *dag_request_ptr; + initDAGRequest(dag_request); + root->toTiPBExecutor(dag_request.mutable_root_executor(), properties.collator, mpp_info, context); + root.reset(); + executor_index = 0; + return dag_request_ptr; +} + +DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String & table, const MockColumnInfos & columns) +{ + assert(!columns.empty()); + TableInfo table_info; + table_info.name = db + "." + table; + for (const auto & column : columns) + { + TiDB::ColumnInfo ret; + ret.tp = column.second; + ret.name = column.first; + table_info.columns.push_back(std::move(ret)); + } + String empty_alias; + root = compileTableScan(getExecutorIndex(), table_info, empty_alias, false); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const std::vector> & columns) +{ + return mockTable(name.first, name.second, columns); +} + +DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfoList & columns) +{ + return mockTable(name.first, name.second, columns); +} + +DAGRequestBuilder & DAGRequestBuilder::filter(ASTPtr filter_expr) +{ + assert(root); + root = compileSelection(root, getExecutorIndex(), filter_expr); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::limit(int limit) +{ + assert(root); + root = compileLimit(root, getExecutorIndex(), buildLiteral(Field(static_cast(limit)))); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::limit(ASTPtr limit_expr) +{ + assert(root); + root = compileLimit(root, getExecutorIndex(), limit_expr); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::topN(ASTPtr order_exprs, ASTPtr limit_expr) +{ + assert(root); + root = compileTopN(root, getExecutorIndex(), order_exprs, limit_expr); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::topN(const String & col_name, bool desc, int limit) +{ + assert(root); + root = compileTopN(root, getExecutorIndex(), buildOrderByItemList({{col_name, desc}}), buildLiteral(Field(static_cast(limit)))); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, int limit) +{ + return topN(order_by_items, buildLiteral(Field(static_cast(limit)))); +} + +DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, ASTPtr limit_expr) +{ + assert(root); + root = compileTopN(root, getExecutorIndex(), buildOrderByItemList(order_by_items), limit_expr); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::project(const String & col_name) +{ + assert(root); + root = compileProject(root, getExecutorIndex(), buildColumn(col_name)); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::project(MockAsts exprs) +{ + assert(root); + auto exp_list = std::make_shared(); + for (const auto & expr : exprs) + { + exp_list->children.push_back(expr); + } + root = compileProject(root, getExecutorIndex(), exp_list); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNames col_names) +{ + assert(root); + auto exp_list = std::make_shared(); + for (const auto & name : col_names) + { + exp_list->children.push_back(col(name)); + } + + root = compileProject(root, getExecutorIndex(), exp_list); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, ASTPtr using_expr_list) 
+{ + return join(right, using_expr_list, ASTTableJoin::Kind::Inner); +} + +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, ASTPtr using_expr_list, ASTTableJoin::Kind kind) +{ + assert(root); + assert(right.root); + auto join_ast = std::make_shared(); + join_ast->using_expression_list = using_expr_list; + join_ast->strictness = ASTTableJoin::Strictness::All; + join_ast->kind = kind; + root = compileJoin(getExecutorIndex(), root, right.root, join_ast); + return *this; +} + +DAGRequestBuilder & DAGRequestBuilder::aggregation(ASTPtr agg_func, ASTPtr group_by_expr) +{ + auto agg_funcs = std::make_shared(); + auto group_by_exprs = std::make_shared(); + agg_funcs->children.push_back(agg_func); + group_by_exprs->children.push_back(group_by_expr); + return buildAggregation(agg_funcs, group_by_exprs); +} + +DAGRequestBuilder & DAGRequestBuilder::aggregation(MockAsts agg_funcs, MockAsts group_by_exprs) +{ + auto agg_func_list = std::make_shared(); + auto group_by_expr_list = std::make_shared(); + for (const auto & func : agg_funcs) + agg_func_list->children.push_back(func); + for (const auto & group_by : group_by_exprs) + group_by_expr_list->children.push_back(group_by); + + return buildAggregation(agg_func_list, group_by_expr_list); +} + +DAGRequestBuilder & DAGRequestBuilder::buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs) +{ + assert(root); + root = compileAggregation(root, getExecutorIndex(), agg_funcs, group_by_exprs); + return *this; +} + + +void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoList & columns) +{ + std::vector v_column_info; + for (const auto & info : columns) + { + v_column_info.push_back(std::move(info)); + } + mock_tables[name.first + "." + name.second] = v_column_info; +} + +void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfos & columns) +{ + mock_tables[db + "." + table] = columns; +} + +void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfos & columns) +{ + mock_tables[name.first + "." + name.second] = columns; +} + +DAGRequestBuilder MockDAGRequestContext::scan(String db_name, String table_name) +{ + return DAGRequestBuilder(index).mockTable({db_name, table_name}, mock_tables[db_name + "." + table_name]); +} + +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h new file mode 100644 index 00000000000..24d2df21f4a --- /dev/null +++ b/dbms/src/TestUtils/mockExecutor.h @@ -0,0 +1,137 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+
+namespace DB::tests
+{
+using MockColumnInfo = std::pair<String, TiDB::TP>;
+using MockColumnInfos = std::vector<MockColumnInfo>;
+using MockColumnInfoList = std::initializer_list<MockColumnInfo>;
+using MockTableName = std::pair<String, String>;
+using MockOrderByItem = std::pair<String, bool>;
+using MockOrderByItems = std::initializer_list<MockOrderByItem>;
+using MockColumnNames = std::initializer_list<String>;
+using MockAsts = std::initializer_list<ASTPtr>;
+
+
+/** Responsible for hand-writing a tipb::DAGRequest.
+ * Use this class to mock a DAGRequest, then feed the DAGRequest into
+ * the Interpreter for test purposes.
+ * The mockTable() method must be called first in order to generate the table schema.
+ * After constructing all necessary operators in the DAGRequest, call build() to generate the DAGRequest.
+ */
+class DAGRequestBuilder
+{
+public:
+    size_t & executor_index;
+
+    size_t & getExecutorIndex() const
+    {
+        return executor_index;
+    }
+
+    explicit DAGRequestBuilder(size_t & index)
+        : executor_index(index)
+    {}
+
+    std::shared_ptr<tipb::DAGRequest> build(Context & context);
+
+    DAGRequestBuilder & mockTable(const String & db, const String & table, const MockColumnInfos & columns);
+    DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfos & columns);
+    DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfoList & columns);
+
+    DAGRequestBuilder & filter(ASTPtr filter_expr);
+
+    DAGRequestBuilder & limit(int limit);
+    DAGRequestBuilder & limit(ASTPtr limit_expr);
+
+    DAGRequestBuilder & topN(ASTPtr order_exprs, ASTPtr limit_expr);
+    DAGRequestBuilder & topN(const String & col_name, bool desc, int limit);
+    DAGRequestBuilder & topN(MockOrderByItems order_by_items, int limit);
+    DAGRequestBuilder & topN(MockOrderByItems order_by_items, ASTPtr limit_expr);
+
+    DAGRequestBuilder & project(const String & col_name);
+    DAGRequestBuilder & project(MockAsts expr);
+    DAGRequestBuilder & project(MockColumnNames col_names);
+
+    // Currently only inner join, left join and right join are supported.
+    // TODO: support more types of join.
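// Editor's note: an illustrative call chain for the two join overloads declared below,
// mirroring the Join case in gtest_mock_executors.cpp:
//
//   left_builder.join(right_builder, col("l_a"), ASTTableJoin::Kind::Left)
//               .limit(10)
//               .build(context);
//
// The overload without an explicit kind defaults to ASTTableJoin::Kind::Inner.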
+ DAGRequestBuilder & join(const DAGRequestBuilder & right, ASTPtr using_expr_list); + DAGRequestBuilder & join(const DAGRequestBuilder & right, ASTPtr using_expr_list, ASTTableJoin::Kind kind); + + // aggregation + DAGRequestBuilder & aggregation(ASTPtr agg_func, ASTPtr group_by_expr); + DAGRequestBuilder & aggregation(MockAsts agg_funcs, MockAsts group_by_exprs); + +private: + void initDAGRequest(tipb::DAGRequest & dag_request); + DAGRequestBuilder & buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs); + + ExecutorPtr root; + DAGProperties properties; +}; + +/** Responsible for storing necessary arguments in order to Mock DAGRequest + * index: used in DAGRequestBuilder to identify executors + * mock_tables: DAGRequestBuilder uses it to mock TableScan executors + */ +class MockDAGRequestContext +{ +public: + MockDAGRequestContext() + { + index = 0; + } + + DAGRequestBuilder createDAGRequestBuilder() + { + return DAGRequestBuilder(index); + } + + void addMockTable(const MockTableName & name, const MockColumnInfoList & columns); + void addMockTable(const String & db, const String & table, const MockColumnInfos & columns); + void addMockTable(const MockTableName & name, const MockColumnInfos & columns); + + DAGRequestBuilder scan(String db_name, String table_name); + +private: + size_t index; + std::unordered_map mock_tables; +}; + +ASTPtr buildColumn(const String & column_name); +ASTPtr buildLiteral(const Field & field); +ASTPtr buildFunction(MockAsts exprs, const String & name); +ASTPtr buildOrderByItemList(MockOrderByItems order_by_items); + +#define col(name) buildColumn((name)) +#define lit(field) buildLiteral((field)) +#define eq(expr1, expr2) makeASTFunction("equals", (expr1), (expr2)) +#define Not_eq(expr1, expr2) makeASTFunction("notEquals", (expr1), (expr2)) +#define lt(expr1, expr2) makeASTFunction("less", (expr1), (expr2)) +#define gt(expr1, expr2) makeASTFunction("greater", (expr1), (expr2)) +#define And(expr1, expr2) makeASTFunction("and", (expr1), (expr2)) +#define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2)) +#define NOT(expr) makeASTFunction("not", (expr1), (expr2)) +#define Max(expr) makeASTFunction("max", expr) + +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp new file mode 100644 index 00000000000..2be63311034 --- /dev/null +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -0,0 +1,164 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
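// Editor's note: the NOT macro defined in mockExecutor.h above takes a single `expr`
// parameter but expands to makeASTFunction("not", (expr1), (expr2)); any use of
// NOT(...) would therefore fail to compile (it is merely never used in these tests).
// A corrected single-argument form would be:
//
//   #define NOT(expr) makeASTFunction("not", (expr))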
+ +#include +#include +#include + +namespace DB +{ +namespace tests +{ +class MockDAGRequestTest : public DB::tests::MockExecutorTest +{ +public: + void initializeContext() override + { + dag_context_ptr = std::make_unique(1024); + context.setDAGContext(dag_context_ptr.get()); + mock_dag_request_context = MockDAGRequestContext(); + mock_dag_request_context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}); + mock_dag_request_context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeLong}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); + mock_dag_request_context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"r_c", TiDB::TP::TypeString}}); + mock_dag_request_context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"l_c", TiDB::TP::TypeString}}); + } +}; + +TEST_F(MockDAGRequestTest, MockTable) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table").build(context); + String expected_string_1 = "table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_1, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1").build(context); + String expected_string_2 = "table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Filter) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table").filter(eq(col("s1"), col("s2"))).build(context); + String expected_string = "selection_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1") + .filter(And(eq(col("s1"), col("s2")), lt(col("s2"), col("s3")))) + .build(context); + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Projection) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table") + .project("s1") + .build(context); + String expected_string = "project_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1") + .project({col("s3"), eq(col("s1"), col("s2"))}) + .build(context); + String expected_string_2 = "project_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string_2, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1") + .project({"s1", "s2"}) + .build(context); + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Limit) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table") + .limit(10) + .build(context); + String expected_string = "limit_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = mock_dag_request_context.scan("test_db", "test_table_1") + .limit(lit(Field(static_cast(10)))) + .build(context); + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, TopN) +try +{ + auto request = mock_dag_request_context.scan("test_db", "test_table") + .topN({{"s1", false}}, 10) + .build(context); + String expected_string = "topn_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); + + request = mock_dag_request_context.scan("test_db", "test_table") + .topN("s1", false, 10) + .build(context); + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Aggregation) +try +{ 
+ auto request = mock_dag_request_context.scan("test_db", "test_table") + .aggregation(Max(col("s1")), col("s2")) + .build(context); + String expected_string = "aggregation_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +TEST_F(MockDAGRequestTest, Join) +try +{ + DAGRequestBuilder right_builder = mock_dag_request_context.scan("test_db", "r_table") + .filter(eq(col("r_a"), col("r_b"))) + .project({col("r_a"), col("r_b")}) + .aggregation(Max(col("r_a")), col("r_b")); + + + DAGRequestBuilder left_builder = mock_dag_request_context.scan("test_db", "l_table") + .topN({{"l_a", false}}, 10) + .join(right_builder, col("l_a"), ASTTableJoin::Kind::Left) + .limit(10); + + auto request = left_builder.build(context); + String expected_string = "limit_7\n" + " Join_6\n" + " topn_5\n" + " table_scan_4\n" + " aggregation_3\n" + " project_2\n" + " selection_1\n" + " table_scan_0\n"; + ASSERT_DAGREQUEST_EQAUL(expected_string, request); +} +CATCH + +} // namespace tests +} // namespace DB \ No newline at end of file From 66f45c76692e941bc845c01349ea89de0f2cc210 Mon Sep 17 00:00:00 2001 From: SeaRise Date: Mon, 18 Apr 2022 10:48:03 +0800 Subject: [PATCH 13/19] refine `SubqueryForSet` (#4623) ref pingcap/tiflash#4118 --- .../CreatingSetsBlockInputStream.h | 2 +- dbms/src/Flash/Coprocessor/DAGContext.cpp | 7 +++ dbms/src/Flash/Coprocessor/DAGContext.h | 8 +++ .../Coprocessor/DAGQueryBlockInterpreter.cpp | 9 +-- .../Coprocessor/DAGQueryBlockInterpreter.h | 5 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 30 +++++----- dbms/src/Flash/Coprocessor/InterpreterDAG.h | 13 +---- dbms/src/Interpreters/ExpressionAnalyzer.h | 34 +---------- dbms/src/Interpreters/SubqueryForSet.h | 57 +++++++++++++++++++ 9 files changed, 96 insertions(+), 69 deletions(-) create mode 100644 dbms/src/Interpreters/SubqueryForSet.h diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h index 2f9ad61e4c8..b8e2ee6fe87 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h @@ -17,7 +17,7 @@ #include #include #include -#include /// SubqueriesForSets +#include namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index 1f6618d3170..17fb6553eab 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -63,6 +63,13 @@ bool DAGContext::allowInvalidDate() const return sql_mode & TiDBSQLMode::ALLOW_INVALID_DATES; } +void DAGContext::addSubquery(const String & subquery_id, SubqueryForSet && subquery) +{ + SubqueriesForSets subqueries_for_sets; + subqueries_for_sets[subquery_id] = std::move(subquery); + subqueries.push_back(std::move(subqueries_for_sets)); +} + std::unordered_map & DAGContext::getProfileStreamsMap() { return profile_streams_map; diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 30397dc496a..18ad73ec207 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -29,6 +29,7 @@ #include #include #include +#include #include namespace DB @@ -279,6 +280,10 @@ class DAGContext void initExchangeReceiverIfMPP(Context & context, size_t max_streams); const std::unordered_map> & getMPPExchangeReceiverMap() const; + void addSubquery(const String & subquery_id, SubqueryForSet && subquery); + bool hasSubquery() const { return !subqueries.empty(); } + std::vector && 
moveSubqueries() { return std::move(subqueries); } + const tipb::DAGRequest * dag_request; Int64 compile_time_ns = 0; size_t final_concurrency = 1; @@ -337,6 +342,9 @@ class DAGContext /// key: executor_id of ExchangeReceiver nodes in dag. std::unordered_map> mpp_exchange_receiver_map; bool mpp_exchange_receiver_map_inited = false; + /// vector of SubqueriesForSets(such as join build subquery). + /// The order of the vector is also the order of the subquery. + std::vector subqueries; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 51cd1bf671f..b4832ff4f17 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -56,13 +56,11 @@ DAGQueryBlockInterpreter::DAGQueryBlockInterpreter( Context & context_, const std::vector & input_streams_vec_, const DAGQueryBlock & query_block_, - size_t max_streams_, - std::vector & subqueries_for_sets_) + size_t max_streams_) : context(context_) , input_streams_vec(input_streams_vec_) , query_block(query_block_) , max_streams(max_streams_) - , subqueries_for_sets(subqueries_for_sets_) , log(Logger::get("DAGQueryBlockInterpreter", dagContext().log ? dagContext().log->identifier() : "")) {} @@ -1023,10 +1021,7 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) SubqueryForSet right_query; handleJoin(query_block.source->join(), pipeline, right_query); recordProfileStreams(pipeline, query_block.source_name); - - SubqueriesForSets subquries; - subquries[query_block.source_name] = right_query; - subqueries_for_sets.emplace_back(subquries); + dagContext().addSubquery(query_block.source_name, std::move(right_query)); } else if (query_block.source->tp() == tipb::ExecType::TypeExchangeReceiver) { diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 35627cd19ee..b681d22188c 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -47,8 +47,7 @@ class DAGQueryBlockInterpreter Context & context_, const std::vector & input_streams_vec_, const DAGQueryBlock & query_block_, - size_t max_streams_, - std::vector & subqueries_for_sets_); + size_t max_streams_); ~DAGQueryBlockInterpreter() = default; @@ -117,8 +116,6 @@ class DAGQueryBlockInterpreter std::unique_ptr analyzer; - std::vector & subqueries_for_sets; - LoggerPtr log; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 1bfe87e5695..4c67d67e4f9 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -13,10 +13,11 @@ // limitations under the License. 
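// Editor's note: a condensed sketch of the new subquery ownership flow introduced by
// this patch (names below are from the patch itself; `dag_context` stands for the
// DAGContext instance reachable via dagContext()):
//
//   SubqueryForSet right_query;                     // filled in by handleJoin()
//   dag_context.addSubquery(query_block.source_name, std::move(right_query));
//   ...
//   if (dag_context.hasSubquery())
//       // wrap the first stream in CreatingSetsBlockInputStream, feeding it
//       // std::move(dag_context.moveSubqueries()) so build order is preserved
//
// Each addSubquery() call stores one SubqueryForSet inside its own SubqueriesForSets
// map, and the vector keeps those maps in creation order.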
#include -#include +#include +#include #include #include -#include +#include namespace DB { @@ -35,23 +36,27 @@ InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) } } +DAGContext & InterpreterDAG::dagContext() const +{ + return *context.getDAGContext(); +} + /** executeQueryBlock recursively converts all the children of the DAGQueryBlock and itself (Coprocessor DAG request) * into an array of IBlockInputStream (element of physical executing plan of TiFlash) */ -BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block, std::vector & subqueries_for_sets) +BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) { std::vector input_streams_vec; for (auto & child : query_block.children) { - BlockInputStreams child_streams = executeQueryBlock(*child, subqueries_for_sets); + BlockInputStreams child_streams = executeQueryBlock(*child); input_streams_vec.push_back(child_streams); } DAGQueryBlockInterpreter query_block_interpreter( context, input_streams_vec, query_block, - max_streams, - subqueries_for_sets); + max_streams); return query_block_interpreter.execute(); } @@ -60,26 +65,23 @@ BlockIO InterpreterDAG::execute() /// Due to learner read, DAGQueryBlockInterpreter may take a long time to build /// the query plan, so we init mpp exchange receiver before executeQueryBlock dagContext().initExchangeReceiverIfMPP(context, max_streams); - /// region_info should base on the source executor, however - /// tidb does not support multi-table dag request yet, so - /// it is ok to use the same region_info for the whole dag request - std::vector subqueries_for_sets; - BlockInputStreams streams = executeQueryBlock(*dag.getRootQueryBlock(), subqueries_for_sets); + + BlockInputStreams streams = executeQueryBlock(*dag.getRootQueryBlock()); DAGPipeline pipeline; pipeline.streams = streams; /// add union to run in parallel if needed - if (context.getDAGContext()->isMPPTask()) + if (dagContext().isMPPTask()) /// MPPTask do not need the returned blocks. 
executeUnion(pipeline, max_streams, dagContext().log, /*ignore_block=*/true); else executeUnion(pipeline, max_streams, dagContext().log); - if (!subqueries_for_sets.empty()) + if (dagContext().hasSubquery()) { const Settings & settings = context.getSettingsRef(); pipeline.firstStream() = std::make_shared( pipeline.firstStream(), - std::move(subqueries_for_sets), + std::move(dagContext().moveSubqueries()), SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode), dagContext().log->identifier()); } diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.h b/dbms/src/Flash/Coprocessor/InterpreterDAG.h index 46b995ef9a6..40f7d8c62cf 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.h +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.h @@ -21,16 +21,9 @@ #pragma GCC diagnostic pop #include -#include -#include +#include #include -#include -#include -#include #include -#include -#include -#include namespace DB { @@ -50,9 +43,9 @@ class InterpreterDAG : public IInterpreter BlockIO execute() override; private: - BlockInputStreams executeQueryBlock(DAGQueryBlock & query_block, std::vector & subqueries_for_sets); + BlockInputStreams executeQueryBlock(DAGQueryBlock & query_block); - DAGContext & dagContext() const { return *context.getDAGContext(); } + DAGContext & dagContext() const; Context & context; const DAGQuerySource & dag; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index fb8bea20a8a..3558b0ffc90 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB @@ -26,21 +27,8 @@ class Context; class ExpressionActions; struct ExpressionActionsChain; -class Join; -using JoinPtr = std::shared_ptr; - -class IAST; -using ASTPtr = std::shared_ptr; - -class Set; -using SetPtr = std::shared_ptr; using PreparedSets = std::unordered_map; -class IBlockInputStream; -using BlockInputStreamPtr = std::shared_ptr; - -class IStorage; -using StoragePtr = std::shared_ptr; using Tables = std::map; class ASTFunction; @@ -48,26 +36,6 @@ class ASTExpressionList; class ASTSelectQuery; -/** Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. - */ -struct SubqueryForSet -{ - /// The source is obtained using the InterpreterSelectQuery subquery. - BlockInputStreamPtr source; - - /// If set, build it from result. - SetPtr set; - JoinPtr join; - - /// If set, put the result into the table. - /// This is a temporary table for transferring to remote servers for distributed query processing. - StoragePtr table; -}; - -/// ID of subquery -> what to do with it. -using SubqueriesForSets = std::unordered_map; - - /** Transforms an expression from a syntax tree into a sequence of actions to execute it. * * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. diff --git a/dbms/src/Interpreters/SubqueryForSet.h b/dbms/src/Interpreters/SubqueryForSet.h new file mode 100644 index 00000000000..b3c45e948e1 --- /dev/null +++ b/dbms/src/Interpreters/SubqueryForSet.h @@ -0,0 +1,57 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include +#include + +namespace DB +{ +class Join; +using JoinPtr = std::shared_ptr; + +class IAST; +using ASTPtr = std::shared_ptr; + +class Set; +using SetPtr = std::shared_ptr; + +class IBlockInputStream; +using BlockInputStreamPtr = std::shared_ptr; + +class IStorage; +using StoragePtr = std::shared_ptr; + +/** Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section. + */ +struct SubqueryForSet +{ + /// The source is obtained using the InterpreterSelectQuery subquery. + BlockInputStreamPtr source; + + /// If set, build it from result. + SetPtr set; + JoinPtr join; + + /// If set, put the result into the table. + /// This is a temporary table for transferring to remote servers for distributed query processing. + StoragePtr table; +}; + +/// ID of subquery -> what to do with it. +using SubqueriesForSets = std::unordered_map; +} // namespace DB From 605ddc0ebe68f90503877e90e4f676914a2e4055 Mon Sep 17 00:00:00 2001 From: yibin Date: Mon, 18 Apr 2022 15:34:03 +0800 Subject: [PATCH 14/19] Add gtests for MPPTunnel (#4553) close pingcap/tiflash#4481 --- dbms/src/Flash/Mpp/MPPTunnel.cpp | 22 + dbms/src/Flash/Mpp/MPPTunnel.h | 17 + dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp | 630 +++++++++++++++++++ 3 files changed, 669 insertions(+) create mode 100644 dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp index 78d7312d919..6db39e61586 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp @@ -51,6 +51,28 @@ MPPTunnelBase::MPPTunnelBase( GET_METRIC(tiflash_object_count, type_count_of_mpptunnel).Increment(); } +template +MPPTunnelBase::MPPTunnelBase( + const String & tunnel_id_, + const std::chrono::seconds timeout_, + int input_steams_num_, + bool is_local_, + bool is_async_, + const String & req_id) + : connected(false) + , finished(false) + , is_local(is_local_) + , is_async(is_async_) + , timeout(timeout_) + , tunnel_id(tunnel_id_) + , input_streams_num(input_steams_num_) + , send_queue(std::max(5, input_steams_num_ * 5)) // MPMCQueue can benefit from a slightly larger queue size + , thread_manager(newThreadManager()) + , log(Logger::get("MPPTunnel", req_id, tunnel_id)) +{ + RUNTIME_ASSERT(!(is_local && is_async), log, "is_local: {}, is_async: {}.", is_local, is_async); +} + template MPPTunnelBase::~MPPTunnelBase() { diff --git a/dbms/src/Flash/Mpp/MPPTunnel.h b/dbms/src/Flash/Mpp/MPPTunnel.h index be2fbadfd38..bdc60a97f5a 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.h +++ b/dbms/src/Flash/Mpp/MPPTunnel.h @@ -42,6 +42,12 @@ namespace DB { +namespace tests +{ +class MPPTunnelTest; +class TestMPPTunnelBase; +} // namespace tests + class EstablishCallData; /** @@ -123,6 +129,17 @@ class MPPTunnelBase : private boost::noncopyable void sendJob(bool need_lock = true); private: + friend class tests::MPPTunnelTest; + friend class tests::TestMPPTunnelBase; + // For gtest usage + MPPTunnelBase( + const String & tunnel_id_, + std::chrono::seconds timeout_, + int input_steams_num_, + bool is_local_, + bool is_async_, + const 
String & req_id); + void finishSendQueue(); void waitUntilConnectedOrFinished(std::unique_lock & lk); diff --git a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp new file mode 100644 index 00000000000..133142cc867 --- /dev/null +++ b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp @@ -0,0 +1,630 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ +class MPPTunnelTest : public MPPTunnelBase +{ +public: + using Base = MPPTunnelBase; + using Base::Base; + MPPTunnelTest( + const String & tunnel_id_, + std::chrono::seconds timeout_, + int input_steams_num_, + bool is_local_, + bool is_async_, + const String & req_id) + : Base(tunnel_id_, timeout_, input_steams_num_, is_local_, is_async_, req_id) + {} + void setFinishFlag(bool flag) + { + finished = flag; + } + bool getFinishFlag() + { + return finished; + } + bool getConnectFlag() + { + return connected; + } + std::shared_ptr getThreadManager() + { + return thread_manager; + } +}; + +using MPPTunnelTestPtr = std::shared_ptr; + +class MockWriter : public PacketWriter +{ + bool write(const mpp::MPPDataPacket & packet) override + { + write_packet_vec.push_back(packet.data()); + return true; + } + +public: + std::vector write_packet_vec; +}; + +class MockFailedWriter : public PacketWriter +{ + bool write(const mpp::MPPDataPacket &) override + { + return false; + } +}; + +struct MockLocalReader +{ + MPPTunnelTestPtr tunnel; + std::vector write_packet_vec; + + explicit MockLocalReader(const MPPTunnelTestPtr & tunnel_) + : tunnel(tunnel_) + {} + + ~MockLocalReader() + { + if (tunnel) + { + // In case that ExchangeReceiver throw error before finish reading from mpp_tunnel + tunnel->consumerFinish("Receiver closed"); + } + } + + void read() + { + while (true) + { + MPPDataPacketPtr tmp_packet = tunnel->readForLocal(); + bool success = tmp_packet != nullptr; + if (success) + { + write_packet_vec.push_back(tmp_packet->data()); + } + else + { + break; + } + } + } +}; +using MockLocalReaderPtr = std::shared_ptr; + +struct MockTerminateLocalReader +{ + MPPTunnelTestPtr tunnel; + + explicit MockTerminateLocalReader(const MPPTunnelTestPtr & tunnel_) + : tunnel(tunnel_) + {} + + ~MockTerminateLocalReader() + { + if (tunnel) + { + // In case that ExchangeReceiver throw error before finish reading from mpp_tunnel + tunnel->consumerFinish("Receiver closed"); + } + } + + void read() const + { + MPPDataPacketPtr tmp_packet = tunnel->readForLocal(); + tunnel->consumerFinish("Receiver closed"); + } +}; +using MockTerminateLocalReaderPtr = std::shared_ptr; + + +class MockAsyncWriter : public PacketWriter +{ +public: + explicit MockAsyncWriter(MPPTunnelTestPtr tunnel_) + : tunnel(tunnel_) + {} + bool write(const mpp::MPPDataPacket & packet) override + { + write_packet_vec.push_back(packet.data()); + // Simulate the async process, write success then check if exist msg, 
+class MockAsyncWriter : public PacketWriter
+{
+public:
+    explicit MockAsyncWriter(MPPTunnelTestPtr tunnel_)
+        : tunnel(tunnel_)
+    {}
+    bool write(const mpp::MPPDataPacket & packet) override
+    {
+        write_packet_vec.push_back(packet.data());
+        // Simulate the async process: after a successful write, check whether a msg is pending, then write again
+        if (tunnel->isSendQueueNextPopNonBlocking())
+        {
+            tunnel->sendJob(false);
+        }
+        return true;
+    }
+
+    void tryFlushOne() override
+    {
+        if (ready && tunnel->isSendQueueNextPopNonBlocking())
+        {
+            tunnel->sendJob(false);
+        }
+        ready = true;
+    }
+    MPPTunnelTestPtr tunnel;
+    std::vector<String> write_packet_vec;
+    bool ready = false;
+};
+
+class MockFailedAsyncWriter : public PacketWriter
+{
+public:
+    explicit MockFailedAsyncWriter(MPPTunnelTestPtr tunnel_)
+        : tunnel(tunnel_)
+    {}
+    bool write(const mpp::MPPDataPacket & packet) override
+    {
+        write_packet_vec.push_back(packet.data());
+        // Simulate the async process: after a successful write, check whether a msg is pending, then write again
+        if (tunnel->isSendQueueNextPopNonBlocking())
+        {
+            tunnel->sendJob(false);
+        }
+        return false;
+    }
+
+    void tryFlushOne() override
+    {
+        if (ready && tunnel->isSendQueueNextPopNonBlocking())
+        {
+            tunnel->sendJob(false);
+        }
+        ready = true;
+    }
+    MPPTunnelTestPtr tunnel;
+    std::vector<String> write_packet_vec;
+    bool ready = false;
+};
+
+class TestMPPTunnelBase : public testing::Test
+{
+protected:
+    virtual void SetUp() override { timeout = std::chrono::seconds(10); }
+    virtual void TearDown() override {}
+    std::chrono::seconds timeout;
+
+public:
+    MPPTunnelTestPtr constructRemoteSyncTunnel()
+    {
+        auto tunnel = std::make_shared<MPPTunnelTest>(String("0000_0001"), timeout, 2, false, false, String("0"));
+        return tunnel;
+    }
+
+    MPPTunnelTestPtr constructLocalSyncTunnel()
+    {
+        auto tunnel = std::make_shared<MPPTunnelTest>(String("0000_0001"), timeout, 2, true, false, String("0"));
+        return tunnel;
+    }
+
+    static MockLocalReaderPtr connectLocalSyncTunnel(MPPTunnelTestPtr mpp_tunnel_ptr)
+    {
+        mpp_tunnel_ptr->connect(nullptr);
+        MockLocalReaderPtr local_reader_ptr = std::make_shared<MockLocalReader>(mpp_tunnel_ptr);
+        mpp_tunnel_ptr->getThreadManager()->schedule(true, "LocalReader", [local_reader_ptr] {
+            local_reader_ptr->read();
+        });
+        return local_reader_ptr;
+    }
+
+    MPPTunnelTestPtr constructRemoteAsyncTunnel()
+    {
+        auto tunnel = std::make_shared<MPPTunnelTest>(String("0000_0001"), timeout, 2, false, true, String("0"));
+        return tunnel;
+    }
+};
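// Editorial note, illustrative only (not part of the original patch): the tests
// below all exercise the same tunnel lifecycle. Assuming the helpers above, a
// happy-path run looks roughly like:
//
//     auto tunnel = std::make_shared<MPPTunnelTest>("0000_0001", std::chrono::seconds(10), 2, false, false, "0");
//     MockWriter writer;
//     tunnel->connect(&writer);   // flips the connected flag
//     mpp::MPPDataPacket packet;
//     packet.set_data("First");
//     tunnel->write(packet);      // queued, drained into writer.write_packet_vec
//     tunnel->writeDone();        // flips the finished flag
//
// The error-path tests assert the exact exception message thrown when this
// order is violated.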
+TEST_F(TestMPPTunnelBase, ConnectWhenFinished)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+    mpp_tunnel_ptr->setFinishFlag(true);
+    mpp_tunnel_ptr->connect(nullptr);
+    GTEST_FAIL();
+}
+catch (Exception & e)
+{
+    GTEST_ASSERT_EQ(e.message(), "MPPTunnel has finished");
+}
+
+TEST_F(TestMPPTunnelBase, ConnectWhenConnected)
+{
+    try
+    {
+        auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+        std::unique_ptr<PacketWriter> writer_ptr = std::make_unique<MockWriter>();
+        mpp_tunnel_ptr->connect(writer_ptr.get());
+        GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+        mpp_tunnel_ptr->connect(writer_ptr.get());
+        GTEST_FAIL();
+    }
+    catch (Exception & e)
+    {
+        GTEST_ASSERT_EQ(e.message(), "MPPTunnel has connected");
+    }
+}
+
+TEST_F(TestMPPTunnelBase, CloseBeforeConnect)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+    mpp_tunnel_ptr->close("Canceled");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), false);
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, CloseAfterClose)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+    mpp_tunnel_ptr->close("Canceled");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    mpp_tunnel_ptr->close("Canceled");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, ConnectWriteCancel)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+    std::unique_ptr<PacketWriter> writer_ptr = std::make_unique<MockWriter>();
+    mpp_tunnel_ptr->connect(writer_ptr.get());
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+    std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->close("Cancel");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(dynamic_cast<MockWriter *>(writer_ptr.get())->write_packet_vec.size(), 2); //Second for err msg
+    GTEST_ASSERT_EQ(dynamic_cast<MockWriter *>(writer_ptr.get())->write_packet_vec[0], "First");
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, ConnectWriteWithCloseFlag)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+    std::unique_ptr<PacketWriter> writer_ptr = std::make_unique<MockWriter>();
+    mpp_tunnel_ptr->connect(writer_ptr.get());
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+    std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr, true);
+    mpp_tunnel_ptr->waitForFinish();
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(dynamic_cast<MockWriter *>(writer_ptr.get())->write_packet_vec.size(), 1);
+    GTEST_ASSERT_EQ(dynamic_cast<MockWriter *>(writer_ptr.get())->write_packet_vec[0], "First");
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, ConnectWriteWriteDone)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+    std::unique_ptr<PacketWriter> writer_ptr = std::make_unique<MockWriter>();
+    mpp_tunnel_ptr->connect(writer_ptr.get());
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+    auto data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->writeDone();
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(dynamic_cast<MockWriter *>(writer_ptr.get())->write_packet_vec.size(), 1);
+    GTEST_ASSERT_EQ(dynamic_cast<MockWriter *>(writer_ptr.get())->write_packet_vec[0], "First");
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, ConsumerFinish)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+    std::unique_ptr<PacketWriter> writer_ptr = std::make_unique<MockWriter>();
+    mpp_tunnel_ptr->connect(writer_ptr.get());
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+    auto data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->consumerFinish("");
+    mpp_tunnel_ptr->getThreadManager()->wait();
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(dynamic_cast<MockWriter *>(writer_ptr.get())->write_packet_vec.size(), 1);
+    GTEST_ASSERT_EQ(dynamic_cast<MockWriter *>(writer_ptr.get())->write_packet_vec[0], "First");
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, WriteError)
+{
+    try
+    {
+        auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+        std::unique_ptr<PacketWriter> writer_ptr = std::make_unique<MockFailedWriter>();
+        mpp_tunnel_ptr->connect(writer_ptr.get());
+        GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+        auto data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+        data_packet_ptr->set_data("First");
+        mpp_tunnel_ptr->write(*data_packet_ptr);
+        mpp_tunnel_ptr->waitForFinish();
+        GTEST_FAIL();
+    }
+    catch (Exception & e)
+    {
+        GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, grpc writes failed.");
+    }
+}
+
+TEST_F(TestMPPTunnelBase, WriteAfterFinished)
+{
+    try
+    {
+        auto mpp_tunnel_ptr = constructRemoteSyncTunnel();
+        std::unique_ptr<PacketWriter> writer_ptr = std::make_unique<MockWriter>();
+        mpp_tunnel_ptr->connect(writer_ptr.get());
+        GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+        mpp_tunnel_ptr->close("Canceled");
+        auto data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+        data_packet_ptr->set_data("First");
+        mpp_tunnel_ptr->write(*data_packet_ptr);
+        mpp_tunnel_ptr->waitForFinish();
+        GTEST_FAIL();
+    }
+    catch (Exception & e)
+    {
+        GTEST_ASSERT_EQ(e.message(), "write to tunnel which is already closed,");
+    }
+}
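// Editorial note on the "+1 packet" assertions above (a reading of the tests,
// not part of the original patch): close(reason) with a non-empty reason
// enqueues one final error packet carrying the reason before finishing, so
//
//     tunnel->write(packet);   // "First"
//     tunnel->close("Cancel"); // appends an error packet
//
// leaves the writer with two packets, while writeDone() appends nothing and
// the writer observes exactly one.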
+/// Test Local MPPTunnel
+TEST_F(TestMPPTunnelBase, LocalConnectWhenFinished)
+try
+{
+    auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+    mpp_tunnel_ptr->setFinishFlag(true);
+    mpp_tunnel_ptr->connect(nullptr);
+    GTEST_FAIL();
+}
+catch (Exception & e)
+{
+    GTEST_ASSERT_EQ(e.message(), "MPPTunnel has finished");
+}
+
+TEST_F(TestMPPTunnelBase, LocalConnectWhenConnected)
+{
+    try
+    {
+        auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+        auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr);
+        GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+        mpp_tunnel_ptr->connect(nullptr);
+        GTEST_FAIL();
+    }
+    catch (Exception & e)
+    {
+        GTEST_ASSERT_EQ(e.message(), "MPPTunnel has connected");
+    }
+}
+
+TEST_F(TestMPPTunnelBase, LocalCloseBeforeConnect)
+try
+{
+    auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+    mpp_tunnel_ptr->close("Canceled");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), false);
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, LocalCloseAfterClose)
+try
+{
+    auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+    mpp_tunnel_ptr->close("Canceled");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    mpp_tunnel_ptr->close("Canceled");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, LocalConnectWriteCancel)
+try
+{
+    auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+    auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr);
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+
+    std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->close("Cancel");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 2); //Second for err msg
+    GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First");
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, LocalConnectWriteWriteDone)
+try
+{
+    auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+    auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr);
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+
+    std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->writeDone();
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 1);
+    GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First");
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, LocalConsumerFinish)
+try
+{
+    auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+    auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr);
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+
+    std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->consumerFinish("");
+    mpp_tunnel_ptr->getThreadManager()->wait();
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec.size(), 1);
+    GTEST_ASSERT_EQ(local_reader_ptr->write_packet_vec[0], "First");
+}
+CATCH
+TEST_F(TestMPPTunnelBase, LocalReadTerminate)
+{
+    try
+    {
+        auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+        mpp_tunnel_ptr->connect(nullptr);
+        MockTerminateLocalReaderPtr local_reader_ptr = std::make_shared<MockTerminateLocalReader>(mpp_tunnel_ptr);
+        mpp_tunnel_ptr->getThreadManager()->schedule(true, "LocalReader", [local_reader_ptr] {
+            local_reader_ptr->read();
+        });
+        GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+        std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+        data_packet_ptr->set_data("First");
+        mpp_tunnel_ptr->write(*data_packet_ptr);
+        mpp_tunnel_ptr->waitForFinish();
+        GTEST_FAIL();
+    }
+    catch (Exception & e)
+    {
+        GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, Receiver closed");
+    }
+}
+
+TEST_F(TestMPPTunnelBase, LocalWriteAfterFinished)
+{
+    try
+    {
+        auto mpp_tunnel_ptr = constructLocalSyncTunnel();
+        auto local_reader_ptr = connectLocalSyncTunnel(mpp_tunnel_ptr);
+        GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+        mpp_tunnel_ptr->close("");
+        std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+        data_packet_ptr->set_data("First");
+        mpp_tunnel_ptr->write(*data_packet_ptr);
+        mpp_tunnel_ptr->waitForFinish();
+        GTEST_FAIL();
+    }
+    catch (Exception & e)
+    {
+        GTEST_ASSERT_EQ(e.message(), "write to tunnel which is already closed,");
+    }
+}
+
+/// Test Async MPPTunnel
+TEST_F(TestMPPTunnelBase, AsyncConnectWriteCancel)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteAsyncTunnel();
+    std::unique_ptr<PacketWriter> async_writer_ptr = std::make_unique<MockAsyncWriter>(mpp_tunnel_ptr);
+    mpp_tunnel_ptr->connect(async_writer_ptr.get());
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+
+    std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    data_packet_ptr->set_data("Second");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->close("Cancel");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(dynamic_cast<MockAsyncWriter *>(async_writer_ptr.get())->write_packet_vec.size(), 3); //Third for err msg
+    GTEST_ASSERT_EQ(dynamic_cast<MockAsyncWriter *>(async_writer_ptr.get())->write_packet_vec[0], "First");
+    GTEST_ASSERT_EQ(dynamic_cast<MockAsyncWriter *>(async_writer_ptr.get())->write_packet_vec[1], "Second");
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, AsyncConnectWriteWriteDone)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteAsyncTunnel();
+    std::unique_ptr<PacketWriter> async_writer_ptr = std::make_unique<MockAsyncWriter>(mpp_tunnel_ptr);
+    mpp_tunnel_ptr->connect(async_writer_ptr.get());
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+
+    std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->writeDone();
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(dynamic_cast<MockAsyncWriter *>(async_writer_ptr.get())->write_packet_vec.size(), 1);
+    GTEST_ASSERT_EQ(dynamic_cast<MockAsyncWriter *>(async_writer_ptr.get())->write_packet_vec[0], "First");
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, AsyncConsumerFinish)
+try
+{
+    auto mpp_tunnel_ptr = constructRemoteAsyncTunnel();
+    std::unique_ptr<PacketWriter> async_writer_ptr = std::make_unique<MockAsyncWriter>(mpp_tunnel_ptr);
+    mpp_tunnel_ptr->connect(async_writer_ptr.get());
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+
+    std::unique_ptr<mpp::MPPDataPacket> data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+    data_packet_ptr->set_data("First");
+    mpp_tunnel_ptr->write(*data_packet_ptr);
+    mpp_tunnel_ptr->consumerFinish("");
+    GTEST_ASSERT_EQ(mpp_tunnel_ptr->getFinishFlag(), true);
+    GTEST_ASSERT_EQ(dynamic_cast<MockAsyncWriter *>(async_writer_ptr.get())->write_packet_vec.size(), 0);
+}
+CATCH
+
+TEST_F(TestMPPTunnelBase, AsyncWriteError)
+{
+    try
+    {
+        auto mpp_tunnel_ptr = constructRemoteAsyncTunnel();
+        std::unique_ptr<PacketWriter> async_writer_ptr = std::make_unique<MockFailedAsyncWriter>(mpp_tunnel_ptr);
+        mpp_tunnel_ptr->connect(async_writer_ptr.get());
+        GTEST_ASSERT_EQ(mpp_tunnel_ptr->getConnectFlag(), true);
+        auto data_packet_ptr = std::make_unique<mpp::MPPDataPacket>();
+        data_packet_ptr->set_data("First");
+        mpp_tunnel_ptr->write(*data_packet_ptr);
+        data_packet_ptr->set_data("Second");
+        mpp_tunnel_ptr->write(*data_packet_ptr);
+        mpp_tunnel_ptr->waitForFinish();
+        GTEST_FAIL();
+    }
+    catch (Exception & e)
+    {
+        GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, grpc writes failed.");
+    }
+}
+
+} // namespace tests
+} // namespace DB

From cf8ab9567759409b9f4630282a0a008087d16157 Mon Sep 17 00:00:00 2001
From: jinhelin
Date: Mon, 18 Apr 2022 18:20:03 +0800
Subject: [PATCH 15/19] Fix background_pool_size not taking effect and
 BackgroundProcessingPool::getThreadIds missing some thread_ids. (#4686)

close pingcap/tiflash#4684, ref pingcap/tiflash#4685
---
 dbms/CMakeLists.txt                            |  1 +
 dbms/src/Encryption/RateLimiter.cpp            |  1 +
 dbms/src/Interpreters/Context.cpp              |  6 ++---
 dbms/src/Interpreters/Context.h                |  2 +-
 dbms/src/Server/Server.cpp                     | 25 ++++++++++-------
 .../src/Storages/BackgroundProcessingPool.cpp  | 27 +++++++++++++++----
 dbms/src/Storages/BackgroundProcessingPool.h   |  2 ++
 libs/libcommon/include/common/logger_useful.h  |  1 +
 8 files changed, 47 insertions(+), 18 deletions(-)
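The core of this patch is an ordering constraint in Server::main: the background pools size themselves from settings.background_pool_size at construction, so the settings must be loaded before the pools are touched, and the pools must exist before the rate limiter collects their thread IDs. A toy model of the bug (editorial sketch, hypothetical names, not TiFlash code) shows why constructing the pool too early silently uses the default size:

#include <cassert>

// Toy model: the pool snapshots background_pool_size at construction, so
// constructing it before the config is applied silently keeps the default.
struct Settings { int background_pool_size = 16; };
struct Pool { int threads; explicit Pool(const Settings & s) : threads(s.background_pool_size) {} };

int main()
{
    Settings settings;                 // defaults; config not applied yet
    Pool too_early(settings);          // wrong: sees the default 16
    settings.background_pool_size = 4; // profile/config load happens here
    Pool correct(settings);            // right: sees the configured 4
    assert(too_early.threads == 16 && correct.threads == 4);
    return 0;
}

The Server.cpp hunk below enforces exactly this order: parse user config, load default profiles, only then obtain the pools and initialize the rate limiter.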
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index 598f0c8d001..91f9aeb93c6 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -206,6 +206,7 @@ target_link_libraries (dbms
     ${RE2_ST_LIBRARY}
     ${OPENSSL_CRYPTO_LIBRARY}
     ${BTRIE_LIBRARIES}
+    absl::synchronization
 )
 
 if (NOT USE_INTERNAL_RE2_LIBRARY)
diff --git a/dbms/src/Encryption/RateLimiter.cpp b/dbms/src/Encryption/RateLimiter.cpp
index 0b5d90d8d09..38fd8468341 100644
--- a/dbms/src/Encryption/RateLimiter.cpp
+++ b/dbms/src/Encryption/RateLimiter.cpp
@@ -523,6 +523,7 @@ void IORateLimiter::setBackgroundThreadIds(std::vector<pid_t> thread_ids)
 {
     std::lock_guard lock(bg_thread_ids_mtx);
     bg_thread_ids.swap(thread_ids);
+    LOG_FMT_INFO(log, "bg_thread_ids {} => {}", bg_thread_ids.size(), bg_thread_ids);
 }
 
 std::pair<Int64, Int64> IORateLimiter::getReadWriteBytes(const std::string & fname [[maybe_unused]])
diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp
index 04882d97d3f..ac959158490 100644
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@@ -1548,11 +1548,11 @@ FileProviderPtr Context::getFileProvider() const
     return shared->file_provider;
 }
 
-void Context::initializeRateLimiter(Poco::Util::AbstractConfiguration & config)
+void Context::initializeRateLimiter(Poco::Util::AbstractConfiguration & config, BackgroundProcessingPool & bg_pool, BackgroundProcessingPool & blockable_bg_pool) const
 {
     getIORateLimiter().init(config);
-    auto tids = getBackgroundPool().getThreadIds();
-    auto blockable_tids = getBlockableBackgroundPool().getThreadIds();
+    auto tids = bg_pool.getThreadIds();
+    auto blockable_tids = blockable_bg_pool.getThreadIds();
     tids.insert(tids.end(), blockable_tids.begin(), blockable_tids.end());
     getIORateLimiter().setBackgroundThreadIds(tids);
 }
diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h
index d9f89c6cfa9..ebf7d8c82e2 100644
--- a/dbms/src/Interpreters/Context.h
+++ b/dbms/src/Interpreters/Context.h
@@ -404,7 +404,7 @@ class Context
     void initializeFileProvider(KeyManagerPtr key_manager, bool enable_encryption);
     FileProviderPtr getFileProvider() const;
 
-    void initializeRateLimiter(Poco::Util::AbstractConfiguration & config);
+    void initializeRateLimiter(Poco::Util::AbstractConfiguration & config, BackgroundProcessingPool & bg_pool, BackgroundProcessingPool & blockable_bg_pool) const;
     WriteLimiterPtr getWriteLimiter() const;
     ReadLimiterPtr getReadLimiter() const;
     IORateLimiter & getIORateLimiter() const;
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 84afee9af58..44e8ea29c29 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -1215,8 +1215,22 @@ int Server::main(const std::vector<std::string> & /*args*/)
     /// Init TiFlash metrics.
     global_context->initializeTiFlashMetrics();
 
-    /// Init Rate Limiter
-    global_context->initializeRateLimiter(config());
+    /// Initialize users config reloader.
+    auto users_config_reloader = UserConfig::parseSettings(config(), config_path, global_context, log);
+
+    /// Load global settings from default_profile and system_profile.
+    /// It internally depends on UserConfig::parseSettings.
+    global_context->setDefaultProfiles(config());
+    Settings & settings = global_context->getSettingsRef();
+
+    /// Initialize the background thread pool.
+    /// It internally depends on settings.background_pool_size,
+    /// so it must be called after settings have been loaded.
+    auto & bg_pool = global_context->getBackgroundPool();
+    auto & blockable_bg_pool = global_context->getBlockableBackgroundPool();
+
+    /// Initialize RateLimiter.
+    global_context->initializeRateLimiter(config(), bg_pool, blockable_bg_pool);
 
     /// Initialize main config reloader.
     auto main_config_reloader = std::make_unique<ConfigReloader>(
@@ -1230,9 +1244,6 @@
         },
         /* already_loaded = */ true);
 
-    /// Initialize users config reloader.
-    auto users_config_reloader = UserConfig::parseSettings(config(), config_path, global_context, log);
-
     /// Reload config in SYSTEM RELOAD CONFIG query.
     global_context->setConfigReloadCallback([&]() {
         main_config_reloader->reload();
@@ -1254,10 +1265,6 @@
     bool use_l0_opt = config().getBool("l0_optimize", false);
     global_context->setUseL0Opt(use_l0_opt);
 
-    /// Load global settings from default_profile and system_profile.
-    global_context->setDefaultProfiles(config());
-    Settings & settings = global_context->getSettingsRef();
-
     /// Size of cache for marks (index of MergeTree family of tables). It is necessary.
     size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_SIZE);
     if (mark_cache_size)
diff --git a/dbms/src/Storages/BackgroundProcessingPool.cpp b/dbms/src/Storages/BackgroundProcessingPool.cpp
index 601cb31746c..c0c3d8f0e48 100644
--- a/dbms/src/Storages/BackgroundProcessingPool.cpp
+++ b/dbms/src/Storages/BackgroundProcessingPool.cpp
@@ -29,6 +29,15 @@
 #ifdef __linux__
 #include <sys/syscall.h>
 #include <unistd.h>
+inline static pid_t gettid()
+{
+    return syscall(SYS_gettid);
+}
+#elif
+inline static pid_t gettid()
+{
+    return -1;
+}
 #endif
 
 namespace CurrentMetrics
@@ -76,6 +85,7 @@ void BackgroundProcessingPool::TaskInfo::wake()
 
 BackgroundProcessingPool::BackgroundProcessingPool(int size_)
     : size(size_)
+    , thread_ids_counter(size_)
 {
     LOG_FMT_INFO(&Poco::Logger::get("BackgroundProcessingPool"), "Create BackgroundProcessingPool with {} threads", size);
 
@@ -140,9 +150,7 @@ void BackgroundProcessingPool::threadFunction()
         const auto name = "BkgPool" + std::to_string(tid++);
         setThreadName(name.data());
         is_background_thread = true;
-#ifdef __linux__
-        addThreadId(syscall(SYS_gettid));
-#endif
+        addThreadId(gettid());
     }
 
     MemoryTracker memory_tracker;
@@ -272,14 +280,23 @@ void BackgroundProcessingPool::threadFunction()
 
 std::vector<pid_t> BackgroundProcessingPool::getThreadIds()
 {
+    thread_ids_counter.Wait();
     std::lock_guard lock(thread_ids_mtx);
+    if (thread_ids.size() != size)
+    {
+        LOG_FMT_ERROR(&Poco::Logger::get("BackgroundProcessingPool"), "thread_ids.size is {}, but {} is required", thread_ids.size(), size);
+        throw Exception("Background threads' number not match");
+    }
     return thread_ids;
 }
 
 void BackgroundProcessingPool::addThreadId(pid_t tid)
 {
-    std::lock_guard lock(thread_ids_mtx);
-    thread_ids.push_back(tid);
+    {
+        std::lock_guard lock(thread_ids_mtx);
+        thread_ids.push_back(tid);
+    }
+    thread_ids_counter.DecrementCount();
 }
 } // namespace DB
diff --git a/dbms/src/Storages/BackgroundProcessingPool.h b/dbms/src/Storages/BackgroundProcessingPool.h
index 770ef833800..1ba6c4efcf8 100644
--- a/dbms/src/Storages/BackgroundProcessingPool.h
+++ b/dbms/src/Storages/BackgroundProcessingPool.h
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include <absl/synchronization/blocking_counter.h>
 #include
 #include
 
@@ -117,6 +118,7 @@ class BackgroundProcessingPool
     Threads threads;
     std::vector<pid_t> thread_ids; // Linux Thread ID
     std::mutex thread_ids_mtx;
+    absl::BlockingCounter thread_ids_counter;
 
     std::atomic<bool> shutdown{false};
     std::condition_variable wake_event;
diff --git a/libs/libcommon/include/common/logger_useful.h b/libs/libcommon/include/common/logger_useful.h
index 21604dd5470..e3981baf34c 100644
--- a/libs/libcommon/include/common/logger_useful.h
+++ b/libs/libcommon/include/common/logger_useful.h
@@ -18,6 +18,7 @@
 
 #include
 #include
+#include <fmt/ranges.h>
 
 #ifndef QUERY_PREVIEW_LENGTH
 #define QUERY_PREVIEW_LENGTH 160
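For readers unfamiliar with the absl::BlockingCounter used above: it is a one-shot latch initialized with the pool size; each worker decrements it once after registering its TID, and getThreadIds blocks until the count reaches zero, which is what guarantees no thread ID is missed. A minimal self-contained sketch of the pattern (editorial example, not TiFlash code):

#include <absl/synchronization/blocking_counter.h>
#include <mutex>
#include <thread>
#include <vector>

// Minimal model of the pattern in BackgroundProcessingPool: N workers register
// an ID; the reader waits until all N have done so before taking a snapshot.
int main()
{
    constexpr int n = 4;
    absl::BlockingCounter counter(n);
    std::mutex mtx;
    std::vector<int> ids;
    std::vector<std::thread> workers;
    for (int i = 0; i < n; ++i)
        workers.emplace_back([&, i] {
            {
                std::lock_guard lock(mtx);
                ids.push_back(i); // stand-in for addThreadId(gettid())
            }
            counter.DecrementCount(); // outside the lock, exactly once per worker
        });
    counter.Wait(); // blocks until all n decrements, like getThreadIds()
    // ids now reliably holds n entries.
    for (auto & w : workers)
        w.join();
    return 0;
}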

From afdd2e0ca23ccd6a19a604d90b9d75c971a3fe7c Mon Sep 17 00:00:00 2001
From: Zhi Qi <30543181+LittleFall@users.noreply.github.com>
Date: Mon, 18 Apr 2022 22:08:03 +0800
Subject: [PATCH 16/19] =?UTF-8?q?fix:=20fix=20build=20issue=20`=E2=80=98ma?=
 =?UTF-8?q?ybe=5Funused=E2=80=99=20attribute=20ignored`=20on=20centos=20se?=
 =?UTF-8?q?rver=20=20(#4700)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

close pingcap/tiflash#4701
---
 dbms/src/Common/CPUAffinityManager.h    | 7 +++++--
 libs/libcommon/include/common/defines.h | 6 ++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Common/CPUAffinityManager.h b/dbms/src/Common/CPUAffinityManager.h
index 8c88c3ab1fd..5de62cf8368 100644
--- a/dbms/src/Common/CPUAffinityManager.h
+++ b/dbms/src/Common/CPUAffinityManager.h
@@ -14,6 +14,8 @@
 
 #pragma once
 
+#include <common/defines.h>
+
 #include
 #include
 #include
@@ -115,8 +117,9 @@ class CPUAffinityManager
 #endif
 
     // unused except Linux
-    [[maybe_unused]] int query_cpu_percent;
-    [[maybe_unused]] int cpu_cores;
+    MAYBE_UNUSED_MEMBER int query_cpu_percent;
+    MAYBE_UNUSED_MEMBER int cpu_cores;
+
     std::vector query_threads;
     Poco::Logger * log;
 
diff --git a/libs/libcommon/include/common/defines.h b/libs/libcommon/include/common/defines.h
index 51b99a2d0ba..ff79a4d2077 100644
--- a/libs/libcommon/include/common/defines.h
+++ b/libs/libcommon/include/common/defines.h
@@ -199,3 +199,9 @@ static ALWAYS_INLINE inline void TIFLASH_NO_OPTIMIZE(T && var)
 #define TIFLASH_DUMMY_FUNCTION_DEFINITION
 #define tiflash_compiler_builtin_memcpy __builtin_memcpy
 #endif
+
+#ifdef __clang__
+#define MAYBE_UNUSED_MEMBER [[maybe_unused]]
+#else
+#define MAYBE_UNUSED_MEMBER
+#endif
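Background on the macro (editorial note; exact behavior varies by compiler version): the gcc shipped on the CentOS build servers emits `'maybe_unused' attribute ignored` when the attribute is applied to a non-static data member, while clang honors it and would otherwise warn about the genuinely unused members, so the guard emits the attribute only under clang. A minimal self-contained illustration of the guard:

// Same shape as the defines.h hunk above; hypothetical struct for illustration.
#ifdef __clang__
#define MAYBE_UNUSED_MEMBER [[maybe_unused]]
#else
#define MAYBE_UNUSED_MEMBER
#endif

struct OnlyUsedOnLinux
{
    MAYBE_UNUSED_MEMBER int query_cpu_percent = 0; // attribute emitted only for clang
};

int main()
{
    OnlyUsedOnLinux s;
    (void)s;
    return 0;
}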

From 873d3ff3eb71d30d3f456be2300f289e5e8482ff Mon Sep 17 00:00:00 2001
From: yanweiqi <592838129@qq.com>
Date: Tue, 19 Apr 2022 18:08:03 +0800
Subject: [PATCH 17/19] *: fix incorrect tableScan trace. (#4699)

close pingcap/tiflash#4692
---
 dbms/src/Flash/Statistics/TableScanImpl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Flash/Statistics/TableScanImpl.cpp b/dbms/src/Flash/Statistics/TableScanImpl.cpp
index 9d301b69db8..a1f011de695 100644
--- a/dbms/src/Flash/Statistics/TableScanImpl.cpp
+++ b/dbms/src/Flash/Statistics/TableScanImpl.cpp
@@ -55,7 +55,7 @@ void TableScanStatistics::collectExtraRuntimeDetail()
     {
         auto * p_stream = dynamic_cast<IProfilingBlockInputStream *>(io_stream.get());
         assert(p_stream);
-        cop_table_scan_detail.bytes += p_stream->getProfileInfo().bytes;
+        local_table_scan_detail.bytes += p_stream->getProfileInfo().bytes;
     }
 }

From feee96afe1534a8d1c11421f764b0e13de5b9a1e Mon Sep 17 00:00:00 2001
From: xufei
Date: Tue, 19 Apr 2022 19:40:03 +0800
Subject: [PATCH 18/19] Make performance of TPCH q15 stable (#4570)

close pingcap/tiflash#4451
---
 dbms/src/Common/MemoryTracker.cpp              |  5 +++++
 dbms/src/Common/MemoryTracker.h                |  1 +
 .../ParallelAggregatingBlockInputStream.cpp   |  2 ++
 .../ParallelAggregatingBlockInputStream.h     |  1 +
 dbms/src/Interpreters/Aggregator.cpp          | 19 ++++++++++++++++---
 dbms/src/Interpreters/Aggregator.h            | 13 +++++++++++--
 6 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/dbms/src/Common/MemoryTracker.cpp b/dbms/src/Common/MemoryTracker.cpp
index e79e6077366..f64881ae35a 100644
--- a/dbms/src/Common/MemoryTracker.cpp
+++ b/dbms/src/Common/MemoryTracker.cpp
@@ -226,6 +226,11 @@ void submitLocalDeltaMemory()
     local_delta = 0;
 }
 
+Int64 getLocalDeltaMemory()
+{
+    return local_delta;
+}
+
 void alloc(Int64 size)
 {
     checkSubmitAndUpdateLocalDelta(local_delta + size);
diff --git a/dbms/src/Common/MemoryTracker.h b/dbms/src/Common/MemoryTracker.h
index 457377a7ce0..c87ec713dda 100644
--- a/dbms/src/Common/MemoryTracker.h
+++ b/dbms/src/Common/MemoryTracker.h
@@ -111,6 +111,7 @@ namespace CurrentMemoryTracker
 {
 void disableThreshold();
 void submitLocalDeltaMemory();
+Int64 getLocalDeltaMemory();
 void alloc(Int64 size);
 void realloc(Int64 old_size, Int64 new_size);
 void free(Int64 size);
diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp
index 62a7e7c4c46..3163975108f 100644
--- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp
@@ -157,6 +157,7 @@ void ParallelAggregatingBlockInputStream::Handler::onBlock(Block & block, size_t
         parent.file_provider,
         parent.threads_data[thread_num].key_columns,
         parent.threads_data[thread_num].aggregate_columns,
+        parent.threads_data[thread_num].local_delta_memory,
         parent.no_more_keys);
 
     parent.threads_data[thread_num].src_rows += block.rows();
@@ -270,6 +271,7 @@ void ParallelAggregatingBlockInputStream::execute()
             file_provider,
             threads_data[0].key_columns,
             threads_data[0].aggregate_columns,
+            threads_data[0].local_delta_memory,
             no_more_keys);
 }
diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h
index 3f486d2e35f..398c3d35bbc 100644
--- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h
+++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h
@@ -106,6 +106,7 @@ class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream
     {
         size_t src_rows = 0;
        size_t src_bytes = 0;
+        Int64 local_delta_memory = 0;
 
         ColumnRawPtrs key_columns;
         Aggregator::AggregateColumns aggregate_columns;
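What the Aggregator change below does, in isolation (editorial sketch with simplified types): each aggregation thread remembers the last thread-local allocation delta it published; on every block it reads the current delta, pushes the difference into one shared atomic, and folds the shared total into its own usage estimate, so all threads converge on one memory figure instead of each seeing only its own slice.

#include <atomic>
#include <cstdint>

// Simplified model of the accounting added to Aggregator::executeOnBlock.
std::atomic<int64_t> local_memory_usage{0}; // shared across threads, as in Aggregator

int64_t foldLocalDelta(int64_t current_memory_usage, int64_t & last_reported_delta, int64_t current_local_delta)
{
    const int64_t diff = current_local_delta - last_reported_delta;
    last_reported_delta = current_local_delta;
    // fetch_add returns the pre-add value, so add diff once more to get the
    // post-add total, exactly as the hunk below does.
    return current_memory_usage + (local_memory_usage.fetch_add(diff) + diff);
}

int main()
{
    int64_t thread_a_last = 0;
    // Thread A has 100 bytes of unreported local allocations on top of 1000 tracked:
    const int64_t usage = foldLocalDelta(/*tracked=*/1000, thread_a_last, /*local delta=*/100);
    return usage == 1100 ? 0 : 1;
}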
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index ed640ce5d08..6e067b88d81 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -522,7 +522,14 @@ void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns
     }
 }
 
-bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & result, const FileProviderPtr & file_provider, ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, bool & no_more_keys)
+bool Aggregator::executeOnBlock(
+    const Block & block,
+    AggregatedDataVariants & result,
+    const FileProviderPtr & file_provider,
+    ColumnRawPtrs & key_columns,
+    AggregateColumns & aggregate_columns,
+    Int64 & local_delta_memory,
+    bool & no_more_keys)
 {
     if (isCancelled())
         return true;
@@ -600,7 +607,13 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re
     size_t result_size = result.sizeWithoutOverflowRow();
     Int64 current_memory_usage = 0;
     if (current_memory_tracker)
+    {
         current_memory_usage = current_memory_tracker->get();
+        auto updated_local_delta_memory = CurrentMemoryTracker::getLocalDeltaMemory();
+        auto local_delta_memory_diff = updated_local_delta_memory - local_delta_memory;
+        current_memory_usage += (local_memory_usage.fetch_add(local_delta_memory_diff) + local_delta_memory_diff);
+        local_delta_memory = updated_local_delta_memory;
+    }
 
     auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation;
 
     /// Here all the results in the sum are taken into account, from different threads.
@@ -815,14 +828,14 @@ void Aggregator::execute(const BlockInputStreamPtr & stream, AggregatedDataVaria
         src_rows += block.rows();
         src_bytes += block.bytes();
 
-        if (!executeOnBlock(block, result, file_provider, key_columns, aggregate_columns, no_more_keys))
+        if (!executeOnBlock(block, result, file_provider, key_columns, aggregate_columns, params.local_delta_memory, no_more_keys))
             break;
     }
 
     /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation.
     /// To do this, we pass a block with zero rows to aggregate.
     if (result.empty() && params.keys_size == 0 && !params.empty_result_for_aggregation_by_empty_set)
-        executeOnBlock(stream->getHeader(), result, file_provider, key_columns, aggregate_columns, no_more_keys);
+        executeOnBlock(stream->getHeader(), result, file_provider, key_columns, aggregate_columns, params.local_delta_memory, no_more_keys);
 
     double elapsed_seconds = watch.elapsedSeconds();
     size_t rows = result.sizeWithoutOverflowRow();
diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h
index 672a0951465..b3bb537dc2e 100644
--- a/dbms/src/Interpreters/Aggregator.h
+++ b/dbms/src/Interpreters/Aggregator.h
@@ -706,6 +706,7 @@ class Aggregator
         AggregateDescriptions aggregates;
         size_t keys_size;
         size_t aggregates_size;
+        Int64 local_delta_memory = 0;
 
         /// The settings of approximate calculation of GROUP BY.
         const bool overflow_row; /// Do we need to put into AggregatedDataVariants::without_key aggregates for keys that are not in max_rows_to_group_by.
@@ -799,8 +800,14 @@ class Aggregator
     using AggregateFunctionsPlainPtrs = std::vector<IAggregateFunction *>;
 
     /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break').
-    bool executeOnBlock(const Block & block, AggregatedDataVariants & result, const FileProviderPtr & file_provider, ColumnRawPtrs & key_columns, AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block
-                        bool & no_more_keys);
+    bool executeOnBlock(
+        const Block & block,
+        AggregatedDataVariants & result,
+        const FileProviderPtr & file_provider,
+        ColumnRawPtrs & key_columns,
+        AggregateColumns & aggregate_columns, /// Passed to not create them anew for each block
+        Int64 & local_delta_memory,
+        bool & no_more_keys);
 
     /** Convert the aggregation data structure into a block.
      * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block.
@@ -906,6 +913,8 @@ class Aggregator
     /// How many RAM were used to process the query before processing the first block.
     Int64 memory_usage_before_aggregation = 0;
 
+    std::atomic<Int64> local_memory_usage = 0;
+
     std::mutex mutex;
 
     const LoggerPtr log;

From 636fcd22371266ee2792b4e0636cf96b4cacaa0c Mon Sep 17 00:00:00 2001
From: yanweiqi <592838129@qq.com>
Date: Tue, 19 Apr 2022 21:06:03 +0800
Subject: [PATCH 19/19] *: fix gettid compile error (#4704)

close pingcap/tiflash#4703
---
 dbms/src/Storages/BackgroundProcessingPool.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Storages/BackgroundProcessingPool.cpp b/dbms/src/Storages/BackgroundProcessingPool.cpp
index c0c3d8f0e48..96c2c6cc622 100644
--- a/dbms/src/Storages/BackgroundProcessingPool.cpp
+++ b/dbms/src/Storages/BackgroundProcessingPool.cpp
@@ -29,12 +29,12 @@
 #ifdef __linux__
 #include <sys/syscall.h>
 #include <unistd.h>
-inline static pid_t gettid()
+inline static pid_t getTid()
 {
     return syscall(SYS_gettid);
 }
-#elif
+#else
-inline static pid_t gettid()
+inline static pid_t getTid()
 {
     return -1;
 }
 #endif
@@ -150,7 +150,7 @@ void BackgroundProcessingPool::threadFunction()
         const auto name = "BkgPool" + std::to_string(tid++);
         setThreadName(name.data());
         is_background_thread = true;
-        addThreadId(gettid());
+        addThreadId(getTid());
     }
 
     MemoryTracker memory_tracker;
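Two independent build breaks are fixed in this last patch (editorial reading of the diff above): `#elif` with no condition is ill-formed, so the non-Linux branch introduced in patch 15 never compiled, and the wrapper is renamed because a function named `gettid` can collide with the identically named wrapper that newer glibc (2.30 and later) declares itself. The corrected shape, as a self-contained sketch:

#include <cstdio>
#include <sys/types.h>

// Mirrors the fixed wrapper above; the raw syscall avoids depending on
// glibc's own ::gettid being available.
#ifdef __linux__
#include <sys/syscall.h>
#include <unistd.h>
inline static pid_t getTid() { return syscall(SYS_gettid); }
#else
inline static pid_t getTid() { return -1; } // not supported off Linux here
#endif

int main()
{
    std::printf("tid = %ld\n", static_cast<long>(getTid()));
    return 0;
}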