diff --git a/ydb/core/formats/arrow/reader/merger.cpp b/ydb/core/formats/arrow/reader/merger.cpp index 16b9733ad4c0..5d53e4dbbdcb 100644 --- a/ydb/core/formats/arrow/reader/merger.cpp +++ b/ydb/core/formats/arrow/reader/merger.cpp @@ -154,6 +154,7 @@ void TMergePartialStream::DrainCurrentPosition(TRecordBatchBuilder* builder, std Y_ABORT_UNLESS(SortHeap.Size()); Y_ABORT_UNLESS(!SortHeap.Current().IsControlPoint()); if (!SortHeap.Current().IsDeleted()) { +// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("key_add", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); if (builder) { builder->AddRecord(SortHeap.Current().GetKeyColumns()); } @@ -161,6 +162,8 @@ void TMergePartialStream::DrainCurrentPosition(TRecordBatchBuilder* builder, std *resultScanData = SortHeap.Current().GetKeyColumns().GetSorting(); *resultPosition = SortHeap.Current().GetKeyColumns().GetPosition(); } + } else { +// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("key_skip", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); } CheckSequenceInDebug(SortHeap.Current().GetKeyColumns()); const ui64 startPosition = SortHeap.Current().GetKeyColumns().GetPosition(); @@ -169,6 +172,7 @@ void TMergePartialStream::DrainCurrentPosition(TRecordBatchBuilder* builder, std bool isFirst = true; while (SortHeap.Size() && (isFirst || SortHeap.Current().GetKeyColumns().Compare(*startSorting, startPosition) == std::partial_ordering::equivalent)) { if (!isFirst) { +// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("key_skip1", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); auto& anotherIterator = SortHeap.Current(); if (PossibleSameVersionFlag) { AFL_VERIFY(anotherIterator.GetVersionColumns().Compare(*startVersion, startPosition) != std::partial_ordering::greater) diff --git a/ydb/core/formats/arrow/reader/position.h b/ydb/core/formats/arrow/reader/position.h index 78233e50b4a5..6dcd5ce144cf 100644 --- a/ydb/core/formats/arrow/reader/position.h +++ 
b/ydb/core/formats/arrow/reader/position.h @@ -451,6 +451,8 @@ class TIntervalPositions { private: std::vector Positions; public: + using const_iterator = std::vector::const_iterator; + bool IsEmpty() const { return Positions.empty(); } @@ -459,6 +461,16 @@ class TIntervalPositions { return Positions.begin(); } + TString DebugString() const { + TStringBuilder sb; + sb << "["; + for (auto&& p : Positions) { + sb << p.DebugJson().GetStringRobust() << ";"; + } + sb << "]"; + return sb; + } + std::vector::const_iterator end() const { return Positions.end(); } @@ -662,6 +674,35 @@ class TRWSortableBatchPosition: public TSortableBatchPosition, public TMoveOnly return SplitByBorders(batch, columnNames, it); } + class TIntervalPointsIterator { + private: + typename TIntervalPositions::const_iterator Current; + typename TIntervalPositions::const_iterator End; + + public: + TIntervalPointsIterator(const TIntervalPositions& container) + : Current(container.begin()) + , End(container.end()) { + } + + bool IsValid() const { + return Current != End; + } + + void Next() { + ++Current; + } + + const auto& CurrentPosition() const { + return Current->GetPosition(); + } + }; + + static std::vector> SplitByBordersInIntervalPositions( + const std::shared_ptr& batch, const std::vector& columnNames, const TIntervalPositions& container) { + TIntervalPointsIterator it(container); + return SplitByBorders(batch, columnNames, it); + } }; } diff --git a/ydb/core/formats/arrow/serializer/native.cpp b/ydb/core/formats/arrow/serializer/native.cpp index e3533172c388..30ec056d33e6 100644 --- a/ydb/core/formats/arrow/serializer/native.cpp +++ b/ydb/core/formats/arrow/serializer/native.cpp @@ -99,7 +99,7 @@ TString TNativeSerializer::DoSerializePayload(const std::shared_ptrschema()).ok()); + AFL_VERIFY_DEBUG(Deserialize(str, batch->schema()).ok()); AFL_DEBUG(NKikimrServices::ARROW_HELPER)("event", "serialize")("size", str.size())("columns", batch->schema()->num_fields()); return str; } diff --git 
a/ydb/core/formats/arrow/serializer/native.h b/ydb/core/formats/arrow/serializer/native.h index 9a390457aeb5..38615600fc2b 100644 --- a/ydb/core/formats/arrow/serializer/native.h +++ b/ydb/core/formats/arrow/serializer/native.h @@ -62,6 +62,18 @@ class TNativeSerializer: public ISerializer { virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapColumn::TSerializer& proto) const override; public: + static std::shared_ptr GetUncompressed() { + static std::shared_ptr result = + std::make_shared(arrow::Compression::UNCOMPRESSED); + return result; + } + + static std::shared_ptr GetFast() { + static std::shared_ptr result = + std::make_shared(arrow::Compression::LZ4_FRAME); + return result; + } + virtual TString GetClassName() const override { return GetClassNameStatic(); } diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.h b/ydb/core/kqp/ut/common/kqp_ut_common.h index 383dde66c6f5..b67029c15418 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.h +++ b/ydb/core/kqp/ut/common/kqp_ut_common.h @@ -98,6 +98,8 @@ struct TKikimrSettings: public TTestFeatureFlagsHolder { exchangerSettings->SetMaxDelayMs(10); AppConfig.MutableColumnShardConfig()->SetDisabledOnSchemeShard(false); FeatureFlags.SetEnableSparsedColumns(true); + FeatureFlags.SetEnableImmediateWritingOnBulkUpsert(true); + FeatureFlags.SetEnableWritePortionsOnInsert(true); FeatureFlags.SetEnableParameterizedDecimal(true); FeatureFlags.SetEnableTopicAutopartitioningForCDC(true); } diff --git a/ydb/core/kqp/ut/olap/aggregations_ut.cpp b/ydb/core/kqp/ut/olap/aggregations_ut.cpp index 28dcf8d19069..bf1921c5fe4b 100644 --- a/ydb/core/kqp/ut/olap/aggregations_ut.cpp +++ b/ydb/core/kqp/ut/olap/aggregations_ut.cpp @@ -95,7 +95,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); } - while (csController->GetInsertFinishedCounter().Val() == 0) { + while 
(csController->GetCompactionFinishedCounter().Val() == 0) { Cout << "Wait indexation..." << Endl; Sleep(TDuration::Seconds(2)); } @@ -374,7 +374,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { .AddExpectedPlanOptions("KqpOlapFilter") #if SSA_RUNTIME_VERSION >= 2U .AddExpectedPlanOptions("TKqpOlapAgg") - .MutableLimitChecker().SetExpectedResultCount(1) + .MutableLimitChecker().SetExpectedResultCount(2) #else .AddExpectedPlanOptions("Condense") #endif @@ -417,7 +417,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { .AddExpectedPlanOptions("KqpOlapFilter") #if SSA_RUNTIME_VERSION >= 2U .AddExpectedPlanOptions("TKqpOlapAgg") - .MutableLimitChecker().SetExpectedResultCount(1) + .MutableLimitChecker().SetExpectedResultCount(2) #else .AddExpectedPlanOptions("CombineCore") .AddExpectedPlanOptions("KqpOlapFilter") diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp index 474b9d1b2743..caaf1b44c29d 100644 --- a/ydb/core/kqp/ut/olap/indexes_ut.cpp +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -3,10 +3,11 @@ #include #include +#include #include -#include +#include -#include +#include #include @@ -14,8 +15,7 @@ namespace NKikimr::NKqp { Y_UNIT_TEST_SUITE(KqpOlapIndexes) { Y_UNIT_TEST(IndexesActualization) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); + auto settings = TKikimrSettings().SetWithSampleTables(false); TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); @@ -26,11 +26,10 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { TLocalHelper(kikimr).CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); - Tests::NCommon::TLoggerInit(kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); - - std::vector uids; - std::vector resourceIds; - std::vector levels; + Tests::NCommon::TLoggerInit(kikimr) + .SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS") + .SetPriority(NActors::NLog::PRI_DEBUG) + .Initialize(); { 
WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); @@ -40,28 +39,12 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { WriteTestData(kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); WriteTestData(kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); WriteTestData(kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); - - const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { - for (ui32 i = 0; i < count; ++i) { - uids.emplace_back("uid_" + ::ToString(startUid + i)); - resourceIds.emplace_back(::ToString(startRes + i)); - levels.emplace_back(i % 5); - } - }; - - filler(1000000, 300000000, 10000); - filler(1100000, 300100000, 10000); - filler(1200000, 300200000, 10000); - filler(1300000, 300300000, 10000); - filler(1400000, 300400000, 10000); - filler(2000000, 200000000, 70000); - filler(3000000, 100000000, 110000); - } + csController->WaitCompactions(TDuration::Seconds(5)); { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -69,7 +52,8 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << + auto alterQuery = + TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05}`); )"; @@ -79,22 +63,25 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { } { - auto alterQuery = TStringBuilder() << - "ALTER OBJECT `/Root/olapStore` 
(TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"; + auto alterQuery = + TStringBuilder() + << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } csController->WaitActualization(TDuration::Seconds(10)); { - auto it = tableClient.StreamExecuteScanQuery(R"( + auto it = tableClient + .StreamExecuteScanQuery(R"( --!syntax_v1 SELECT COUNT(*) FROM `/Root/olapStore/olapTable` WHERE ((resource_id = '2' AND level = 222222) OR (resource_id = '1' AND level = 111111) OR (resource_id LIKE '%11dd%')) AND uid = '222' - )").GetValueSync(); + )") + .GetValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); TString result = StreamResultToYson(it); @@ -102,14 +89,13 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { Cerr << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << Endl; CompareYson(result, R"([[0u;]])"); AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val() * 0.4) - ("approve", csController->GetIndexesApprovedOnSelect().Val())("skip", csController->GetIndexesSkippingOnSelect().Val()); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()) + ("approve", csController->GetIndexesApprovedOnSelect().Val())("skip", csController->GetIndexesSkippingOnSelect().Val()); } } Y_UNIT_TEST(CountMinSketchIndex) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); + auto settings = TKikimrSettings().SetWithSampleTables(false); TKikimrRunner kikimr(settings); auto csController = 
NYDBTest::TControllers::RegisterCSControllerGuard(); @@ -121,11 +107,14 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { auto tableClient = kikimr.GetTableClient(); auto& client = kikimr.GetTestClient(); - Tests::NCommon::TLoggerInit(kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + Tests::NCommon::TLoggerInit(kikimr) + .SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS") + .SetPriority(NActors::NLog::PRI_DEBUG) + .Initialize(); { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_ts, TYPE=COUNT_MIN_SKETCH, + R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_ts, TYPE=COUNT_MIN_SKETCH, FEATURES=`{"column_names" : ['timestamp']}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -135,7 +124,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_res_id, TYPE=COUNT_MIN_SKETCH, + R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_res_id, TYPE=COUNT_MIN_SKETCH, FEATURES=`{"column_names" : ['resource_id']}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -145,7 +134,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_uid, TYPE=COUNT_MIN_SKETCH, + R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_uid, TYPE=COUNT_MIN_SKETCH, FEATURES=`{"column_names" : ['uid']}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -155,7 +144,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_level, TYPE=COUNT_MIN_SKETCH, + R"(ALTER OBJECT 
`/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_level, TYPE=COUNT_MIN_SKETCH, FEATURES=`{"column_names" : ['level']}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -165,7 +154,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_message, TYPE=COUNT_MIN_SKETCH, + R"(ALTER OBJECT `/Root/olapTable` (TYPE TABLE) SET (ACTION=UPSERT_INDEX, NAME=cms_message, TYPE=COUNT_MIN_SKETCH, FEATURES=`{"column_names" : ['message']}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -189,7 +178,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { auto indexes = description.GetSchema().GetIndexes(); UNIT_ASSERT(indexes.size() == 5); - std::unordered_set indexNames{"cms_ts", "cms_res_id", "cms_uid", "cms_level", "cms_message"}; + std::unordered_set indexNames{ "cms_ts", "cms_res_id", "cms_uid", "cms_level", "cms_message" }; for (const auto& i : indexes) { Cerr << ">>> " << i.GetName() << " of class name " << i.GetClassName() << Endl; UNIT_ASSERT(i.GetClassName() == "COUNT_MIN_SKETCH"); @@ -212,8 +201,9 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { Cerr << ">>> path id: " << j << Endl; pathids.insert(j); } - if (++shard == 3) + if (++shard == 3) { break; + } } UNIT_ASSERT(pathids.size() == 1); @@ -224,10 +214,10 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { auto request = std::make_unique(); request->Record.MutableTable()->MutablePathId()->SetLocalId(pathId); - runtime->Send(MakePipePerNodeCacheID(false), sender, new TEvPipeCache::TEvForward( - request.release(), i, false)); - if (++shard == 3) + runtime->Send(MakePipePerNodeCacheID(false), sender, new TEvPipeCache::TEvForward(request.release(), i, false)); + if (++shard == 3) { break; + } } auto sketch = std::unique_ptr(TCountMinSketch::Create()); @@ -248,8 +238,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { } Y_UNIT_TEST(SchemeActualizationOnceOnStart) { - auto settings = 
TKikimrSettings() - .SetWithSampleTables(false); + auto settings = TKikimrSettings().SetWithSampleTables(false); TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); @@ -276,14 +265,14 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { filler(1000000, 300000000, 10000); filler(1100000, 300100000, 10000); - } const ui64 initCount = csController->GetActualizationRefreshSchemeCount().Val(); AFL_VERIFY(initCount == 3)("started_value", initCount); for (ui32 i = 0; i < 10; ++i) { - auto alterQuery = TStringBuilder() << - "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"; + auto alterQuery = + TStringBuilder() + << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -292,17 +281,19 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { AFL_VERIFY(updatesCount == 30 + initCount)("after_modification", updatesCount); for (auto&& i : csController->GetShardActualIds()) { - kikimr.GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( - new TEvents::TEvPoisonPill(), i, false)); + kikimr.GetTestServer().GetRuntime()->Send( + MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward(new TEvents::TEvPoisonPill(), i, false)); } { - auto it = tableClient.StreamExecuteScanQuery(R"( + auto it = tableClient + .StreamExecuteScanQuery(R"( --!syntax_v1 SELECT COUNT(*) FROM `/Root/olapStore/olapTable` - )").GetValueSync(); + )") + .GetValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); TString result = StreamResultToYson(it); Cout << result << Endl; @@ -310,7 +301,9 @@ 
Y_UNIT_TEST_SUITE(KqpOlapIndexes) { } AFL_VERIFY(updatesCount + 3 /*tablets count*/ * 1 /*normalizers*/ == - (ui64)csController->GetActualizationRefreshSchemeCount().Val())("updates", updatesCount)("count", csController->GetActualizationRefreshSchemeCount().Val()); + (ui64)csController->GetActualizationRefreshSchemeCount().Val())( + "updates", updatesCount)("count", + csController->GetActualizationRefreshSchemeCount().Val()); } class TTestIndexesScenario { @@ -318,6 +311,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { TKikimrSettings Settings; std::unique_ptr Kikimr; YDB_ACCESSOR(TString, StorageId, "__DEFAULT"); + public: TTestIndexesScenario& Initialize() { Settings = TKikimrSettings().SetWithSampleTables(false); @@ -326,29 +320,31 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { } void Execute() const { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); TLocalHelper(*Kikimr).CreateTestOlapTable(); auto tableClient = Kikimr->GetTableClient(); // Tests::NCommon::TLoggerInit(kikimr).Initialize(); - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - { - auto alterQuery = TStringBuilder() << Sprintf( - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + auto alterQuery = + TStringBuilder() << Sprintf( + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05, "storage_id" : "%s"}`); - )", StorageId.data()); + )", + StorageId.data()); auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = 
TStringBuilder() << Sprintf( - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, + auto alterQuery = + TStringBuilder() << Sprintf( + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05, "storage_id" : "%s"}`); - )", StorageId.data() - ); + )", + StorageId.data()); auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -359,13 +355,15 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { std::vector levels; { - WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); - WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); - WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); - WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); - WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); - WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); - WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + for (ui32 i = 0; i < 2; ++i) { + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 3000000, 
100000000, 110000); + } const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { for (ui32 i = 0; i < count; ++i) { @@ -382,17 +380,18 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { filler(1400000, 300400000, 10000); filler(2000000, 200000000, 70000); filler(3000000, 100000000, 110000); - } { - auto it = tableClient.StreamExecuteScanQuery(R"( + auto it = tableClient + .StreamExecuteScanQuery(R"( --!syntax_v1 SELECT COUNT(*) FROM `/Root/olapStore/olapTable` - )").GetValueSync(); + )") + .GetValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); TString result = StreamResultToYson(it); @@ -412,21 +411,26 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { Cerr << "WAIT_COMPACTION: " << csController->GetCompactionStartedCounter().Val() << Endl; Sleep(TDuration::Seconds(1)); } + // important checker for control compactions (<=21) and control indexes constructed (>=21) + AFL_VERIFY(csController->GetCompactionStartedCounter().Val() == 21)("count", csController->GetCompactionStartedCounter().Val()); { - auto it = tableClient.StreamExecuteScanQuery(R"( + auto it = tableClient + .StreamExecuteScanQuery(R"( --!syntax_v1 SELECT COUNT(*) FROM `/Root/olapStore/olapTable` WHERE ((resource_id = '2' AND level = 222222) OR (resource_id = '1' AND level = 111111) OR (resource_id LIKE '%11dd%')) AND uid = '222' - )").GetValueSync(); + )") + .GetValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); TString result = StreamResultToYson(it); AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("result", result); - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("skip", csController->GetIndexesSkippingOnSelect().Val())("check", csController->GetIndexesApprovedOnSelect().Val()); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("skip", csController->GetIndexesSkippingOnSelect().Val())( + "check", csController->GetIndexesApprovedOnSelect().Val()); CompareYson(result, R"([[0u;]])"); if (StorageId == "__LOCAL_METADATA") { 
AFL_VERIFY(csController->GetIndexesSkippedNoData().Val()); @@ -452,12 +456,13 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); TString result = StreamResultToYson(it); - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; + Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " + << csController->GetIndexesSkippedNoData().Val() << Endl; CompareYson(result, R"([[1u;]])"); } - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()) - ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() * 5 < csController->GetIndexesSkippingOnSelect().Val()) + ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); } }; @@ -482,7 +487,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -492,7 +497,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid", "resource_id"], 
"false_positive_probability" : 0.05}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -502,7 +507,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.005}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -512,7 +517,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { { auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.01}`); )"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); @@ -526,8 +531,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } - } } -} +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp index c1fcab4be0fd..bc9ae55b2ffc 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp @@ -12,7 +12,7 @@ using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpOlapStats) { constexpr size_t inserted_rows = 1000; constexpr size_t tables_in_store = 1000; - constexpr size_t size_single_table = 13152; + constexpr size_t size_single_table = 12688; const TVector schema = { TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), diff --git 
a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 099b664aecd5..358a8982488d 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -1504,7 +1504,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto sender = runtime->AllocateEdgeActor(); InitRoot(server, sender); - Tests::NCommon::TLoggerInit(runtime).Initialize(); +// Tests::NCommon::TLoggerInit(runtime).Initialize(); const ui32 numShards = 10; const ui32 numIterations = 50; @@ -2444,10 +2444,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { tableInserter.AddRow().Add(2).Add("test_res_2").Add("val2").AddNull(); testHelper.BulkUpsert(testTable, tableInserter); } - while (csController->GetInsertFinishedCounter().Val() == 0) { - Cout << "Wait indexation..." << Endl; - Sleep(TDuration::Seconds(2)); - } +// while (csController->GetCompactionFinishedCounter().Val() == 0) { +// Cout << "Wait indexation..." << Endl; +// Sleep(TDuration::Seconds(2)); +// } testHelper.ReadData("SELECT * FROM `/Root/ColumnTableTest` WHERE id=2", "[[2;\"test_res_2\";#;[\"val1\"]]]"); } @@ -2470,10 +2470,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { tableInserter.AddRow().Add(1).Add(10); testHelper.BulkUpsert(testTable, tableInserter); } - while (csController->GetInsertFinishedCounter().Val() < 1) { - Cout << "Wait indexation..." << Endl; - Sleep(TDuration::Seconds(2)); - } +// while (csController->GetCompactionFinishedCounter().Val() < 1) { +// Cout << "Wait compaction..." << Endl; +// Sleep(TDuration::Seconds(2)); +// } testHelper.ReadData("SELECT value FROM `/Root/ColumnTableTest` WHERE id = 1", "[[10]]"); { TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); @@ -2481,8 +2481,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { testHelper.BulkUpsert(testTable, tableInserter); } testHelper.ReadData("SELECT value FROM `/Root/ColumnTableTest` WHERE id = 1", "[[110]]"); - while (csController->GetInsertFinishedCounter().Val() < 2) { - Cout << "Wait indexation..." 
<< Endl; + while (csController->GetCompactionFinishedCounter().Val() < 1) { + Cout << "Wait compaction..." << Endl; Sleep(TDuration::Seconds(2)); } testHelper.ReadData("SELECT value FROM `/Root/ColumnTableTest` WHERE id = 1", "[[110]]"); diff --git a/ydb/core/kqp/ut/olap/tiering_ut.cpp b/ydb/core/kqp/ut/olap/tiering_ut.cpp index b9cceba93738..8d9c96bbd7e2 100644 --- a/ydb/core/kqp/ut/olap/tiering_ut.cpp +++ b/ydb/core/kqp/ut/olap/tiering_ut.cpp @@ -30,8 +30,10 @@ Y_UNIT_TEST_SUITE(KqpOlapTiering) { for (ui64 i = 0; i < 100; ++i) { WriteTestData(testHelper.GetKikimr(), "/Root/olapStore/olapTable", 0, i * 10000, 1000); + WriteTestData(testHelper.GetKikimr(), "/Root/olapStore/olapTable", 0, i * 10000, 1000); } + csController->WaitCompactions(TDuration::Seconds(5)); csController->WaitActualization(TDuration::Seconds(5)); ui64 columnRawBytes = 0; diff --git a/ydb/core/kqp/ut/olap/write_ut.cpp b/ydb/core/kqp/ut/olap/write_ut.cpp index 8d9751f28193..88b349912988 100644 --- a/ydb/core/kqp/ut/olap/write_ut.cpp +++ b/ydb/core/kqp/ut/olap/write_ut.cpp @@ -1,45 +1,52 @@ +#include "helpers/get_value.h" #include "helpers/local.h" -#include "helpers/writer.h" -#include "helpers/typed_local.h" #include "helpers/query_executor.h" -#include "helpers/get_value.h" +#include "helpers/typed_local.h" +#include "helpers/writer.h" -#include -#include #include +#include +#include #include +#include + namespace NKikimr::NKqp { Y_UNIT_TEST_SUITE(KqpOlapWrite) { Y_UNIT_TEST(TierDraftsGC) { - auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetSmallSizeDetector(1000000); csController->SetIndexWriteControllerEnabled(false); csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); Singleton()->ResetWriteCounters(); - auto settings = TKikimrSettings() - .SetWithSampleTables(false); + auto settings = TKikimrSettings().SetWithSampleTables(false); 
TKikimrRunner kikimr(settings); TLocalHelper(kikimr).CreateTestOlapTable(); - Tests::NCommon::TLoggerInit(kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + Tests::NCommon::TLoggerInit(kikimr) + .SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS") + .SetPriority(NActors::NLog::PRI_DEBUG) + .Initialize(); auto tableClient = kikimr.GetTableClient(); - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); - } - while (csController->GetInsertStartedCounter().Val() == 0) { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + while (csController->GetCompactionStartedCounter().Val() == 0) { Cout << "Wait indexation..." << Endl; Sleep(TDuration::Seconds(2)); } - while (!Singleton()->GetWritesCount() || !csController->GetIndexWriteControllerBrokeCount().Val()) { - Cout << "Wait errors on write... " << Singleton()->GetWritesCount() << "/" << csController->GetIndexWriteControllerBrokeCount().Val() << Endl; + while (!Singleton()->GetWritesCount() || + !csController->GetIndexWriteControllerBrokeCount().Val()) { + Cout << "Wait errors on write... " << Singleton()->GetWritesCount() << "/" + << csController->GetIndexWriteControllerBrokeCount().Val() << Endl; Sleep(TDuration::Seconds(2)); } csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); const auto startInstant = TMonotonic::Now(); - while (Singleton()->GetSize() && TMonotonic::Now() - startInstant < TDuration::Seconds(200)) { + while (Singleton()->GetSize() && + TMonotonic::Now() - startInstant < TDuration::Seconds(200)) { Cerr << "Waiting empty... 
" << Singleton()->GetSize() << Endl; Sleep(TDuration::Seconds(2)); } @@ -57,7 +64,10 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { auto settings = TKikimrSettings().SetWithSampleTables(false); TKikimrRunner kikimr(settings); TLocalHelper(kikimr).CreateTestOlapTable(); - Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + Tests::NCommon::TLoggerInit(kikimr) + .SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS") + .SetPriority(NActors::NLog::PRI_DEBUG) + .Initialize(); auto tableClient = kikimr.GetTableClient(); WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); @@ -69,41 +79,48 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { } Y_UNIT_TEST(TierDraftsGCWithRestart) { - auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetSmallSizeDetector(1000000); csController->SetIndexWriteControllerEnabled(false); csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1000)); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); Singleton()->ResetWriteCounters(); - auto settings = TKikimrSettings() - .SetWithSampleTables(false); + auto settings = TKikimrSettings().SetWithSampleTables(false); TKikimrRunner kikimr(settings); TLocalHelper(kikimr).CreateTestOlapTable(); - Tests::NCommon::TLoggerInit(kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + Tests::NCommon::TLoggerInit(kikimr) + .SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS") + .SetPriority(NActors::NLog::PRI_DEBUG) + .Initialize(); auto tableClient = kikimr.GetTableClient(); - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); - } - while (csController->GetInsertStartedCounter().Val() == 0) { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 
30000, 1000000, 11000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + + while (csController->GetCompactionStartedCounter().Val() == 0) { Cout << "Wait indexation..." << Endl; Sleep(TDuration::Seconds(2)); } - while (Singleton()->GetWritesCount() < 20 || !csController->GetIndexWriteControllerBrokeCount().Val()) { - Cout << "Wait errors on write... " << Singleton()->GetWritesCount() << "/" << csController->GetIndexWriteControllerBrokeCount().Val() << Endl; + while (Singleton()->GetWritesCount() < 20 || + !csController->GetIndexWriteControllerBrokeCount().Val()) { + Cout << "Wait errors on write... " << Singleton()->GetWritesCount() << "/" + << csController->GetIndexWriteControllerBrokeCount().Val() << Endl; Sleep(TDuration::Seconds(2)); } csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + csController->WaitCompactions(TDuration::Seconds(5)); AFL_VERIFY(Singleton()->GetSize()); { const auto startInstant = TMonotonic::Now(); AFL_VERIFY(Singleton()->GetDeletesCount() == 0) ("count", Singleton()->GetDeletesCount()); - while (Singleton()->GetSize() && TMonotonic::Now() - startInstant < TDuration::Seconds(200)) { + while (Singleton()->GetSize() && + TMonotonic::Now() - startInstant < TDuration::Seconds(200)) { for (auto&& i : csController->GetShardActualIds()) { - kikimr.GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( - new TEvents::TEvPoisonPill(), i, false)); + kikimr.GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), + new TEvPipeCache::TEvForward(new TEvents::TEvPoisonPill(), i, false)); } csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); Cerr << "Waiting empty... 
" << Singleton()->GetSize() << Endl; @@ -118,17 +135,18 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { const auto startInstant = TMonotonic::Now(); while (TMonotonic::Now() - startInstant < TDuration::Seconds(10)) { for (auto&& i : csController->GetShardActualIds()) { - kikimr.GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( - new TEvents::TEvPoisonPill(), i, false)); + kikimr.GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), + new TEvPipeCache::TEvForward(new TEvents::TEvPoisonPill(), i, false)); } - Cerr << "Waiting empty... " << Singleton()->GetWritesCount() << "/" << Singleton()->GetDeletesCount() << Endl; + Cerr << "Waiting empty... " << Singleton()->GetWritesCount() << "/" + << Singleton()->GetDeletesCount() << Endl; Sleep(TDuration::MilliSeconds(500)); } } AFL_VERIFY(writesCountStart == Singleton()->GetWritesCount()) - ("writes", writesCountStart)("count", Singleton()->GetWritesCount()); + ("writes", writesCountStart)("count", Singleton()->GetWritesCount()); AFL_VERIFY(deletesCountStart == Singleton()->GetDeletesCount()) - ("deletes", deletesCountStart)("count", Singleton()->GetDeletesCount()); + ("deletes", deletesCountStart)("count", Singleton()->GetDeletesCount()); } Y_UNIT_TEST(DefaultValues) { @@ -137,7 +155,9 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { Tests::NCommon::TLoggerInit(kikimr).Initialize(); TTypedLocalHelper helper("Utf8", kikimr); helper.CreateTestOlapTable(); - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`, `DEFAULT_VALUE`=`abcde`);"); + helper.ExecuteSchemeQuery( + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`, " + "`DEFAULT_VALUE`=`abcde`);"); helper.FillPKOnly(0, 800000); auto selectQuery = TString(R"( @@ -153,7 +173,8 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { } 
Y_UNIT_TEST(WriteDeleteCleanGC) { - auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetSmallSizeDetector(1000000); csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); Singleton()->ResetWriteCounters(); @@ -164,51 +185,61 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { auto settings = TKikimrSettings().SetAppConfig(appConfig).SetWithSampleTables(false); TKikimrRunner kikimr(settings); TLocalHelper(kikimr).CreateTestOlapTable(); - Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_BLOBS }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + Tests::NCommon::TLoggerInit(kikimr) + .SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_BLOBS }, "CS") + .SetPriority(NActors::NLog::PRI_DEBUG) + .Initialize(); auto tableClient = kikimr.GetTableClient(); auto client = kikimr.GetQueryClient(); { - auto it = client.ExecuteQuery(R"( + auto it = client + .ExecuteQuery(R"( INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:00Z'), 'a', '0'); INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:01Z'), 'a', 'test'); INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:02Z'), 'a', 't'); - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", + NYdb::NQuery::TTxControl::BeginTx().CommitTx()) + .ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } - while (csController->GetInsertStartedCounter().Val() == 0) { + while (csController->GetCompactionStartedCounter().Val() == 0) { Cerr << "Wait indexation..." 
<< Endl; Sleep(TDuration::Seconds(2)); } { const TInstant start = TInstant::Now(); - while (!Singleton()->GetSize() && TInstant::Now() - start < TDuration::Seconds(10)) { + while ( + !Singleton()->GetSize() && TInstant::Now() - start < TDuration::Seconds(10)) { Cerr << "Wait size in memory... " << Singleton()->GetSize() << Endl; Sleep(TDuration::Seconds(2)); } AFL_VERIFY(Singleton()->GetSize()); } { - auto it = client.ExecuteQuery(R"( + auto it = client + .ExecuteQuery(R"( DELETE FROM `/Root/olapStore/olapTable` ON SELECT CAST(0u AS Timestamp) AS timestamp, Unwrap(CAST('a' AS Utf8)) AS uid; DELETE FROM `/Root/olapStore/olapTable`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", + NYdb::NQuery::TTxControl::BeginTx().CommitTx()) + .ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } csController->SetOverrideReadTimeoutClean(TDuration::Zero()); csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); { const TInstant start = TInstant::Now(); - while (Singleton()->GetSize() && TInstant::Now() - start < TDuration::Seconds(10)) { + while ( + Singleton()->GetSize() && TInstant::Now() - start < TDuration::Seconds(10)) { Cerr << "Wait empty... " << Singleton()->GetSize() << Endl; Sleep(TDuration::Seconds(2)); } AFL_VERIFY(!Singleton()->GetSize()); } } - } -} // namespace +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index f39e02b2f6ba..ffd68aa2dbb7 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -7632,11 +7632,6 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { testHelper.BulkUpsert(testTable, tableInserter); } - while (csController->GetInsertFinishedCounter().Val() == 0) { - Cout << "Wait indexation..." 
<< Endl; - Sleep(TDuration::Seconds(2)); - } - // const auto ruleName = testHelper.CreateTieringRule("tier1", "created_att"); const auto ruleName = testHelper.CreateTieringRule("tier1", "created_at"); testHelper.SetTiering(tableName, ruleName); diff --git a/ydb/core/protos/feature_flags.proto b/ydb/core/protos/feature_flags.proto index 028bfe58f295..13532f06472b 100644 --- a/ydb/core/protos/feature_flags.proto +++ b/ydb/core/protos/feature_flags.proto @@ -165,4 +165,5 @@ message TFeatureFlags { optional bool EnableImmediateWritingOnBulkUpsert = 146 [default = false]; optional bool EnableInsertWriteIdSpecialColumnCompatibility = 147 [default = false]; optional bool EnableTopicAutopartitioningForCDC = 148 [default = false]; + optional bool EnableWritePortionsOnInsert = 149 [default = false]; } diff --git a/ydb/core/protos/tx_columnshard.proto b/ydb/core/protos/tx_columnshard.proto index 1875cca96112..6bf4e752f6a7 100644 --- a/ydb/core/protos/tx_columnshard.proto +++ b/ydb/core/protos/tx_columnshard.proto @@ -352,4 +352,6 @@ message TEvReadBlobRangesResult { message TInternalOperationData { repeated uint64 InternalWriteIds = 1; optional uint32 ModificationType = 2; + optional uint64 PathId = 3; + optional bool WritePortions = 4; } diff --git a/ydb/core/testlib/test_client.h b/ydb/core/testlib/test_client.h index db3a73415ed5..033d5fe339ab 100644 --- a/ydb/core/testlib/test_client.h +++ b/ydb/core/testlib/test_client.h @@ -258,6 +258,8 @@ namespace Tests { AppConfig->MutableHiveConfig()->SetObjectImbalanceToBalance(100); AppConfig->MutableColumnShardConfig()->SetDisabledOnSchemeShard(false); FeatureFlags.SetEnableSeparationComputeActorsFromRead(true); + FeatureFlags.SetEnableImmediateWritingOnBulkUpsert(true); + FeatureFlags.SetEnableWritePortionsOnInsert(true); } TServerSettings(const TServerSettings& settings) = default; diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.cpp 
b/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.cpp new file mode 100644 index 000000000000..9e6c7738b8b1 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.cpp @@ -0,0 +1,129 @@ +#include "tx_blobs_written.h" + +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +bool TTxBlobsWritingFinished::DoExecute(TTransactionContext& txc, const TActorContext&) { + TMemoryProfileGuard mpg("TTxBlobsWritingFinished::Execute"); + txc.DB.NoMoreReadsForTx(); + CommitSnapshot = NOlap::TSnapshot::MaxForPlanStep(Self->GetOutdatedStep()); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "execute"); + ACFL_DEBUG("event", "start_execute"); + auto& index = Self->MutableIndexAs(); + for (auto&& pack : Packs) { + const auto& writeMeta = pack.GetWriteMeta(); + AFL_VERIFY(Self->TablesManager.IsReadyForWrite(writeMeta.GetTableId())); + AFL_VERIFY(!writeMeta.HasLongTxId()); + auto operation = Self->OperationsManager->GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); + Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); + auto& granule = index.MutableGranuleVerified(operation->GetPathId()); + for (auto&& portion : pack.MutablePortions()) { + if (operation->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + static TAtomicCounter Counter = 0; + portion.GetPortionInfoConstructor()->SetInsertWriteId((TInsertWriteId)Counter.Inc()); + } else { + portion.GetPortionInfoConstructor()->SetInsertWriteId(Self->InsertTable->BuildNextWriteId(txc)); + } + pack.AddInsertWriteId(portion.GetPortionInfoConstructor()->GetInsertWriteIdVerified()); + portion.Finalize(Self, txc); + if (operation->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + granule.CommitImmediateOnExecute(txc, *CommitSnapshot, portion.GetPortionInfo()); + } else { + granule.InsertPortionOnExecute(txc, 
portion.GetPortionInfo()); + } + } + } + + NOlap::TBlobManagerDb blobManagerDb(txc.DB); + if (WritingActions) { + WritingActions->OnExecuteTxAfterWrite(*Self, blobManagerDb, true); + } + std::set operationIds; + for (auto&& pack : Packs) { + const auto& writeMeta = pack.GetWriteMeta(); + auto operation = Self->OperationsManager->GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); + if (!operationIds.emplace(operation->GetWriteId()).second) { + continue; + } + Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); + operation->OnWriteFinish(txc, pack.GetInsertWriteIds(), operation->GetBehaviour() == EOperationBehaviour::NoTxWrite); + Self->OperationsManager->LinkInsertWriteIdToOperationWriteId(pack.GetInsertWriteIds(), operation->GetWriteId()); + if (operation->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID()); + Results.emplace_back(std::move(ev), writeMeta.GetSource(), operation->GetCookie()); + } else { + auto& info = Self->OperationsManager->GetLockVerified(operation->GetLockId()); + NKikimrDataEvents::TLock lock; + lock.SetLockId(operation->GetLockId()); + lock.SetDataShard(Self->TabletID()); + lock.SetGeneration(info.GetGeneration()); + lock.SetCounter(info.GetInternalGenerationCounter()); + lock.SetPathId(writeMeta.GetTableId()); + auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID(), operation->GetLockId(), lock); + Results.emplace_back(std::move(ev), writeMeta.GetSource(), operation->GetCookie()); + } + } + return true; +} + +void TTxBlobsWritingFinished::DoComplete(const TActorContext& ctx) { + TMemoryProfileGuard mpg("TTxBlobsWritingFinished::Complete"); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "complete"); + const auto now = TMonotonic::Now(); + if (WritingActions) { + 
WritingActions->OnCompleteTxAfterWrite(*Self, true); + } + + for (auto&& i : Results) { + i.DoSendReply(ctx); + } + auto& index = Self->MutableIndexAs(); + for (auto&& pack : Packs) { + const auto& writeMeta = pack.GetWriteMeta(); + AFL_VERIFY(!writeMeta.HasLongTxId()); + auto op = Self->GetOperationsManager().GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); + auto& granule = index.MutableGranuleVerified(op->GetPathId()); + for (auto&& portion : pack.GetPortions()) { + if (op->GetBehaviour() == EOperationBehaviour::WriteWithLock || op->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + if (op->GetBehaviour() != EOperationBehaviour::NoTxWrite || Self->GetOperationsManager().HasReadLocks(writeMeta.GetTableId())) { + auto evWrite = std::make_shared( + writeMeta.GetTableId(), portion.GetPKBatch(), Self->GetIndexOptional()->GetVersionedIndex().GetPrimaryKey()); + Self->GetOperationsManager().AddEventForLock(*Self, op->GetLockId(), evWrite); + } + } + if (op->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + AFL_VERIFY(CommitSnapshot); + granule.CommitImmediateOnComplete(portion.GetPortionInfo(), index); + } else { + granule.InsertPortionOnComplete(portion.GetPortionInfo()); + } + } + Self->Counters.GetCSCounters().OnWriteTxComplete(now - writeMeta.GetWriteStartInstant()); + Self->Counters.GetCSCounters().OnSuccessWriteResponse(); + } + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_IMMEDIATE_TX_COMPLETED); + Self->SetupCompaction(); +} + +TTxBlobsWritingFinished::TTxBlobsWritingFinished(TColumnShard* self, const NKikimrProto::EReplyStatus writeStatus, + const std::shared_ptr& writingActions, std::vector&& packs, + const std::vector& fails) + : TBase(self, "TTxBlobsWritingFinished") + , PutBlobResult(writeStatus) + , Packs(std::move(packs)) + , WritingActions(writingActions) { + Y_UNUSED(PutBlobResult); + for (auto&& i : fails) { + auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID()); + auto op = 
Self->GetOperationsManager().GetOperationVerified((TOperationWriteId)i.GetWriteMeta().GetWriteId()); + Results.emplace_back(std::move(ev), i.GetWriteMeta().GetSource(), op->GetCookie()); + } +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.h new file mode 100644 index 000000000000..531b86385933 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.h @@ -0,0 +1,55 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TColumnShard; + +class TTxBlobsWritingFinished: public NOlap::NDataSharing::TExtendedTransactionBase { +private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const NKikimrProto::EReplyStatus PutBlobResult; + std::vector Packs; + const std::shared_ptr WritingActions; + std::optional CommitSnapshot; + + class TReplyInfo { + private: + std::unique_ptr Event; + TActorId DestinationForReply; + const ui64 Cookie; + + public: + TReplyInfo(std::unique_ptr&& ev, const TActorId& destinationForReply, const ui64 cookie) + : Event(std::move(ev)) + , DestinationForReply(destinationForReply) + , Cookie(cookie) { + } + + void DoSendReply(const TActorContext& ctx) { + ctx.Send(DestinationForReply, Event.release(), 0, Cookie); + } + }; + + std::vector Results; + +public: + TTxBlobsWritingFinished(TColumnShard* self, const NKikimrProto::EReplyStatus writeStatus, + const std::shared_ptr& writingActions, std::vector&& packs, + const std::vector& fails); + + virtual bool DoExecute(TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& ctx) override; + TTxType GetTxType() const override { + return TXTYPE_WRITE; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp 
b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp index 5b66a0587b5e..4bd2b6faf9c3 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp @@ -33,7 +33,7 @@ bool TTxWrite::CommitOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali return true; } -bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { +bool TTxWrite::DoExecute(TTransactionContext& txc, const TActorContext&) { CommitSnapshot = NOlap::TSnapshot::MaxForPlanStep(Self->GetOutdatedStep()); TMemoryProfileGuard mpg("TTxWrite::Execute"); NActors::TLogContextGuard logGuard = @@ -118,7 +118,7 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { return true; } -void TTxWrite::Complete(const TActorContext& ctx) { +void TTxWrite::DoComplete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxWrite::Complete"); NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "complete"); @@ -131,10 +131,7 @@ void TTxWrite::Complete(const TActorContext& ctx) { i->OnCompleteTxAfterRemoving(true); } - AFL_VERIFY(buffer.GetAggregations().size() == Results.size() + ResultOperators.size()); - for (auto&& i : ResultOperators) { - Self->GetProgressTxController().FinishProposeOnComplete(i->GetTxId(), ctx); - } + AFL_VERIFY(buffer.GetAggregations().size() == Results.size()); for (auto&& i : Results) { i.DoSendReply(ctx); } diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h index 6171f2c87a04..aa626c9ea8ed 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h @@ -1,24 +1,28 @@ #pragma once #include +#include #include namespace NKikimr::NColumnShard { -class TTxWrite : public NTabletFlatExecutor::TTransactionBase { +class 
TTxWrite: public NOlap::NDataSharing::TExtendedTransactionBase { +private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + public: TTxWrite(TColumnShard* self, const TEvPrivate::TEvWriteBlobsResult::TPtr& putBlobResult) - : NTabletFlatExecutor::TTransactionBase(self) - , PutBlobResult(putBlobResult) - , TabletTxNo(++Self->TabletTxCounter) - {} + : TBase(self, "TTxWrite") + , PutBlobResult(putBlobResult) { + } - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_WRITE; } + bool DoExecute(TTransactionContext& txc, const TActorContext& ctx) override; + void DoComplete(const TActorContext& ctx) override; + TTxType GetTxType() const override { + return TXTYPE_WRITE; + } private: TEvPrivate::TEvWriteBlobsResult::TPtr PutBlobResult; - const ui32 TabletTxNo; std::optional CommitSnapshot; bool CommitOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TInsertWriteId writeId); @@ -29,13 +33,12 @@ class TTxWrite : public NTabletFlatExecutor::TTransactionBase { std::unique_ptr Event; TActorId DestinationForReply; const ui64 Cookie; + public: TReplyInfo(std::unique_ptr&& ev, const TActorId& destinationForReply, const ui64 cookie) : Event(std::move(ev)) , DestinationForReply(destinationForReply) - , Cookie(cookie) - { - + , Cookie(cookie) { } void DoSendReply(const TActorContext& ctx) { @@ -44,17 +47,6 @@ class TTxWrite : public NTabletFlatExecutor::TTransactionBase { }; std::vector Results; - std::vector> ResultOperators; - - - TStringBuilder TxPrefix() const { - return TStringBuilder() << "TxWrite[" << ToString(TabletTxNo) << "] "; - } - - TString TxSuffix() const { - return TStringBuilder() << " at tablet " << Self->TabletID(); - } }; - -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/ya.make b/ydb/core/tx/columnshard/blobs_action/transaction/ya.make 
index c78e93ef3b7e..405f0e3f9ebd 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/transaction/ya.make @@ -7,6 +7,7 @@ SRCS( tx_gc_insert_table.cpp tx_gc_indexed.cpp tx_remove_blobs.cpp + tx_blobs_written.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp index 981443f26eee..b4841f72f7b6 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -1,5 +1,6 @@ #include "columnshard_impl.h" +#include "blobs_action/transaction/tx_blobs_written.h" #include "blobs_action/transaction/tx_draft.h" #include "blobs_action/transaction/tx_write.h" #include "common/limits.h" @@ -19,8 +20,8 @@ namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; -void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, - std::unique_ptr&& event, const TActorContext& ctx) { +void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, + const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx) { Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); switch (overloadReason) { case EOverloadStatus::Disk: @@ -51,12 +52,12 @@ void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const ctx.Send(writeMeta.GetSource(), event.release(), 0, cookie); } -TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) const { +TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 pathId) const { if (IsAnyChannelYellowStop()) { return EOverloadStatus::Disk; } - if (InsertTable && InsertTable->IsOverloadedByCommitted(tableId)) { + if (InsertTable && InsertTable->IsOverloadedByCommitted(pathId)) { return EOverloadStatus::InsertTable; } @@ -86,6 +87,23 @@ 
TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) return EOverloadStatus::None; } +void TColumnShard::Handle(NPrivateEvents::NWrite::TEvWritePortionResult::TPtr& ev, const TActorContext& ctx) { + NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWritePortionResult"); + AFL_VERIFY(ev->Get()->GetWriteStatus() == NKikimrProto::OK); + std::vector writtenPacks = ev->Get()->DetachInsertedPacks(); + std::vector fails = ev->Get()->DetachFails(); + for (auto&& i : writtenPacks) { + Counters.GetWritesMonitor()->OnFinishWrite(i.GetDataSize(), 1); + } + for (auto&& i : fails) { + Counters.GetWritesMonitor()->OnFinishWrite(i.GetDataSize(), 1); + } + Execute( + new TTxBlobsWritingFinished(this, ev->Get()->GetWriteStatus(), ev->Get()->GetWriteAction(), std::move(writtenPacks), std::move(fails)), + ctx); +} + void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActorContext& ctx) { NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); @@ -162,20 +180,20 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex Counters.GetCSCounters().OnStartWriteRequest(); const auto& record = Proto(ev->Get()); - const ui64 tableId = record.GetTableId(); + const ui64 pathId = record.GetTableId(); const ui64 writeId = record.GetWriteId(); const ui64 cookie = ev->Cookie; const TString dedupId = record.GetDedupId(); const auto source = ev->Sender; - Counters.GetColumnTablesCounters()->GetPathIdCounter(tableId)->OnWriteEvent(); + Counters.GetColumnTablesCounters()->GetPathIdCounter(pathId)->OnWriteEvent(); std::optional granuleShardingVersion; if (record.HasGranuleShardingVersion()) { granuleShardingVersion = record.GetGranuleShardingVersion(); } - NEvWrite::TWriteMeta writeMeta(writeId, tableId, source, 
granuleShardingVersion); + NEvWrite::TWriteMeta writeMeta(writeId, pathId, source, granuleShardingVersion); if (record.HasModificationType()) { writeMeta.SetModificationType(TEnumOperator::DeserializeFromProto(record.GetModificationType())); } @@ -197,7 +215,7 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::Disabled); } - if (!TablesManager.IsReadyForWrite(tableId)) { + if (!TablesManager.IsReadyForWrite(pathId)) { LOG_S_NOTICE("Write (fail) into pathId:" << writeMeta.GetTableId() << (TablesManager.HasPrimaryIndex() ? "" : " no index") << " at tablet " << TabletID()); @@ -235,8 +253,8 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex } NEvWrite::TWriteData writeData(writeMeta, arrowData, snapshotSchema->GetIndexInfo().GetReplaceKey(), - StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING)); - auto overloadStatus = CheckOverloaded(tableId); + StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING), false); + auto overloadStatus = CheckOverloaded(pathId); if (overloadStatus != EOverloadStatus::None) { std::unique_ptr result = std::make_unique( TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED); @@ -262,8 +280,11 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex << (writeMeta.GetWriteId() ? 
(" writeId " + ToString(writeMeta.GetWriteId())).c_str() : " ") << Counters.GetWritesMonitor()->DebugString() << " at tablet " << TabletID()); writeData.MutableWriteMeta().SetWriteMiddle1StartInstant(TMonotonic::Now()); + + NOlap::TWritingContext context(TabletID(), SelfId(), snapshotSchema, StoragesManager, + Counters.GetIndexationCounters().SplitterCounters, Counters.GetCSCounters().WritingCounters); std::shared_ptr task = std::make_shared( - TabletID(), SelfId(), BufferizationWriteActorId, std::move(writeData), snapshotSchema, GetLastTxSnapshot(), Counters.GetCSCounters().WritingCounters); + BufferizationWriteActorId, std::move(writeData), GetLastTxSnapshot(), context); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); } } @@ -333,8 +354,8 @@ class TCommitOperation { return std::make_unique( TFullTxInfo::BuildFake(kind), LockId, ReceivingShards, SendingShards); } else { - return std::make_unique(TFullTxInfo::BuildFake(kind), LockId, - ArbiterColumnShard, ReceivingShards.contains(TabletId)); + return std::make_unique( + TFullTxInfo::BuildFake(kind), LockId, ArbiterColumnShard, ReceivingShards.contains(TabletId)); } } @@ -433,7 +454,7 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor const auto source = ev->Sender; const auto cookie = ev->Cookie; const auto behaviourConclusion = TOperationsManager::GetBehaviour(*ev->Get()); -// AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("ev_write", record.DebugString()); + // AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("ev_write", record.DebugString()); if (behaviourConclusion.IsFail()) { Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, @@ -521,9 +542,9 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor return; } - const auto tableId = operation.GetTableId().GetTableId(); + const auto pathId = 
operation.GetTableId().GetTableId(); - if (!TablesManager.IsReadyForWrite(tableId)) { + if (!TablesManager.IsReadyForWrite(pathId)) { Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); @@ -539,11 +560,11 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor ctx.Send(source, result.release(), 0, cookie); } - auto overloadStatus = CheckOverloaded(tableId); + auto overloadStatus = CheckOverloaded(pathId); if (overloadStatus != EOverloadStatus::None) { std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError( TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); - OverloadWriteFail(overloadStatus, NEvWrite::TWriteMeta(0, tableId, source, {}), arrowData->GetSize(), cookie, std::move(result), ctx); + OverloadWriteFail(overloadStatus, NEvWrite::TWriteMeta(0, pathId, source, {}), arrowData->GetSize(), cookie, std::move(result), ctx); return; } @@ -562,10 +583,15 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor } OperationsManager->RegisterLock(lockId, Generation()); - auto writeOperation = OperationsManager->RegisterOperation(lockId, cookie, granuleShardingVersionId, *mType); + auto writeOperation = OperationsManager->RegisterOperation( + pathId, lockId, cookie, granuleShardingVersionId, *mType, AppDataVerified().FeatureFlags.GetEnableWritePortionsOnInsert()); Y_ABORT_UNLESS(writeOperation); writeOperation->SetBehaviour(behaviour); - writeOperation->Start(*this, tableId, arrowData, source, schema, ctx); + NOlap::TWritingContext wContext( + pathId, SelfId(), schema, StoragesManager, Counters.GetIndexationCounters().SplitterCounters, + Counters.GetCSCounters().WritingCounters); + arrowData->SetSeparationPoints(GetIndexAs().GetGranulePtrVerified(pathId)->GetBucketPositions()); + 
writeOperation->Start(*this, arrowData, source, wContext); } } // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index aa6ef920ece7..16ceed2681cc 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -1,35 +1,33 @@ #pragma once -#include "defs.h" #include "background_controller.h" -#include "counters.h" #include "columnshard.h" -#include "columnshard_ttl.h" #include "columnshard_private_events.h" +#include "columnshard_ttl.h" +#include "counters.h" +#include "defs.h" +#include "inflight_request_tracker.h" #include "tables_manager.h" -#include "blobs_action/events/delete_blobs.h" #include "bg_tasks/events/local.h" -#include "transactions/tx_controller.h" -#include "inflight_request_tracker.h" +#include "blobs_action/events/delete_blobs.h" #include "counters/columnshard.h" #include "counters/counters_manager.h" -#include "resource_subscriber/counters.h" -#include "resource_subscriber/task.h" -#include "normalizer/abstract/abstract.h" -#include "operations/manager.h" - -#include "export/events/events.h" - +#include "data_sharing/common/transactions/tx_extension.h" #include "data_sharing/destination/events/control.h" -#include "data_sharing/source/events/control.h" #include "data_sharing/destination/events/transfer.h" -#include "data_sharing/source/events/transfer.h" #include "data_sharing/manager/sessions.h" #include "data_sharing/manager/shared_blobs.h" -#include "data_sharing/common/transactions/tx_extension.h" #include "data_sharing/modification/events/change_owning.h" - +#include "data_sharing/source/events/control.h" +#include "data_sharing/source/events/transfer.h" +#include "export/events/events.h" +#include "normalizer/abstract/abstract.h" +#include "operations/events.h" +#include "operations/manager.h" +#include "resource_subscriber/counters.h" +#include "resource_subscriber/task.h" #include 
"subscriber/abstract/manager/manager.h" +#include "transactions/tx_controller.h" #include #include @@ -38,12 +36,13 @@ #include #include #include +#include #include #include #include -#include -#include + #include +#include namespace NKikimr::NOlap { class TCleanupPortionsColumnEngineChanges; @@ -60,14 +59,14 @@ class TTxInternalScan; namespace NPlain { class TIndexScannerConstructor; } -} +} // namespace NReader namespace NDataSharing { class TTxDataFromSource; class TTxDataAckToSource; class TTxFinishAckToSource; class TTxFinishAckFromInitiator; -} +} // namespace NDataSharing namespace NBackground { class TSessionsManager; @@ -77,15 +76,15 @@ namespace NBlobOperations { namespace NBlobStorage { class TWriteAction; class TOperator; -} +} // namespace NBlobStorage namespace NTier { class TOperator; } -} +} // namespace NBlobOperations namespace NCompaction { class TGeneralCompactColumnEngineChanges; } -} +} // namespace NKikimr::NOlap namespace NKikimr::NColumnShard { @@ -96,6 +95,7 @@ class TTxInsertTableCleanup; class TTxRemoveSharedBlobs; class TOperationsManager; class TWaitEraseTablesTxSubscriber; +class TTxBlobsWritingFinished; extern bool gAllowLogBatchingDefaultValue; @@ -121,8 +121,8 @@ struct TSettings { TSettings() : BlobWriteGrouppingEnabled(1, 0, 1) , CacheDataAfterIndexing(1, 0, 1) - , CacheDataAfterCompaction(1, 0, 1) - {} + , CacheDataAfterCompaction(1, 0, 1) { + } void RegisterControls(TControlBoard& icb) { icb.RegisterSharedControl(BlobWriteGrouppingEnabled, "ColumnShardControls.BlobWriteGrouppingEnabled"); @@ -136,10 +136,7 @@ using ITransaction = NTabletFlatExecutor::ITransaction; template using TTransactionBase = NTabletFlatExecutor::TTransactionBase; -class TColumnShard - : public TActor - , public NTabletFlatExecutor::TTabletExecutedFlat -{ +class TColumnShard: public TActor, public NTabletFlatExecutor::TTabletExecutedFlat { friend class TEvWriteCommitSecondaryTransactionOperator; friend class TEvWriteCommitPrimaryTransactionOperator; friend 
class TTxInsertTableCleanup; @@ -150,6 +147,7 @@ class TColumnShard friend class TTxNotifyTxCompletion; friend class TTxPlanStep; friend class TTxWrite; + friend class TTxBlobsWritingFinished; friend class TTxReadBase; friend class TTxRead; friend class TTxWriteIndex; @@ -199,7 +197,6 @@ class TColumnShard friend class IProposeTxOperator; friend class TSharingTransactionOperator; - class TTxProgressTx; class TTxProposeCancel; // proto @@ -220,8 +217,10 @@ class TColumnShard void Handle(TEvMediatorTimecast::TEvRegisterTabletResult::TPtr& ev, const TActorContext& ctx); void Handle(TEvMediatorTimecast::TEvNotifyPlanStep::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPrivate::TEvScanStats::TPtr &ev, const TActorContext &ctx); - void Handle(TEvPrivate::TEvReadFinished::TPtr &ev, const TActorContext &ctx); + void Handle(NPrivateEvents::NWrite::TEvWritePortionResult::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPrivate::TEvScanStats::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx); void Handle(NActors::TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvPingSnapshotsUsage::TPtr& ev, const TActorContext& ctx); @@ -323,14 +322,17 @@ class TColumnShard return TRowVersion(LastCompletedTx.GetPlanStep(), LastCompletedTx.GetTxId()); } - ui32 Generation() const { return Executor()->Generation(); } + ui32 Generation() const { + return Executor()->Generation(); + } bool IsUserTable(const TTableId&) const { return true; } private: - void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx); + void OverloadWriteFail(const EOverloadStatus overloadReason, const 
NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, + std::unique_ptr&& event, const TActorContext& ctx); EOverloadStatus CheckOverloaded(const ui64 tableId) const; protected: @@ -343,17 +345,17 @@ class TColumnShard TRACE_EVENT(NKikimrServices::TX_COLUMNSHARD); switch (ev->GetTypeRewrite()) { HFunc(TEvTablet::TEvTabletDead, HandleTabletDead); - default: - LOG_S_WARN("TColumnShard.StateBroken at " << TabletID() - << " unhandled event type: " << ev->GetTypeRewrite() - << " event: " << ev->ToString()); - Send(IEventHandle::ForwardOnNondelivery(std::move(ev), NActors::TEvents::TEvUndelivered::ReasonActorUnknown)); - break; + default: + LOG_S_WARN("TColumnShard.StateBroken at " << TabletID() << " unhandled event type: " << ev->GetTypeRewrite() + << " event: " << ev->ToString()); + Send(IEventHandle::ForwardOnNondelivery(std::move(ev), NActors::TEvents::TEvUndelivered::ReasonActorUnknown)); + break; } } STFUNC(StateWork) { - const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); + const TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); TRACE_EVENT(NKikimrServices::TX_COLUMNSHARD); switch (ev->GetTypeRewrite()) { hFunc(NMetadata::NProvider::TEvRefreshSubscriberData, Handle); @@ -374,6 +376,8 @@ class TColumnShard HFunc(TEvTxProcessing::TEvPlanStep, Handle); HFunc(TEvColumnShard::TEvWrite, Handle); HFunc(TEvPrivate::TEvWriteBlobsResult, Handle); + HFunc(NPrivateEvents::NWrite::TEvWritePortionResult, Handle); + HFunc(TEvMediatorTimecast::TEvRegisterTabletResult, Handle); HFunc(TEvMediatorTimecast::TEvNotifyPlanStep, Handle); HFunc(TEvPrivate::TEvWriteIndex, Handle); @@ -382,7 +386,7 @@ class TColumnShard HFunc(TEvPrivate::TEvPeriodicWakeup, Handle); HFunc(NActors::TEvents::TEvWakeup, Handle); HFunc(TEvPrivate::TEvPingSnapshotsUsage, Handle); - + 
HFunc(NEvents::TDataEvents::TEvWrite, Handle); HFunc(TEvPrivate::TEvWriteDraft, Handle); HFunc(TEvPrivate::TEvGarbageCollectionFinished, Handle); @@ -406,13 +410,12 @@ class TColumnShard HFunc(NOlap::NDataSharing::NEvents::TEvFinishedFromSource, Handle); HFunc(NOlap::NDataSharing::NEvents::TEvAckFinishToSource, Handle); HFunc(NOlap::NDataSharing::NEvents::TEvAckFinishFromInitiator, Handle); - default: - if (!HandleDefaultEvents(ev, SelfId())) { - LOG_S_WARN("TColumnShard.StateWork at " << TabletID() - << " unhandled event type: "<< ev->GetTypeRewrite() - << " event: " << ev->ToString()); - } - break; + default: + if (!HandleDefaultEvents(ev, SelfId())) { + LOG_S_WARN("TColumnShard.StateWork at " << TabletID() << " unhandled event type: " << ev->GetTypeRewrite() + << " event: " << ev->ToString()); + } + break; } } @@ -505,9 +508,11 @@ class TColumnShard } TInsertWriteId HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const; - TInsertWriteId GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId); + TInsertWriteId GetLongTxWrite( + NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId); void AddLongTxWrite(const TInsertWriteId writeId, ui64 txId); - void LoadLongTxWrite(const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion); + void LoadLongTxWrite(const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, + const std::optional granuleShardingVersion); bool RemoveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui64 txId); void EnqueueBackgroundActivities(const bool periodic = false); @@ -516,12 +521,17 @@ class TColumnShard void UpdateSchemaSeqNo(const TMessageSeqNo& seqNo, NTabletFlatExecutor::TTransactionContext& txc); void 
ProtectSchemaSeqNo(const NKikimrTxColumnShard::TSchemaSeqNo& seqNoProto, NTabletFlatExecutor::TTransactionContext& txc); - void RunSchemaTx(const NKikimrTxColumnShard::TSchemaTxBody& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); + void RunSchemaTx( + const NKikimrTxColumnShard::TSchemaTxBody& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); void RunInit(const NKikimrTxColumnShard::TInitShard& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); - void RunEnsureTable(const NKikimrTxColumnShard::TCreateTable& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); - void RunAlterTable(const NKikimrTxColumnShard::TAlterTable& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); - void RunDropTable(const NKikimrTxColumnShard::TDropTable& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); - void RunAlterStore(const NKikimrTxColumnShard::TAlterStore& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); + void RunEnsureTable( + const NKikimrTxColumnShard::TCreateTable& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); + void RunAlterTable( + const NKikimrTxColumnShard::TAlterTable& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); + void RunDropTable( + const NKikimrTxColumnShard::TDropTable& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); + void RunAlterStore( + const NKikimrTxColumnShard::TAlterStore& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); void StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex); void SetupIndexation(); @@ -622,4 +632,4 @@ class TColumnShard TColumnShard(TTabletStorageInfo* info, const TActorId& tablet); }; -} +} // namespace 
NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index cb0e8cd97150..2f7c887a4367 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -1,20 +1,27 @@ #pragma once -#include "blobs_action/abstract/gc.h" #include "defs.h" +#include "blobs_action/abstract/gc.h" + +#include #include #include -#include -#include #include +#include +#include #include -#include namespace NKikimr::NOlap::NReader { class IApplyAction; } +namespace NKikimr::NOlap { +class IBlobsWritingAction; +class TPortionInfo; +class TPortionInfoConstructor; +} // namespace NKikimr::NOlap + namespace NKikimr::NColumnShard { struct TEvPrivate { @@ -47,6 +54,7 @@ struct TEvPrivate { EvTaskProcessedResult, EvPingSnapshotsUsage, + EvWritePortionResult, EvEnd }; @@ -67,23 +75,22 @@ struct TEvPrivate { } }; - struct TEvTieringModified: public TEventLocal { - }; + struct TEvTieringModified: public TEventLocal {}; struct TEvWriteDraft: public TEventLocal { const std::shared_ptr WriteController; TEvWriteDraft(std::shared_ptr controller) : WriteController(controller) { - } }; class TEvNormalizerResult: public TEventLocal { NOlap::INormalizerChanges::TPtr Changes; + public: TEvNormalizerResult(NOlap::INormalizerChanges::TPtr changes) - : Changes(changes) - {} + : Changes(changes) { + } NOlap::INormalizerChanges::TPtr GetChanges() const { Y_ABORT_UNLESS(!!Changes); @@ -95,27 +102,24 @@ struct TEvPrivate { const std::shared_ptr Action; TEvGarbageCollectionFinished(const std::shared_ptr& action) : Action(action) { - } }; /// Common event for Indexing and GranuleCompaction: write index data in TTxWriteIndex transaction. 
- struct TEvWriteIndex : public TEventLocal { + struct TEvWriteIndex: public TEventLocal { std::shared_ptr IndexInfo; std::shared_ptr IndexChanges; - bool GranuleCompaction{false}; + bool GranuleCompaction{ false }; TUsage ResourceUsage; - bool CacheData{false}; + bool CacheData{ false }; TDuration Duration; TBlobPutResult::TPtr PutResult; - TEvWriteIndex(const std::shared_ptr& indexInfo, - std::shared_ptr indexChanges, - bool cacheData) + TEvWriteIndex( + const std::shared_ptr& indexInfo, std::shared_ptr indexChanges, bool cacheData) : IndexInfo(indexInfo) , IndexChanges(indexChanges) - , CacheData(cacheData) - { + , CacheData(cacheData) { PutResult = std::make_shared(NKikimrProto::UNKNOWN); } @@ -135,13 +139,16 @@ struct TEvPrivate { } }; - struct TEvScanStats : public TEventLocal { - TEvScanStats(ui64 rows, ui64 bytes) : Rows(rows), Bytes(bytes) {} + struct TEvScanStats: public TEventLocal { + TEvScanStats(ui64 rows, ui64 bytes) + : Rows(rows) + , Bytes(bytes) { + } ui64 Rows; ui64 Bytes; }; - struct TEvReadFinished : public TEventLocal { + struct TEvReadFinished: public TEventLocal { explicit TEvReadFinished(ui64 requestCookie, ui64 txId = 0) : RequestCookie(requestCookie) , TxId(txId) { @@ -151,10 +158,10 @@ struct TEvPrivate { ui64 TxId; }; - struct TEvPeriodicWakeup : public TEventLocal { + struct TEvPeriodicWakeup: public TEventLocal { TEvPeriodicWakeup(bool manual = false) - : Manual(manual) - {} + : Manual(manual) { + } bool Manual; }; @@ -169,6 +176,7 @@ struct TEvPrivate { Internal, Request }; + private: NColumnShard::TBlobPutResult::TPtr PutResult; NOlap::TWritingBuffer WritesBuffer; @@ -176,7 +184,6 @@ struct TEvPrivate { YDB_ACCESSOR(EErrorClass, ErrorClass, EErrorClass::Internal); public: - NKikimrDataEvents::TEvWriteResult::EStatus GetWriteResultStatus() const { switch (ErrorClass) { case EErrorClass::Internal: @@ -185,7 +192,7 @@ struct TEvPrivate { return NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST; } } - + static std::unique_ptr Error( 
const NKikimrProto::EReplyStatus status, NOlap::TWritingBuffer&& writesBuffer, const TString& error, const EErrorClass errorClass) { std::unique_ptr result = @@ -197,8 +204,7 @@ struct TEvPrivate { TEvWriteBlobsResult(const NColumnShard::TBlobPutResult::TPtr& putResult, NOlap::TWritingBuffer&& writesBuffer) : PutResult(putResult) - , WritesBuffer(std::move(writesBuffer)) - { + , WritesBuffer(std::move(writesBuffer)) { Y_ABORT_UNLESS(PutResult); } @@ -216,4 +222,4 @@ struct TEvPrivate { }; }; -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard_schema.h b/ydb/core/tx/columnshard/columnshard_schema.h index 4f08426ddd70..373077613751 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.h +++ b/ydb/core/tx/columnshard/columnshard_schema.h @@ -503,9 +503,15 @@ struct Schema : NIceDb::Schema { struct XTxId: Column<5, NScheme::NTypeIds::Uint64> {}; struct Metadata: Column<6, NScheme::NTypeIds::String> {}; // NKikimrTxColumnShard.TIndexColumnMeta struct ShardingVersion: Column<7, NScheme::NTypeIds::Uint64> {}; + struct MinSnapshotPlanStep: Column<8, NScheme::NTypeIds::Uint64> {}; + struct MinSnapshotTxId: Column<9, NScheme::NTypeIds::Uint64> {}; + struct CommitPlanStep: Column<10, NScheme::NTypeIds::Uint64> {}; + struct CommitTxId: Column<11, NScheme::NTypeIds::Uint64> {}; + struct InsertWriteId: Column<12, NScheme::NTypeIds::Uint64> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; }; struct BackgroundSessions: Table { diff --git a/ydb/core/tx/columnshard/common/blob.cpp b/ydb/core/tx/columnshard/common/blob.cpp index 6bcf397e339a..e09452c42f6d 100644 --- a/ydb/core/tx/columnshard/common/blob.cpp +++ b/ydb/core/tx/columnshard/common/blob.cpp @@ -136,6 +136,11 @@ NKikimrColumnShardProto::TBlobRange TBlobRange::SerializeToProto() const { return result; } +TString TBlobRange::GetData(const TString& blobData) const { + AFL_VERIFY(Offset + Size <= blobData.size())("offset", Offset)("size", 
Size)("blobDataSize", blobData.size()); + return blobData.substr(Offset, Size); +} + NKikimr::TConclusionStatus TBlobRangeLink16::DeserializeFromProto(const NKikimrColumnShardProto::TBlobRangeLink16& proto) { BlobIdx = proto.GetBlobIdx(); Offset = proto.GetOffset(); diff --git a/ydb/core/tx/columnshard/common/blob.h b/ydb/core/tx/columnshard/common/blob.h index 9aa06bd3d558..e1c10a46d403 100644 --- a/ydb/core/tx/columnshard/common/blob.h +++ b/ydb/core/tx/columnshard/common/blob.h @@ -21,8 +21,6 @@ class IBlobGroupSelector { virtual ui32 GetGroup(const TLogoBlobID& blobId) const = 0; }; -class TUnifiedBlobId; - class TUnifiedBlobId { // Id of a blob in YDB distributed storage struct TDsBlobId { @@ -191,6 +189,8 @@ struct TBlobRange { ui32 Offset; ui32 Size; + TString GetData(const TString& blobData) const; + bool operator<(const TBlobRange& br) const { if (BlobId != br.BlobId) { return BlobId.GetLogoBlobId().Compare(br.BlobId.GetLogoBlobId()) < 0; diff --git a/ydb/core/tx/columnshard/data_sharing/protos/data.proto b/ydb/core/tx/columnshard/data_sharing/protos/data.proto index 8b376e919946..6ead2d5241e0 100644 --- a/ydb/core/tx/columnshard/data_sharing/protos/data.proto +++ b/ydb/core/tx/columnshard/data_sharing/protos/data.proto @@ -36,6 +36,8 @@ message TPortionInfo { repeated TIndexChunk Indexes = 7; repeated NKikimrColumnShardProto.TUnifiedBlobId BlobIds = 8; optional uint64 SchemaVersion = 9; + optional uint64 InsertWriteId = 10; + optional NKikimrColumnShardProto.TSnapshot CommitSnapshot = 11; } message TPathIdData { diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.cpp b/ydb/core/tx/columnshard/engines/changes/compaction.cpp index 2441ce4248b8..d0bd7e541a1a 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction.cpp @@ -34,7 +34,6 @@ void TCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { THashMap> blobRanges; auto& index = 
self.GetIndexAs().GetVersionedIndex(); for (const auto& p : SwitchedPortions) { - Y_ABORT_UNLESS(!p.Empty()); p.FillBlobRangesByStorage(blobRanges, index); } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp index 825f65f80106..90b241d3cff7 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp @@ -6,9 +6,10 @@ #include #include +#include + #include #include -#include namespace NKikimr::NOlap::NCompaction { diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index 2f76ab4b1772..380f6127b457 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -24,15 +24,20 @@ std::shared_ptr TGeneralCompactColumnEngineChanges::Build } NArrow::TColumnFilter filterDeleted = NArrow::TColumnFilter::BuildAllowFilter(); if (pInfo.GetMeta().GetDeletionsCount()) { - auto table = batch->BuildTableVerified(std::set({ TIndexInfo::SPEC_COL_DELETE_FLAG })); - AFL_VERIFY(table); - auto col = table->GetColumnByName(TIndexInfo::SPEC_COL_DELETE_FLAG); - AFL_VERIFY(col); - AFL_VERIFY(col->type()->id() == arrow::Type::BOOL); - for (auto&& c : col->chunks()) { - auto bCol = static_pointer_cast(c); - for (ui32 i = 0; i < bCol->length(); ++i) { - filterDeleted.Add(!bCol->GetView(i)); + if (pInfo.HasInsertWriteId()) { + AFL_VERIFY(pInfo.GetMeta().GetDeletionsCount() == pInfo.GetRecordsCount()); + filterDeleted = NArrow::TColumnFilter::BuildDenyFilter(); + } else { + auto table = batch->BuildTableVerified(std::set({ TIndexInfo::SPEC_COL_DELETE_FLAG })); + AFL_VERIFY(table); + auto col = table->GetColumnByName(TIndexInfo::SPEC_COL_DELETE_FLAG); + AFL_VERIFY(col); + AFL_VERIFY(col->type()->id() == arrow::Type::BOOL); + for (auto&& c : col->chunks()) { + 
auto bCol = static_pointer_cast(c); + for (ui32 i = 0; i < bCol->length(); ++i) { + filterDeleted.Add(!bCol->GetView(i)); + } } } NArrow::TColumnFilter filterCorrection = NArrow::TColumnFilter::BuildDenyFilter(); @@ -102,6 +107,9 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( } for (auto&& i : SwitchedPortions) { stats->Merge(i.GetSerializationStat(*resultSchema)); + if (i.GetMeta().GetDeletionsCount()) { + dataColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); + } if (dataColumnIds.size() != resultSchema->GetColumnsCount()) { for (auto id : i.GetColumnIds()) { if (resultSchema->HasColumnId(id)) { @@ -116,6 +124,7 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( } dataColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::WRITE_ID); } + dataColumnIds.insert(IIndexInfo::GetSnapshotColumnIds().begin(), IIndexInfo::GetSnapshotColumnIds().end()); resultFiltered = std::make_shared(resultSchema, dataColumnIds); { auto seqDataColumnIds = dataColumnIds; diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.cpp b/ydb/core/tx/columnshard/engines/changes/ttl.cpp index fc74dbea0454..9774130b561f 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.cpp +++ b/ydb/core/tx/columnshard/engines/changes/ttl.cpp @@ -19,7 +19,6 @@ void TTTLColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { auto& engine = self.MutableIndexAs(); auto& index = engine.GetVersionedIndex(); for (const auto& p : PortionsToEvict) { - Y_ABORT_UNLESS(!p.GetPortionInfo().Empty()); p.GetPortionInfo().FillBlobRangesByStorage(blobRanges, index); } for (auto&& i : blobRanges) { diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.h b/ydb/core/tx/columnshard/engines/changes/ttl.h index b75795e16fe4..eaeffc9230a9 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.h +++ b/ydb/core/tx/columnshard/engines/changes/ttl.h @@ -95,8 +95,7 @@ class TTTLColumnEngineChanges: public TChangesWithAppend { return PortionsToEvict.size(); } 
void AddPortionToEvict(const TPortionInfo& info, TPortionEvictionFeatures&& features) { - Y_ABORT_UNLESS(!info.Empty()); - Y_ABORT_UNLESS(!info.HasRemoveSnapshot()); + AFL_VERIFY(!info.HasRemoveSnapshot()); PortionsToEvict.emplace_back(info, std::move(features)); } diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index 24d44eb34587..b4de9dda9889 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -12,7 +12,6 @@ void TChangesWithAppend::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, THashSet usedPortionIds; auto schemaPtr = context.EngineLogs.GetVersionedIndex().GetLastSchema(); for (auto& [_, portionInfo] : PortionsToRemove) { - Y_ABORT_UNLESS(!portionInfo.Empty()); Y_ABORT_UNLESS(portionInfo.HasRemoveSnapshot()); AFL_VERIFY(usedPortionIds.emplace(portionInfo.GetPortionId()).second)("portion_info", portionInfo.DebugString(true)); portionInfo.SaveToDatabase(context.DBWrapper, schemaPtr->GetIndexInfo().GetPKFirstColumnId(), false); diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 2c616c06e32d..cef8b3442f2d 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -279,7 +279,8 @@ class IColumnEngine { return DoRegisterTable(pathId); } virtual bool IsOverloadedByMetadata(const ui64 limit) const = 0; - virtual std::shared_ptr Select(ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter) const = 0; + virtual std::shared_ptr Select( + ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter, const bool withUncommitted) const = 0; virtual std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept = 0; virtual std::shared_ptr StartCompaction(const std::shared_ptr& dataLocksManager) noexcept = 0; virtual std::shared_ptr 
StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept = 0; diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 95f9a41aa050..07d066b00288 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -489,7 +489,6 @@ void TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, const } bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool updateStats) { - Y_ABORT_UNLESS(!portionInfo.Empty()); const ui64 portion = portionInfo.GetPortion(); auto& spg = MutableGranuleVerified(portionInfo.GetPathId()); auto p = spg.GetPortionOptional(portion); @@ -506,19 +505,31 @@ bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool up } } -std::shared_ptr TColumnEngineForLogs::Select(ui64 pathId, TSnapshot snapshot, - const TPKRangesFilter& pkRangesFilter) const { +std::shared_ptr TColumnEngineForLogs::Select( + ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter, const bool withUncommitted) const { auto out = std::make_shared(); auto spg = GranulesStorage->GetGranuleOptional(pathId); if (!spg) { return out; } + if (withUncommitted) { + for (const auto& [_, portionInfo] : spg->GetInsertedPortions()) { + AFL_VERIFY(portionInfo->HasInsertWriteId()); + AFL_VERIFY(!portionInfo->HasCommitSnapshot()); + const bool skipPortion = !pkRangesFilter.IsPortionInUsage(*portionInfo); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", skipPortion ? 
"portion_skipped" : "portion_selected")("pathId", pathId)( + "portion", portionInfo->DebugString()); + if (skipPortion) { + continue; + } + out->PortionsOrderedPK.emplace_back(portionInfo); + } + } for (const auto& [_, portionInfo] : spg->GetPortions()) { - if (!portionInfo->IsVisible(snapshot)) { + if (!portionInfo->IsVisible(snapshot, !withUncommitted)) { continue; } - Y_ABORT_UNLESS(portionInfo->Produced()); const bool skipPortion = !pkRangesFilter.IsPortionInUsage(*portionInfo); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", skipPortion ? "portion_skipped" : "portion_selected")("pathId", pathId)( "portion", portionInfo->DebugString()); diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index 7b515c26f40c..29d88384078c 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -1,18 +1,18 @@ #pragma once -#include "defs.h" #include "column_engine.h" -#include -#include -#include -#include +#include "defs.h" #include "changes/actualization/controller/controller.h" - #include "scheme/tier_info.h" #include "storage/granule.h" #include "storage/storage.h" +#include +#include +#include +#include + namespace NKikimr::NArrow { struct TSortDescription; } @@ -37,7 +37,7 @@ struct TReadMetadata; /// - Columns: granule -> blobs /// /// @note One instance per tablet. 
-class TColumnEngineForLogs : public IColumnEngine { +class TColumnEngineForLogs: public IColumnEngine { friend class TCompactColumnEngineChanges; friend class TTTLColumnEngineChanges; friend class TChangesWithAppend; @@ -81,10 +81,13 @@ class TColumnEngineForLogs : public IColumnEngine { ADD, }; - TColumnEngineForLogs(ui64 tabletId, const std::shared_ptr& storagesManager, const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema); - TColumnEngineForLogs(ui64 tabletId, const std::shared_ptr& storagesManager, const TSnapshot& snapshot, TIndexInfo&& schema); + TColumnEngineForLogs(ui64 tabletId, const std::shared_ptr& storagesManager, const TSnapshot& snapshot, + const NKikimrSchemeOp::TColumnTableSchema& schema); + TColumnEngineForLogs( + ui64 tabletId, const std::shared_ptr& storagesManager, const TSnapshot& snapshot, TIndexInfo&& schema); - virtual void OnTieringModified(const std::shared_ptr& manager, const NColumnShard::TTtl& ttl, const std::optional pathId) override; + virtual void OnTieringModified( + const std::shared_ptr& manager, const NColumnShard::TTtl& ttl, const std::optional pathId) override; virtual std::shared_ptr CopyVersionedIndexPtr() const override { return std::make_shared(VersionedIndex); @@ -111,20 +114,24 @@ class TColumnEngineForLogs : public IColumnEngine { std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; std::shared_ptr StartCompaction(const std::shared_ptr& dataLocksManager) noexcept override; - std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept override; + std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, + const std::shared_ptr& dataLocksManager) noexcept override; std::shared_ptr StartCleanupTables(const THashSet& pathsToDrop) noexcept override; - std::vector> StartTtl(const THashMap& pathEviction, const std::shared_ptr& locksManager, const ui64 
memoryUsageLimit) noexcept override; + std::vector> StartTtl(const THashMap& pathEviction, + const std::shared_ptr& locksManager, const ui64 memoryUsageLimit) noexcept override; void ReturnToIndexes(const THashMap>& portions) const { return GranulesStorage->ReturnToIndexes(portions); } virtual bool ApplyChangesOnTxCreate(std::shared_ptr indexChanges, const TSnapshot& snapshot) noexcept override; - virtual bool ApplyChangesOnExecute(IDbWrapper& db, std::shared_ptr indexChanges, const TSnapshot& snapshot) noexcept override; + virtual bool ApplyChangesOnExecute( + IDbWrapper& db, std::shared_ptr indexChanges, const TSnapshot& snapshot) noexcept override; void RegisterSchemaVersion(const TSnapshot& snapshot, TIndexInfo&& info) override; void RegisterSchemaVersion(const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) override; - std::shared_ptr Select(ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter) const override; + std::shared_ptr Select( + ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter, const bool withUncommitted) const override; bool IsPortionExists(const ui64 pathId, const ui64 portionId) const { return !!GranulesStorage->GetPortionOptional(pathId, portionId); @@ -137,7 +144,6 @@ class TColumnEngineForLogs : public IColumnEngine { return GranulesStorage->EraseTable(pathId); } - virtual bool HasDataInPathId(const ui64 pathId) const override { auto g = GetGranuleOptional(pathId); return g && g->GetPortions().size(); @@ -179,6 +185,7 @@ class TColumnEngineForLogs : public IColumnEngine { void AddShardingInfo(const TGranuleShardingInfo& shardingInfo) { VersionedIndex.AddShardingInfo(shardingInfo); } + void UpsertPortion(const TPortionInfo& portionInfo, const TPortionInfo* exInfo = nullptr); private: TVersionedIndex VersionedIndex; @@ -196,10 +203,11 @@ class TColumnEngineForLogs : public IColumnEngine { bool LoadShardingInfo(IDbWrapper& db); bool LoadCounters(IDbWrapper& db); - void 
UpsertPortion(const TPortionInfo& portionInfo, const TPortionInfo* exInfo = nullptr); bool ErasePortion(const TPortionInfo& portionInfo, bool updateStats = true); - void UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType = EStatsUpdateType::DEFAULT, const TPortionInfo* exPortionInfo = nullptr); - void UpdatePortionStats(TColumnEngineStats& engineStats, const TPortionInfo& portionInfo, EStatsUpdateType updateType, const TPortionInfo* exPortionInfo = nullptr) const; + void UpdatePortionStats( + const TPortionInfo& portionInfo, EStatsUpdateType updateType = EStatsUpdateType::DEFAULT, const TPortionInfo* exPortionInfo = nullptr); + void UpdatePortionStats(TColumnEngineStats& engineStats, const TPortionInfo& portionInfo, EStatsUpdateType updateType, + const TPortionInfo* exPortionInfo = nullptr) const; }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.cpp b/ydb/core/tx/columnshard/engines/db_wrapper.cpp index b5c8e5e4ea58..2f3687563202 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.cpp +++ b/ydb/core/tx/columnshard/engines/db_wrapper.cpp @@ -67,13 +67,22 @@ void TDbWrapper::WritePortion(const NOlap::TPortionInfo& portion) { NIceDb::TNiceDb db(Database); auto metaProto = portion.GetMeta().SerializeToProto(); using IndexPortions = NColumnShard::Schema::IndexPortions; - auto removeSnapshot = portion.GetRemoveSnapshotOptional(); - db.Table().Key(portion.GetPathId(), portion.GetPortion()).Update( - NIceDb::TUpdate(portion.GetSchemaVersionVerified()), - NIceDb::TUpdate(portion.GetShardingVersionDef(0)), - NIceDb::TUpdate(removeSnapshot ? removeSnapshot->GetPlanStep() : 0), - NIceDb::TUpdate(removeSnapshot ? 
removeSnapshot->GetTxId() : 0), - NIceDb::TUpdate(metaProto.SerializeAsString())); + const auto removeSnapshot = portion.GetRemoveSnapshotOptional(); + const auto commitSnapshot = portion.GetCommitSnapshotOptional(); + const auto insertWriteId = portion.GetInsertWriteIdOptional(); + const auto minSnapshotDeprecated = portion.GetMinSnapshotDeprecated(); + db.Table() + .Key(portion.GetPathId(), portion.GetPortion()) + .Update(NIceDb::TUpdate(portion.GetSchemaVersionVerified()), + NIceDb::TUpdate(portion.GetShardingVersionDef(0)), + NIceDb::TUpdate(commitSnapshot ? commitSnapshot->GetPlanStep() : 0), + NIceDb::TUpdate(commitSnapshot ? commitSnapshot->GetTxId() : 0), + NIceDb::TUpdate((ui64)insertWriteId.value_or(TInsertWriteId(0))), + NIceDb::TUpdate(removeSnapshot ? removeSnapshot->GetPlanStep() : 0), + NIceDb::TUpdate(removeSnapshot ? removeSnapshot->GetTxId() : 0), + NIceDb::TUpdate(minSnapshotDeprecated.GetPlanStep()), + NIceDb::TUpdate(minSnapshotDeprecated.GetTxId()), + NIceDb::TUpdate(metaProto.SerializeAsString())); } void TDbWrapper::ErasePortion(const NOlap::TPortionInfo& portion) { @@ -130,6 +139,21 @@ bool TDbWrapper::LoadPortions(const std::function()); } portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); + if (rowset.GetValue()) { + portion.SetMinSnapshotDeprecated( + TSnapshot(rowset.GetValue(), rowset.GetValue())); + } + + if (rowset.GetValueOrDefault(0)) { + portion.SetInsertWriteId((TInsertWriteId)rowset.GetValue()); + } + if (rowset.GetValueOrDefault(0)) { + AFL_VERIFY(rowset.GetValueOrDefault(0)); + portion.SetCommitSnapshot( + TSnapshot(rowset.GetValue(), rowset.GetValue())); + } else { + AFL_VERIFY(!rowset.GetValueOrDefault(0)); + } NKikimrTxColumnShard::TIndexPortionMeta metaProto; const TString metadata = rowset.template GetValue(); diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.cpp b/ydb/core/tx/columnshard/engines/portions/constructor.cpp index a9ddacd149a2..7f76fc10749c 100644 --- 
a/ydb/core/tx/columnshard/engines/portions/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/constructor.cpp @@ -24,6 +24,16 @@ TPortionInfo TPortionInfoConstructor::Build(const bool needChunksNormalization) } result.SchemaVersion = SchemaVersion; result.ShardingVersion = ShardingVersion; + result.CommitSnapshot = CommitSnapshot; + result.InsertWriteId = InsertWriteId; + AFL_VERIFY(!CommitSnapshot || !!InsertWriteId); + + if (result.GetMeta().GetProduced() == NPortion::EProduced::INSERTED) { +// AFL_VERIFY(!!InsertWriteId); + } else { + AFL_VERIFY(!CommitSnapshot); + AFL_VERIFY(!InsertWriteId); + } if (needChunksNormalization) { ReorderChunks(); diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.h b/ydb/core/tx/columnshard/engines/portions/constructor.h index 94b285255c72..4fc28abf9abe 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor.h @@ -25,6 +25,9 @@ class TPortionInfoConstructor { std::optional SchemaVersion; std::optional ShardingVersion; + std::optional CommitSnapshot; + std::optional InsertWriteId; + std::vector Indexes; YDB_ACCESSOR_DEF(std::vector, Records); std::vector BlobIds; @@ -49,7 +52,20 @@ class TPortionInfoConstructor { std::vector BlobIdxs; bool NeedBlobIdxsSort = false; + TPortionInfoConstructor(const TPortionInfoConstructor&) = default; + TPortionInfoConstructor& operator=(const TPortionInfoConstructor&) = default; + public: + TPortionInfoConstructor(TPortionInfoConstructor&&) noexcept = default; + TPortionInfoConstructor& operator=(TPortionInfoConstructor&&) noexcept = default; + + class TTestCopier { + public: + static TPortionInfoConstructor Copy(const TPortionInfoConstructor& source) { + return source; + } + }; + void SetPortionId(const ui64 value) { AFL_VERIFY(value); PortionId = value; @@ -75,13 +91,21 @@ class TPortionInfoConstructor { return MetaConstructor; } + TInsertWriteId GetInsertWriteIdVerified() const { + 
AFL_VERIFY(InsertWriteId); + return *InsertWriteId; + } + TPortionInfoConstructor(const TPortionInfo& portion, const bool withBlobs, const bool withMetadata) : PathId(portion.GetPathId()) , PortionId(portion.GetPortionId()) , MinSnapshotDeprecated(portion.GetMinSnapshotDeprecated()) , RemoveSnapshot(portion.GetRemoveSnapshotOptional()) , SchemaVersion(portion.GetSchemaVersionOptional()) - , ShardingVersion(portion.GetShardingVersionOptional()) { + , ShardingVersion(portion.GetShardingVersionOptional()) + , CommitSnapshot(portion.GetCommitSnapshotOptional()) + , InsertWriteId(portion.GetInsertWriteIdOptional()) + { if (withMetadata) { MetaConstructor = TPortionMetaConstructor(portion.Meta); } @@ -178,6 +202,19 @@ class TPortionInfoConstructor { std::shared_ptr GetSchema(const TVersionedIndex& index) const; + void SetCommitSnapshot(const TSnapshot& snap) { + AFL_VERIFY(!!InsertWriteId); + AFL_VERIFY(!CommitSnapshot); + AFL_VERIFY(snap.Valid()); + CommitSnapshot = snap; + } + + void SetInsertWriteId(const TInsertWriteId value) { + AFL_VERIFY(!InsertWriteId); + AFL_VERIFY((ui64)value); + InsertWriteId = value; + } + void SetMinSnapshotDeprecated(const TSnapshot& snap) { Y_ABORT_UNLESS(snap.Valid()); MinSnapshotDeprecated = snap; @@ -337,6 +374,9 @@ class TPortionInfoConstructor { } TPortionInfo Build(const bool needChunksNormalization); + std::shared_ptr BuildPtr(const bool needChunksNormalization) { + return std::make_shared(Build(needChunksNormalization)); + } }; class TPortionConstructors { diff --git a/ydb/core/tx/columnshard/engines/portions/meta.h b/ydb/core/tx/columnshard/engines/portions/meta.h index ad57ef1325c3..c29dc431d5d3 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/portions/meta.h @@ -17,23 +17,25 @@ struct TPortionMeta { YDB_READONLY_DEF(TString, TierName); YDB_READONLY(ui32, DeletionsCount, 0); friend class TPortionMetaConstructor; + friend class TPortionInfo; 
TPortionMeta(NArrow::TFirstLastSpecialKeys& pk, const TSnapshot& min, const TSnapshot& max) : ReplaceKeyEdges(pk) - , IndexKeyStart(pk.GetFirst()) - , IndexKeyEnd(pk.GetLast()) , RecordSnapshotMin(min) , RecordSnapshotMax(max) + , IndexKeyStart(pk.GetFirst()) + , IndexKeyEnd(pk.GetLast()) { AFL_VERIFY(IndexKeyStart <= IndexKeyEnd)("start", IndexKeyStart.DebugString())("end", IndexKeyEnd.DebugString()); } + TSnapshot RecordSnapshotMin; + TSnapshot RecordSnapshotMax; + public: using EProduced = NPortion::EProduced; NArrow::TReplaceKey IndexKeyStart; NArrow::TReplaceKey IndexKeyEnd; - TSnapshot RecordSnapshotMin; - TSnapshot RecordSnapshotMax; EProduced Produced = EProduced::UNSPECIFIED; std::optional GetTierNameOptional() const; diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 6652bf9c4c5f..855faf00d936 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -355,13 +355,26 @@ THashMap TPortionInfo::DecodeBlobAddress } const TString& TPortionInfo::GetColumnStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const { + if (HasInsertWriteId()) { + return { NBlobOperations::TGlobal::DefaultStorageId }; + } return indexInfo.GetColumnStorageId(columnId, GetMeta().GetTierName()); } const TString& TPortionInfo::GetEntityStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const { + if (HasInsertWriteId()) { + return { NBlobOperations::TGlobal::DefaultStorageId }; + } return indexInfo.GetEntityStorageId(columnId, GetMeta().GetTierName()); } +const TString& TPortionInfo::GetIndexStorageId(const ui32 indexId, const TIndexInfo& indexInfo) const { + if (HasInsertWriteId()) { + return { NBlobOperations::TGlobal::DefaultStorageId }; + } + return indexInfo.GetIndexStorageId(indexId); +} + ISnapshotSchema::TPtr TPortionInfo::GetSchema(const TVersionedIndex& index) const { AFL_VERIFY(SchemaVersion); if 
(SchemaVersion) { @@ -378,7 +391,7 @@ void TPortionInfo::FillBlobRangesByStorage(THashMap namespace { template TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& portion, const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& blobsData) { + THashMap& blobsData, const std::optional& defaultSnapshot) { std::vector columns; columns.reserve(resultSchema.GetColumnIds().size()); const ui32 rowsCount = portion.GetRecordsCount(); for (auto&& i : resultSchema.GetColumnIds()) { columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i), resultSchema.GetColumnLoaderVerified(i)); + if (portion.HasInsertWriteId()) { + if (portion.HasCommitSnapshot()) { + if (i == (ui32)IIndexInfo::ESpecialColumn::PLAN_STEP) { + columns.back().AddBlobInfo(0, portion.GetRecordsCount(), + TPortionInfo::TAssembleBlobInfo(portion.GetRecordsCount(), + std::make_shared(portion.GetCommitSnapshotVerified().GetPlanStep()), false)); + } + if (i == (ui32)IIndexInfo::ESpecialColumn::TX_ID) { + columns.back().AddBlobInfo(0, portion.GetRecordsCount(), + TPortionInfo::TAssembleBlobInfo(portion.GetRecordsCount(), + std::make_shared(portion.GetCommitSnapshotVerified().GetTxId()), false)); /* TX_ID is filled from the commit snapshot's TxId, mirroring the defaultSnapshot branch below */ + } + } else { + if (i == (ui32)IIndexInfo::ESpecialColumn::PLAN_STEP) { + columns.back().AddBlobInfo(0, portion.GetRecordsCount(), + TPortionInfo::TAssembleBlobInfo(portion.GetRecordsCount(), std::make_shared(defaultSnapshot ? defaultSnapshot->GetPlanStep() : 0))); + } + if (i == (ui32)IIndexInfo::ESpecialColumn::TX_ID) { + columns.back().AddBlobInfo(0, portion.GetRecordsCount(), + TPortionInfo::TAssembleBlobInfo(portion.GetRecordsCount(), + std::make_shared(defaultSnapshot ? 
defaultSnapshot->GetTxId() : 0))); + } + } + if (i == (ui32)IIndexInfo::ESpecialColumn::WRITE_ID) { + columns.back().AddBlobInfo(0, portion.GetRecordsCount(), + TPortionInfo::TAssembleBlobInfo( + portion.GetRecordsCount(), std::make_shared((ui64)portion.GetInsertWriteIdVerified()), false)); + } + if (i == (ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG) { + columns.back().AddBlobInfo(0, portion.GetRecordsCount(), + TPortionInfo::TAssembleBlobInfo( + portion.GetRecordsCount(), std::make_shared((bool)portion.GetMeta().GetDeletionsCount()), true)); + } + } } { int skipColumnId = -1; @@ -660,13 +707,14 @@ ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoC return CurrentSchema; } -TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble( - const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { - return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData); +TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, + THashMap& blobsData, const std::optional& defaultSnapshot) const { + return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData, defaultSnapshot); } -TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { - return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData); +TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, + THashMap& blobsData, const std::optional& defaultSnapshot) const { + return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData, defaultSnapshot); } bool TPortionInfo::NeedShardingFilter(const TGranuleShardingInfo& shardingInfo) const { @@ -676,6 +724,16 @@ bool TPortionInfo::NeedShardingFilter(const TGranuleShardingInfo& 
shardingInfo) return true; } +NKikimr::NOlap::NSplitter::TEntityGroups TPortionInfo::GetEntityGroupsByStorageId( + const TString& specialTier, const IStoragesManager& storages, const TIndexInfo& indexInfo) const { + if (HasInsertWriteId()) { + NSplitter::TEntityGroups groups(storages.GetDefaultOperator()->GetBlobSplitSettings(), IStoragesManager::DefaultStorageId); + return groups; + } else { + return indexInfo.GetEntityGroupsByStorageId(specialTier, storages); + } +} + std::shared_ptr TPortionInfo::TPreparedColumn::AssembleAccessor() const { Y_ABORT_UNLESS(!Blobs.empty()); @@ -721,8 +779,13 @@ NArrow::NAccessor::TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobI std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const { if (DefaultRowsCount) { Y_ABORT_UNLESS(!Data); - return std::make_shared( - NArrow::TThreadSimpleArraysCache::Get(loader.GetField()->type(), DefaultValue, DefaultRowsCount)); + if (NeedCache) { + return std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader.GetField()->type(), DefaultValue, DefaultRowsCount)); + } else { + return std::make_shared( + NArrow::TStatusValidator::GetValid(arrow::MakeArrayFromScalar(*DefaultValue, DefaultRowsCount))); + } } else { AFL_VERIFY(ExpectedRowsCount); return loader.ApplyVerified(Data, *ExpectedRowsCount); diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 1591765f83e6..09cc515fef90 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -63,10 +63,14 @@ class TPortionInfo { private: friend class TPortionInfoConstructor; TPortionInfo(TPortionMeta&& meta) - : Meta(std::move(meta)) - { - + : Meta(std::move(meta)) { + if (HasInsertWriteId()) { + AFL_VERIFY(!Meta.GetTierName()); + } } + std::optional CommitSnapshot; + std::optional InsertWriteId; + ui64 PathId = 0; ui64 Portion = 0; // Id of 
independent (overlayed by PK) portion of data in pathId TSnapshot MinSnapshotDeprecated = TSnapshot::Zero(); // {PlanStep, TxId} is min snapshot for {Granule, Portion} @@ -127,10 +131,40 @@ class TPortionInfo { bool NeedShardingFilter(const TGranuleShardingInfo& shardingInfo) const; + NSplitter::TEntityGroups GetEntityGroupsByStorageId( + const TString& specialTier, const IStoragesManager& storages, const TIndexInfo& indexInfo) const; + const std::optional& GetShardingVersionOptional() const { return ShardingVersion; } + bool HasCommitSnapshot() const { + return !!CommitSnapshot; + } + bool HasInsertWriteId() const { + return !!InsertWriteId; + } + const TSnapshot& GetCommitSnapshotVerified() const { + AFL_VERIFY(!!CommitSnapshot); + return *CommitSnapshot; + } + TInsertWriteId GetInsertWriteIdVerified() const { + AFL_VERIFY(InsertWriteId); + return *InsertWriteId; + } + const std::optional& GetCommitSnapshotOptional() const { + return CommitSnapshot; + } + const std::optional& GetInsertWriteIdOptional() const { + return InsertWriteId; + } + void SetCommitSnapshot(const TSnapshot& value) { + AFL_VERIFY(!!InsertWriteId); + AFL_VERIFY(!CommitSnapshot); + AFL_VERIFY(value.Valid()); + CommitSnapshot = value; + } + bool CrossSSWith(const TPortionInfo& p) const { return std::min(RecordSnapshotMax(), p.RecordSnapshotMax()) <= std::max(RecordSnapshotMin(), p.RecordSnapshotMin()); } @@ -218,6 +252,7 @@ class TPortionInfo { THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const; const TString& GetColumnStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const; + const TString& GetIndexStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const; const TString& GetEntityStorageId(const ui32 entityId, const TIndexInfo& indexInfo) const; ui64 GetTxVolume() const; // fake-correct method for determ volume on rewrite this portion in transaction progress @@ -367,14 +402,7 @@ class TPortionInfo { return 
false; } - bool Empty() const { return Records.empty(); } - bool Produced() const { return Meta.GetProduced() != TPortionMeta::EProduced::UNSPECIFIED; } - bool Valid() const { return ValidSnapshotInfo() && !Empty() && Produced(); } bool ValidSnapshotInfo() const { return MinSnapshotDeprecated.Valid() && PathId && Portion; } - bool IsInserted() const { return Meta.GetProduced() == TPortionMeta::EProduced::INSERTED; } - bool IsEvicted() const { return Meta.GetProduced() == TPortionMeta::EProduced::EVICTED; } - bool CanHaveDups() const { return !Produced(); /* || IsInserted(); */ } - bool CanIntersectOthers() const { return !Valid() || IsInserted() || IsEvicted(); } size_t NumChunks() const { return Records.size(); } TString DebugString(const bool withDetails = false) const; @@ -446,12 +474,9 @@ class TPortionInfo { return SchemaVersion; } - bool IsVisible(const TSnapshot& snapshot) const { - if (Empty()) { - return false; - } - - const bool visible = (Meta.RecordSnapshotMin <= snapshot) && (!RemoveSnapshot.Valid() || snapshot < RemoveSnapshot); + bool IsVisible(const TSnapshot& snapshot, const bool checkCommitSnapshot = true) const { + const bool visible = (Meta.RecordSnapshotMin <= snapshot) && (!RemoveSnapshot.Valid() || snapshot < RemoveSnapshot) && + (!checkCommitSnapshot || !CommitSnapshot || *CommitSnapshot <= snapshot); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "IsVisible")("analyze_portion", DebugString())("visible", visible)("snapshot", snapshot.DebugString()); return visible; @@ -467,12 +492,30 @@ class TPortionInfo { return Meta.IndexKeyEnd; } - const TSnapshot& RecordSnapshotMin() const { - return Meta.RecordSnapshotMin; + const TSnapshot& RecordSnapshotMin(const std::optional& snapshotDefault = std::nullopt) const { + if (InsertWriteId) { + if (CommitSnapshot) { + return *CommitSnapshot; + } else { + AFL_VERIFY(snapshotDefault); + return *snapshotDefault; + } + } else { + return Meta.RecordSnapshotMin; + } } - const TSnapshot& 
RecordSnapshotMax() const { - return Meta.RecordSnapshotMax; + const TSnapshot& RecordSnapshotMax(const std::optional& snapshotDefault = std::nullopt) const { + if (InsertWriteId) { + if (CommitSnapshot) { + return *CommitSnapshot; + } else { + AFL_VERIFY(snapshotDefault); + return *snapshotDefault; + } + } else { + return Meta.RecordSnapshotMax; + } } @@ -569,6 +612,7 @@ class TPortionInfo { ui32 DefaultRowsCount = 0; std::shared_ptr DefaultValue; TString Data; + const bool NeedCache = true; public: ui32 GetExpectedRowsCountVerified() const { AFL_VERIFY(ExpectedRowsCount); @@ -583,9 +627,10 @@ class TPortionInfo { } } - TAssembleBlobInfo(const ui32 rowsCount, const std::shared_ptr& defValue) + TAssembleBlobInfo(const ui32 rowsCount, const std::shared_ptr& defValue, const bool needCache = true) : DefaultRowsCount(rowsCount) , DefaultValue(defValue) + , NeedCache(needCache) { AFL_VERIFY(DefaultRowsCount); } @@ -745,8 +790,10 @@ class TPortionInfo { } }; - TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const; - TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const; + TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, + THashMap& blobsData, const std::optional& defaultSnapshot = std::nullopt) const; + TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, + THashMap& blobsData, const std::optional& defaultSnapshot = std::nullopt) const; friend IOutputStream& operator << (IOutputStream& out, const TPortionInfo& info) { out << info.DebugString(); diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index ae85ef59842c..67a070d11bb8 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ 
b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -119,7 +119,7 @@ std::optional TReadPortionInfoWithBlobs::SyncP to->GetIndexInfo().AppendIndex(entityChunksNew, i.first, storages, secondaryData).Validate(); } - const NSplitter::TEntityGroups groups = to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); + const NSplitter::TEntityGroups groups = source.PortionInfo.GetEntityGroupsByStorageId(targetTier, *storages, to->GetIndexInfo()); auto schemaTo = std::make_shared(to, std::make_shared()); TGeneralSerializedSlice slice(secondaryData.GetExternalData(), schemaTo, counters); diff --git a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp index 3f580531b749..b500b529d35e 100644 --- a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp @@ -17,7 +17,9 @@ void TWritePortionInfoWithBlobsConstructor::TBlobInfo::AddChunk(TWritePortionInf chunk->AddIntoPortionBeforeBlob(bRange, owner.GetPortionConstructor()); } -void TWritePortionInfoWithBlobsResult::TBlobInfo::RegisterBlobId(TWritePortionInfoWithBlobsResult& owner, const TUnifiedBlobId& blobId) const { +void TWritePortionInfoWithBlobsResult::TBlobInfo::RegisterBlobId(TWritePortionInfoWithBlobsResult& owner, const TUnifiedBlobId& blobId) { + AFL_VERIFY(!BlobId); + BlobId = blobId; const TBlobRangeLink16::TLinkId idx = owner.GetPortionConstructor().RegisterBlobId(blobId); for (auto&& i : Chunks) { owner.GetPortionConstructor().RegisterBlobIdx(i, idx); diff --git a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h index dde424fd63b8..85dcdd7b72dc 100644 --- a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h +++ b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h @@ -113,10 +113,16 @@ class TWritePortionInfoWithBlobsResult { private: using TBlobChunks = 
std::vector; YDB_READONLY_DEF(TBlobChunks, Chunks); + std::optional BlobId; const TString ResultBlob; YDB_READONLY_DEF(std::shared_ptr, Operator); public: + const TUnifiedBlobId& GetBlobIdVerified() const { + AFL_VERIFY(BlobId); + return *BlobId; + } + ui64 GetSize() const { return ResultBlob.size(); } @@ -133,13 +139,17 @@ class TWritePortionInfoWithBlobsResult { return ResultBlob; } - void RegisterBlobId(TWritePortionInfoWithBlobsResult& owner, const TUnifiedBlobId& blobId) const; + void RegisterBlobId(TWritePortionInfoWithBlobsResult& owner, const TUnifiedBlobId& blobId); }; private: std::optional PortionConstructor; std::optional PortionResult; YDB_READONLY_DEF(std::vector, Blobs); public: + std::vector& MutableBlobs() { + return Blobs; + } + TWritePortionInfoWithBlobsResult(TWritePortionInfoWithBlobsConstructor&& constructor) : PortionConstructor(std::move(constructor.PortionConstructor)) { for (auto&& i : constructor.Blobs) { @@ -171,6 +181,11 @@ class TWritePortionInfoWithBlobsResult { AFL_VERIFY(!PortionResult); return *PortionConstructor; } + + std::shared_ptr DetachPortionConstructor() { + AFL_VERIFY(PortionConstructor); + return std::make_shared(std::move(*PortionConstructor)); + } }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp index 88416a4d214f..803aa7030543 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp @@ -9,12 +9,12 @@ TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& , Index(index) { } -std::shared_ptr TDataStorageAccessor::Select(const TReadDescription& readDescription) const { +std::shared_ptr TDataStorageAccessor::Select(const TReadDescription& readDescription, const bool withUncommitted) const { if (readDescription.ReadNothing) { return std::make_shared(); } 
AFL_VERIFY(readDescription.PKRangesFilter); - return Index->Select(readDescription.PathId, readDescription.GetSnapshot(), *readDescription.PKRangesFilter); + return Index->Select(readDescription.PathId, readDescription.GetSnapshot(), *readDescription.PKRangesFilter, withUncommitted); } ISnapshotSchema::TPtr TReadMetadataBase::GetLoadSchemaVerified(const TPortionInfo& portion) const { diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h index d87fcf02868e..ac608b5ad670 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h @@ -24,7 +24,7 @@ class TDataStorageAccessor { public: TDataStorageAccessor(const std::unique_ptr& insertTable, const std::unique_ptr& index); - std::shared_ptr Select(const TReadDescription& readDescription) const; + std::shared_ptr Select(const TReadDescription& readDescription, const bool withUncommitted) const; std::vector GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema, const std::optional lockId, const TSnapshot& reqSnapshot) const; }; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp index 5d93272c07b7..5623d1e4dc3a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp @@ -44,7 +44,19 @@ TConclusionStatus TReadMetadata::Init( } } - SelectInfo = dataAccessor.Select(readDescription); + SelectInfo = dataAccessor.Select(readDescription, !!LockId); + if (LockId) { + for (auto&& i : SelectInfo->PortionsOrderedPK) { + if (i->HasInsertWriteId() && !i->HasCommitSnapshot()) { + if (owner->HasLongTxWrites(i->GetInsertWriteIdVerified())) { + } else { + auto op = 
owner->GetOperationsManager().GetOperationByInsertWriteIdVerified(i->GetInsertWriteIdVerified()); + AddWriteIdToCheck(i->GetInsertWriteIdVerified(), op->GetLockId()); + } + } + } + } + StatsMode = readDescription.StatsMode; return TConclusionStatus::Success(); } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp index 0efd8bfbb9d2..ed6db34c5e59 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp @@ -157,11 +157,13 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Fetching); if (needSnapshots) { acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); - result->AddStep(std::make_shared()); } if (!exclusiveSource) { acc.AddAssembleStep(*result, *MergeColumns, "LAST_PK", false); } + if (needSnapshots) { + result->AddStep(std::make_shared()); + } if (needFilterDeletion) { acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); result->AddStep(std::make_shared()); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index 5a7338652966..dcb8935f3227 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -66,8 +66,7 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob for (auto&& c : columnChunks) { AFL_VERIFY(!itFinished); if (!itFilter.IsBatchForSkip(c->GetMeta().GetNumRows())) { - auto reading = - blobsAction.GetReading(Schema->GetIndexInfo().GetColumnStorageId(c->GetColumnId(), Portion->GetMeta().GetTierName())); + auto reading = blobsAction.GetReading(Portion->GetColumnStorageId(c->GetColumnId(), 
Schema->GetIndexInfo())); reading->SetIsBackgroundProcess(false); reading->AddRange(Portion->RestoreBlobRange(c->BlobRange)); ++fetchedChunks; @@ -124,7 +123,7 @@ bool TPortionDataSource::DoStartFetchingIndexes( } indexIds.emplace(i.GetIndexId()); if (auto bRange = i.GetBlobRangeOptional()) { - auto readAction = action.GetReading(Schema->GetIndexInfo().GetIndexStorageId(i.GetIndexId())); + auto readAction = action.GetReading(Portion->GetIndexStorageId(i.GetIndexId(), Schema->GetIndexInfo())); readAction->SetIsBackgroundProcess(false); readAction->AddRange(Portion->RestoreBlobRange(*bRange)); } @@ -189,8 +188,20 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& columns) { auto blobSchema = GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion); - MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs()) - .AssembleToGeneralContainer(SequentialEntityIds)); + + std::optional ss; + if (Portion->HasInsertWriteId()) { + if (Portion->HasCommitSnapshot()) { + ss = Portion->GetCommitSnapshotVerified(); + } else if (GetContext()->GetReadMetadata()->IsMyUncommitted(Portion->GetInsertWriteIdVerified())) { + ss = GetContext()->GetReadMetadata()->GetRequestSnapshot(); + } + } + + auto batch = Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs(), ss) + .AssembleToGeneralContainer(SequentialEntityIds); + + MutableStageData().AddBatch(batch); } bool TCommittedDataSource::DoStartFetchingColumns( diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index 69b39059bff5..4e6a645ebb93 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -281,6 +281,8 @@ class TPortionDataSource: public IDataSource { NJson::TJsonValue result = NJson::JSON_MAP; result.InsertValue("type", "portion"); result.InsertValue("info", Portion->DebugString()); + result.InsertValue("commit", Portion->GetCommitSnapshotOptional().value_or(TSnapshot::Zero()).DebugString()); + result.InsertValue("insert", (ui64)Portion->GetInsertWriteIdOptional().value_or(TInsertWriteId(0))); return result; } @@ -314,7 +316,13 @@ class TPortionDataSource: public IDataSource { public: virtual bool DoAddTxConflict() override { - GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + if (Portion->HasCommitSnapshot() || !Portion->HasInsertWriteId()) { + GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + return true; + } else if (!GetContext()->GetReadMetadata()->IsMyUncommitted(Portion->GetInsertWriteIdVerified())) { + GetContext()->GetReadMetadata()->SetConflictedWriteId(Portion->GetInsertWriteIdVerified()); + return true; + } return false; } @@ -379,7 +387,8 @@ class TPortionDataSource: public IDataSource { } TPortionDataSource(const ui32 sourceIdx, const std::shared_ptr& portion, const std::shared_ptr& context) - : TBase(sourceIdx, context, portion->IndexKeyStart(), portion->IndexKeyEnd(), portion->RecordSnapshotMin(), portion->RecordSnapshotMax(), + : TBase(sourceIdx, context, portion->IndexKeyStart(), portion->IndexKeyEnd(), portion->RecordSnapshotMin(TSnapshot::Zero()), + portion->RecordSnapshotMax(TSnapshot::Zero()), portion->GetRecordsCount(), portion->GetShardingVersionOptional(), portion->GetMeta().GetDeletionsCount()) , Portion(portion) , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion)) { diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index da3cc74f8e92..5d3132b99e06 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -51,8 +51,9 @@ void TStatsIterator::AppendStats(const std::vectorGetColumnId()); if (it == entityStorages.end()) { - it = entityStorages.emplace(r->GetColumnId(), - portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName())).first; + it = + entityStorages.emplace(r->GetColumnId(), portion.GetEntityStorageId(r->GetColumnId(), portionSchema->GetIndexInfo())) + .first; } lastTierName = it->second.GetView(); } @@ -106,7 +107,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[11], bData->size()); } NArrow::Append(*builders[12], activity); - const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetIndexId(), portion.GetMeta().GetTierName()); + const auto tierName = portion.GetEntityStorageId(r->GetIndexId(), portionSchema->GetIndexInfo()); std::string strTierName(tierName.data(), tierName.size()); NArrow::Append(*builders[13], strTierName); NArrow::Append(*builders[14], ConstantEntityIsIndexView); diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.cpp index c1c31cb5487f..46418998ea4a 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.cpp @@ -24,9 +24,9 @@ void IIndexInfo::AddDeleteFlagsColumn(NArrow::TGeneralContainer& batch, const bo void IIndexInfo::AddSnapshotColumns(NArrow::TGeneralContainer& batch, const TSnapshot& snapshot, const ui64 insertWriteId) { const i64 numRows = batch.num_rows(); - batch.AddField(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), NArrow::MakeUI64Array(snapshot.GetPlanStep(), numRows)).Validate(); - batch.AddField(arrow::field(SPEC_COL_TX_ID, arrow::uint64()), NArrow::MakeUI64Array(snapshot.GetTxId(), numRows)).Validate(); - 
batch.AddField(arrow::field(SPEC_COL_WRITE_ID, arrow::uint64()), NArrow::MakeUI64Array(insertWriteId, numRows)).Validate(); + batch.AddField(PlanStepField, NArrow::MakeUI64Array(snapshot.GetPlanStep(), numRows)).Validate(); + batch.AddField(TxIdField, NArrow::MakeUI64Array(snapshot.GetTxId(), numRows)).Validate(); + batch.AddField(WriteIdField, NArrow::MakeUI64Array(insertWriteId, numRows)).Validate(); } void IIndexInfo::NormalizeDeletionColumn(NArrow::TGeneralContainer& batch) { diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h index 19fbe2267a7e..954363204f08 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h @@ -32,6 +32,10 @@ class IIndexInfo { static constexpr const char* SPEC_COL_WRITE_ID = NOlap::NPortion::TSpecialColumns::SPEC_COL_WRITE_ID; static constexpr const char* SPEC_COL_DELETE_FLAG = NOlap::NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG; + static const inline std::shared_ptr PlanStepField = arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()); + static const inline std::shared_ptr TxIdField = arrow::field(SPEC_COL_TX_ID, arrow::uint64()); + static const inline std::shared_ptr WriteIdField = arrow::field(SPEC_COL_WRITE_ID, arrow::uint64()); + static const char* GetDeleteFlagColumnName() { return SPEC_COL_DELETE_FLAG; } diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index ea7cd7784d82..48910722fc71 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -24,7 +24,7 @@ class Schema; } // namespace arrow namespace NKikimr::NOlap { - +class TPortionInfo; namespace NIndexes::NMax { class TIndexMeta; } @@ -106,6 +106,7 @@ class TSchemaObjectsCache { struct TIndexInfo: public IIndexInfo { private: using TColumns = THashMap; + friend class 
TPortionInfo; class TNameInfo { private: @@ -163,7 +164,32 @@ struct TIndexInfo: public IIndexInfo { std::shared_ptr BuildDefaultColumnFeatures( const ui32 columnId, const THashMap& columns, const std::shared_ptr& operators) const; + const TString& GetIndexStorageId(const ui32 indexId) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + return it->second->GetStorageId(); + } + + const TString& GetColumnStorageId(const ui32 columnId, const TString& specialTier) const { + if (specialTier && specialTier != IStoragesManager::DefaultStorageId) { + return specialTier; + } else { + return GetColumnFeaturesVerified(columnId).GetOperator()->GetStorageId(); + } + } + + const TString& GetEntityStorageId(const ui32 entityId, const TString& specialTier) const { + auto it = Indexes.find(entityId); + if (it != Indexes.end()) { + return it->second->GetStorageId(); + } + return GetColumnStorageId(entityId, specialTier); + } + + void SetAllKeys(const std::shared_ptr& operators, const THashMap& columns); + public: + NSplitter::TEntityGroups GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const; std::optional GetPKColumnIndexByIndexVerified(const ui32 columnIndex) const { AFL_VERIFY(columnIndex < ColumnFeatures.size()); return ColumnFeatures[columnIndex]->GetPKColumnIndex(); @@ -200,8 +226,6 @@ struct TIndexInfo: public IIndexInfo { } } - NSplitter::TEntityGroups GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const; - bool GetSchemeNeedActualization() const { return SchemeNeedActualization; } @@ -222,28 +246,6 @@ struct TIndexInfo: public IIndexInfo { return Indexes; } - const TString& GetIndexStorageId(const ui32 indexId) const { - auto it = Indexes.find(indexId); - AFL_VERIFY(it != Indexes.end()); - return it->second->GetStorageId(); - } - - const TString& GetColumnStorageId(const ui32 columnId, const TString& specialTier) const { - if (specialTier && specialTier != 
IStoragesManager::DefaultStorageId) { - return specialTier; - } else { - return GetColumnFeaturesVerified(columnId).GetOperator()->GetStorageId(); - } - } - - const TString& GetEntityStorageId(const ui32 entityId, const TString& specialTier) const { - auto it = Indexes.find(entityId); - if (it != Indexes.end()) { - return it->second->GetStorageId(); - } - return GetColumnStorageId(entityId, specialTier); - } - TString DebugString() const { TStringBuilder sb; sb << "(" @@ -256,9 +258,6 @@ struct TIndexInfo: public IIndexInfo { return sb; } - void SetAllKeys(const std::shared_ptr& operators, const THashMap& columns); - -public: static TIndexInfo BuildDefault() { TIndexInfo result("dummy"); return result; diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp index 90cac5b44501..9b833f8ed1cb 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp @@ -1,8 +1,15 @@ #include "abstract_scheme.h" -#include +#include #include +#include +#include +#include +#include +#include + #include + #include namespace NKikimr::NOlap { @@ -24,7 +31,6 @@ std::set ISnapshotSchema::GetPkColumnsIds() const { result.emplace(GetColumnId(field->name())); } return result; - } TConclusion> ISnapshotSchema::NormalizeBatch( @@ -108,8 +114,7 @@ TConclusion> ISnapshotSchema::PrepareForModi if (GetIndexInfo().GetColumnExternalDefaultValueByIndexVerified(targetIdx)) { return TConclusionStatus::Success(); } else { - return TConclusionStatus::Fail( - "empty field for non-default column: '" + dstSchema->field(targetIdx)->name() + "'"); + return TConclusionStatus::Fail("empty field for non-default column: '" + dstSchema->field(targetIdx)->name() + "'"); } } case NEvWrite::EModificationType::Delete: @@ -120,8 +125,8 @@ TConclusion> ISnapshotSchema::PrepareForModi const auto nameResolver = [&](const std::string& 
fieldName) -> i32 { return GetIndexInfo().GetColumnIndexOptional(fieldName).value_or(-1); }; - auto batchConclusion = - NArrow::TColumnOperator().SkipIfAbsent().ErrorOnDifferentFieldTypes().AdaptIncomingToDestinationExt(incomingBatch, dstSchema, pred, nameResolver); + auto batchConclusion = NArrow::TColumnOperator().SkipIfAbsent().ErrorOnDifferentFieldTypes().AdaptIncomingToDestinationExt( + incomingBatch, dstSchema, pred, nameResolver); if (batchConclusion.IsFail()) { return batchConclusion; } @@ -277,4 +282,61 @@ std::set ISnapshotSchema::GetColumnsWithDifferentDefaults( return result; } +TConclusion ISnapshotSchema::PrepareForWrite(const ISnapshotSchema::TPtr& selfPtr, const ui64 pathId, + const std::shared_ptr& incomingBatch, const NEvWrite::EModificationType mType, + const std::shared_ptr& storagesManager, const std::shared_ptr& splitterCounters) const { + AFL_VERIFY(incomingBatch->num_rows()); + auto itIncoming = incomingBatch->schema()->fields().begin(); + auto itIncomingEnd = incomingBatch->schema()->fields().end(); + auto itIndex = GetIndexInfo().ArrowSchema()->fields().begin(); + auto itIndexEnd = GetIndexInfo().ArrowSchema()->fields().end(); + THashMap>> chunks; + + std::shared_ptr schemaDetails( + new TDefaultSchemaDetails(selfPtr, std::make_shared())); + + while (itIncoming != itIncomingEnd && itIndex != itIndexEnd) { + if ((*itIncoming)->name() == (*itIndex)->name()) { + const ui32 incomingIndex = itIncoming - incomingBatch->schema()->fields().begin(); + const ui32 columnIndex = itIndex - GetIndexInfo().ArrowSchema()->fields().begin(); + const ui32 columnId = GetIndexInfo().GetColumnIdByIndexVerified(columnIndex); + auto loader = GetIndexInfo().GetColumnLoaderVerified(columnId); + auto saver = GetIndexInfo().GetColumnSaver(columnId); + saver.AddSerializerWithBorder(100, NArrow::NSerialization::TNativeSerializer::GetUncompressed()); + saver.AddSerializerWithBorder(100000000, NArrow::NSerialization::TNativeSerializer::GetFast()); + const auto& 
columnFeatures = GetIndexInfo().GetColumnFeaturesVerified(columnId); + auto accessor = std::make_shared(incomingBatch->column(incomingIndex)); + std::shared_ptr rbToWrite = + loader->GetAccessorConstructor()->Construct(accessor, loader->BuildAccessorContext(accessor->GetRecordsCount())); + std::shared_ptr arrToWrite = + loader->GetAccessorConstructor()->Construct(rbToWrite, loader->BuildAccessorContext(accessor->GetRecordsCount())).DetachResult(); + + std::vector> columnChunks = { std::make_shared( + saver.Apply(rbToWrite), arrToWrite, TChunkAddress(columnId, 0), columnFeatures) }; + AFL_VERIFY(chunks.emplace(columnId, std::move(columnChunks)).second); + ++itIncoming; + ++itIndex; + } else { + ++itIndex; + } + } + AFL_VERIFY(itIncoming == itIncomingEnd); + + TGeneralSerializedSlice slice(chunks, schemaDetails, splitterCounters); + std::vector blobs; + if (!slice.GroupBlobs(blobs, NSplitter::TEntityGroups(NSplitter::TSplitSettings(), NBlobOperations::TGlobal::DefaultStorageId))) { + return TConclusionStatus::Fail("cannot split data for appropriate blobs size"); + } + auto constructor = + TWritePortionInfoWithBlobsConstructor::BuildByBlobs(std::move(blobs), {}, pathId, GetVersion(), GetSnapshot(), storagesManager); + + NArrow::TFirstLastSpecialKeys primaryKeys(slice.GetFirstLastPKBatch(GetIndexInfo().GetReplaceKey())); + NArrow::TMinMaxSpecialKeys snapshotKeys(NArrow::MakeEmptyBatch(TIndexInfo::ArrowSchemaSnapshot(), 1), TIndexInfo::ArrowSchemaSnapshot()); + const ui32 deletionsCount = (mType == NEvWrite::EModificationType::Delete) ? 
incomingBatch->num_rows() : 0; + constructor.GetPortionConstructor().AddMetadata(*this, deletionsCount, primaryKeys, snapshotKeys); + constructor.GetPortionConstructor().MutableMeta().SetTierName(IStoragesManager::DefaultStorageId); + constructor.GetPortionConstructor().MutableMeta().UpdateRecordsMeta(NPortion::EProduced::INSERTED); + return TWritePortionInfoWithBlobsResult(std::move(constructor)); } + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h index d6f1c9436570..e57a1a4f22f8 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h @@ -1,11 +1,11 @@ #pragma once #include - -#include #include -#include - +#include +#include #include +#include +#include #include @@ -13,14 +13,16 @@ namespace NKikimr::NOlap { struct TIndexInfo; class TSaverContext; +class TWritePortionInfoWithBlobsResult; class ISnapshotSchema { protected: virtual TString DoDebugString() const = 0; + public: using TPtr = std::shared_ptr; - virtual ~ISnapshotSchema() {} + virtual ~ISnapshotSchema() = default; virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const; @@ -71,14 +73,19 @@ class ISnapshotSchema { std::set GetPkColumnsIds() const; - static std::set GetColumnsWithDifferentDefaults(const THashMap& schemas, const ISnapshotSchema::TPtr& targetSchema); + static std::set GetColumnsWithDifferentDefaults( + const THashMap& schemas, const ISnapshotSchema::TPtr& targetSchema); - [[nodiscard]] TConclusion> NormalizeBatch( - const ISnapshotSchema& dataSchema, const std::shared_ptr& batch, const std::set& restoreColumnIds) const; + [[nodiscard]] TConclusion> NormalizeBatch(const ISnapshotSchema& dataSchema, + 
const std::shared_ptr& batch, const std::set& restoreColumnIds) const; [[nodiscard]] TConclusion> PrepareForModification( const std::shared_ptr& incomingBatch, const NEvWrite::EModificationType mType) const; + [[nodiscard]] TConclusion PrepareForWrite(const ISnapshotSchema::TPtr& selfPtr, const ui64 pathId, + const std::shared_ptr& incomingBatch, const NEvWrite::EModificationType mType, + const std::shared_ptr& storagesManager, const std::shared_ptr& splitterCounters) const; + void AdaptBatchToSchema(NArrow::TGeneralContainer& batch, const ISnapshotSchema::TPtr& targetSchema) const; std::set GetColumnIdsToDelete(const ISnapshotSchema::TPtr& targetSchema) const; std::vector ConvertColumnIdsToIndexes(const std::set& idxs) const; }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp index f664eb6afb6a..b8481aab3e88 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp @@ -123,7 +123,7 @@ void TTieringActualizer::DoExtractTasks(TTieringProcessContext& tasksContext, co for (auto&& p : portions) { auto portion = externalContext.GetPortionVerified(p); if (!address.WriteIs(NBlobOperations::TGlobal::DefaultStorageId) && !address.WriteIs(NTiering::NCommon::DeleteTierName)) { - if (!portion->HasRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized)) { + if (!portion->HasRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized) || portion->HasInsertWriteId()) { Counters.SkipEvictionForCompaction->Add(1); continue; } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp index ebb2b9acde63..668ac539e781 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp +++ 
b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp @@ -1,11 +1,13 @@ #include "granule.h" #include "storage.h" + +#include +#include +#include #include #include #include -#include -#include namespace NKikimr::NOlap { @@ -14,7 +16,6 @@ void TGranuleMeta::UpsertPortion(const TPortionInfo& info) { auto it = Portions.find(info.GetPortion()); AFL_VERIFY(info.GetPathId() == GetPathId())("event", "incompatible_granule")("portion", info.DebugString())("path_id", GetPathId()); - AFL_VERIFY(info.Valid())("event", "invalid_portion")("portion", info.DebugString()); AFL_VERIFY(info.ValidSnapshotInfo())("event", "incorrect_portion_snapshots")("portion", info.DebugString()); for (auto& record : info.Records) { AFL_VERIFY(record.Valid())("event", "incorrect_record")("record", record.DebugString())("portion", info.DebugString()); @@ -45,7 +46,8 @@ bool TGranuleMeta::ErasePortion(const ui64 portion) { return true; } -void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr portionAfter, NStorageOptimizer::IOptimizerPlanner::TModificationGuard* modificationGuard) { +void TGranuleMeta::OnAfterChangePortion( + const std::shared_ptr portionAfter, NStorageOptimizer::IOptimizerPlanner::TModificationGuard* modificationGuard) { if (portionAfter) { PortionInfoGuard.OnNewPortion(portionAfter); if (!portionAfter->HasRemoveSnapshot()) { @@ -130,25 +132,33 @@ const NKikimr::NOlap::TGranuleAdditiveSummary& TGranuleMeta::GetAdditiveSummary( return *AdditiveSummaryCache; } -TGranuleMeta::TGranuleMeta(const ui64 pathId, const TGranulesStorage& owner, const NColumnShard::TGranuleDataCounters& counters, const TVersionedIndex& versionedIndex) +TGranuleMeta::TGranuleMeta( + const ui64 pathId, const TGranulesStorage& owner, const NColumnShard::TGranuleDataCounters& counters, const TVersionedIndex& versionedIndex) : PathId(pathId) , Counters(counters) , PortionInfoGuard(owner.GetCounters().BuildPortionBlobsGuard()) , Stats(owner.GetStats()) , StoragesManager(owner.GetStoragesManager()) , 
PortionsIndex(*this, Counters.GetPortionsIndexCounters()) { - NStorageOptimizer::IOptimizerPlannerConstructor::TBuildContext context(PathId, owner.GetStoragesManager(), versionedIndex.GetLastSchema()->GetIndexInfo().GetPrimaryKey()); + NStorageOptimizer::IOptimizerPlannerConstructor::TBuildContext context( + PathId, owner.GetStoragesManager(), versionedIndex.GetLastSchema()->GetIndexInfo().GetPrimaryKey()); OptimizerPlanner = versionedIndex.GetLastSchema()->GetIndexInfo().GetCompactionPlannerConstructor()->BuildPlanner(context).DetachResult(); AFL_VERIFY(!!OptimizerPlanner); ActualizationIndex = std::make_shared(PathId, versionedIndex); - } std::shared_ptr TGranuleMeta::UpsertPortionOnLoad(TPortionInfo&& portion) { - auto portionId = portion.GetPortionId(); - auto emplaceInfo = Portions.emplace(portionId, std::make_shared(std::move(portion))); - AFL_VERIFY(emplaceInfo.second); - return emplaceInfo.first->second; + if (portion.HasInsertWriteId() && !portion.HasCommitSnapshot()) { + const TInsertWriteId insertWriteId = portion.GetInsertWriteIdVerified(); + auto emplaceInfo = InsertedPortions.emplace(insertWriteId, std::make_shared(std::move(portion))); + AFL_VERIFY(emplaceInfo.second); + return emplaceInfo.first->second; + } else { + auto portionId = portion.GetPortionId(); + auto emplaceInfo = Portions.emplace(portionId, std::make_shared(std::move(portion))); + AFL_VERIFY(emplaceInfo.second); + return emplaceInfo.first->second; + } } void TGranuleMeta::BuildActualizationTasks(NActualizer::TTieringProcessContext& context, const TDuration actualizationLag) const { @@ -160,7 +170,8 @@ void TGranuleMeta::BuildActualizationTasks(NActualizer::TTieringProcessContext& NextActualizations = context.GetActualInstant() + actualizationLag; } -void TGranuleMeta::ResetOptimizer(const std::shared_ptr& constructor, std::shared_ptr& storages, const std::shared_ptr& pkSchema) { +void TGranuleMeta::ResetOptimizer(const std::shared_ptr& constructor, + std::shared_ptr& storages, const 
std::shared_ptr& pkSchema) { if (constructor->ApplyToCurrentObject(OptimizerPlanner)) { return; } @@ -177,4 +188,24 @@ void TGranuleMeta::ResetOptimizer(const std::shared_ptrModifyPortions(portions, {}); } -} // namespace NKikimr::NOlap +void TGranuleMeta::CommitPortionOnComplete(const TInsertWriteId insertWriteId, IColumnEngine& engine) { + auto it = InsertedPortions.find(insertWriteId); + AFL_VERIFY(it != InsertedPortions.end()); + (static_cast(engine)).UpsertPortion(*it->second); + InsertedPortions.erase(it); +} + +void TGranuleMeta::CommitImmediateOnExecute( + NTabletFlatExecutor::TTransactionContext& txc, const TSnapshot& snapshot, const std::shared_ptr& portion) const { + AFL_VERIFY(portion); + AFL_VERIFY(!InsertedPortions.contains(portion->GetInsertWriteIdVerified())); + portion->SetCommitSnapshot(snapshot); + TDbWrapper wrapper(txc.DB, nullptr); + portion->SaveToDatabase(wrapper, 0, false); +} + +void TGranuleMeta::CommitImmediateOnComplete(const std::shared_ptr portion, IColumnEngine& engine) { + (static_cast(engine)).UpsertPortion(*portion); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.h b/ydb/core/tx/columnshard/engines/storage/granule/granule.h index d79ef50e1883..deca161e69f1 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.h @@ -107,14 +107,14 @@ class TGranuleAdditiveSummary { } void AddPortion(const TPortionInfo& info) { - if (info.IsInserted()) { + if (info.GetMeta().GetProduced() == NPortion::EProduced::INSERTED) { Owner.Inserted.AddPortion(info); } else { Owner.Compacted.AddPortion(info); } } void RemovePortion(const TPortionInfo& info) { - if (info.IsInserted()) { + if (info.GetMeta().GetProduced() == NPortion::EProduced::INSERTED) { Owner.Inserted.RemovePortion(info); } else { Owner.Compacted.RemovePortion(info); @@ -140,6 +140,7 @@ class TGranuleMeta: TNonCopyable { private: TMonotonic 
ModificationLastTime = TMonotonic::Now(); THashMap> Portions; + THashMap> InsertedPortions; mutable std::optional AdditiveSummaryCache; void RebuildHardMetrics() const; @@ -168,6 +169,45 @@ class TGranuleMeta: TNonCopyable { ActualizationIndex->RefreshTiering(tiering, context); } + void InsertPortionOnExecute( + NTabletFlatExecutor::TTransactionContext& txc, const std::shared_ptr& portion) const { + AFL_VERIFY(!InsertedPortions.contains(portion->GetInsertWriteIdVerified())); + TDbWrapper wrapper(txc.DB, nullptr); + portion->SaveToDatabase(wrapper, 0, false); + } + + void InsertPortionOnComplete(const std::shared_ptr& portion) { + AFL_VERIFY(InsertedPortions.emplace(portion->GetInsertWriteIdVerified(), portion).second); + } + + void CommitPortionOnExecute( + NTabletFlatExecutor::TTransactionContext& txc, const TInsertWriteId insertWriteId, const TSnapshot& snapshot) const { + auto it = InsertedPortions.find(insertWriteId); + AFL_VERIFY(it != InsertedPortions.end()); + it->second->SetCommitSnapshot(snapshot); + TDbWrapper wrapper(txc.DB, nullptr); + it->second->SaveToDatabase(wrapper, 0, true); + } + + void CommitPortionOnComplete(const TInsertWriteId insertWriteId, IColumnEngine& engine); + + void AbortPortionOnExecute( + NTabletFlatExecutor::TTransactionContext& txc, const TInsertWriteId insertWriteId) const { + auto it = InsertedPortions.find(insertWriteId); + AFL_VERIFY(it != InsertedPortions.end()); + TDbWrapper wrapper(txc.DB, nullptr); + it->second->RemoveFromDatabase(wrapper); + } + + void AbortPortionOnComplete(const TInsertWriteId insertWriteId) { + AFL_VERIFY(InsertedPortions.erase(insertWriteId)); + } + + void CommitImmediateOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TSnapshot& snapshot, + const std::shared_ptr& portion) const; + + void CommitImmediateOnComplete(const std::shared_ptr portion, IColumnEngine& engine); + std::vector GetOptimizerTasksDescription() const { return OptimizerPlanner->GetTasksDescription(); } @@ -285,6 +325,10 
@@ class TGranuleMeta: TNonCopyable { return Portions; } + const THashMap>& GetInsertedPortions() const { + return InsertedPortions; + } + std::vector> GetPortionsVector() const { std::vector> result; for (auto&& i : Portions) { diff --git a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp index b017464eefeb..18f0f7043ff2 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp @@ -10,7 +10,7 @@ std::shared_ptr TGranulesStorage::GetGranuleForCom std::optional priorityChecker; const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetCompactionActualizationLag(); for (auto&& i : Tables) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("path_id", i.first); +// NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("path_id", i.first); i.second->ActualizeOptimizer(now, actualizationLag); auto gPriority = i.second->GetCompactionPriority(); if (gPriority.IsZero() || (priorityChecker && gPriority < *priorityChecker)) { diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h index 2d580ae7cb0f..28da42f25991 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h @@ -963,11 +963,18 @@ class TPortionsBucket: public TMoveOnly { class TPortionBuckets { private: + + struct TReverseComparator { + bool operator()(const i64 l, const i64 r) const { + return r < l; + } + }; + const std::shared_ptr PrimaryKeysSchema; const std::shared_ptr StoragesManager; std::shared_ptr LeftBucket; std::map> Buckets; - std::map> BucketsByWeight; + std::map, TReverseComparator> BucketsByWeight; std::shared_ptr Counters; std::vector> 
GetAffectedBuckets(const NArrow::TReplaceKey& fromInclude, const NArrow::TReplaceKey& toInclude) { std::vector> result; @@ -1073,8 +1080,8 @@ class TPortionBuckets { if (BucketsByWeight.empty()) { return false; } - AFL_VERIFY(BucketsByWeight.rbegin()->second.size()); - const TPortionsBucket* bucketForOptimization = *BucketsByWeight.rbegin()->second.begin(); + AFL_VERIFY(BucketsByWeight.begin()->second.size()); + const TPortionsBucket* bucketForOptimization = *BucketsByWeight.begin()->second.begin(); return bucketForOptimization->IsLocked(dataLocksManager); } @@ -1103,7 +1110,7 @@ class TPortionBuckets { i64 GetWeight() const { AFL_VERIFY(BucketsByWeight.size()); - return BucketsByWeight.rbegin()->first; + return BucketsByWeight.begin()->first; } void RemovePortion(const std::shared_ptr& portion) { @@ -1117,11 +1124,11 @@ class TPortionBuckets { std::shared_ptr BuildOptimizationTask(std::shared_ptr granule, const std::shared_ptr& locksManager) const { AFL_VERIFY(BucketsByWeight.size()); - if (!BucketsByWeight.rbegin()->first) { + if (!BucketsByWeight.begin()->first) { return nullptr; } - AFL_VERIFY(BucketsByWeight.rbegin()->second.size()); - const TPortionsBucket* bucketForOptimization = *BucketsByWeight.rbegin()->second.begin(); + AFL_VERIFY(BucketsByWeight.begin()->second.size()); + const TPortionsBucket* bucketForOptimization = *BucketsByWeight.begin()->second.begin(); if (bucketForOptimization == LeftBucket.get()) { if (Buckets.size()) { return bucketForOptimization->BuildOptimizationTask(granule, locksManager, &Buckets.begin()->first, PrimaryKeysSchema, StoragesManager); @@ -1190,10 +1197,6 @@ class TPortionBuckets { AFL_VERIFY(i.second->GetStartPos()); result.AddPosition(*i.second->GetStartPos(), false); } - if (Buckets.size() && Buckets.rbegin()->second->GetPortion()->GetRecordsCount() > 1) { - NArrow::NMerger::TSortableBatchPosition pos(Buckets.rbegin()->second->GetPortion()->IndexKeyEnd().ToBatch(PrimaryKeysSchema), 0, PrimaryKeysSchema->field_names(), {}, 
false); - result.AddPosition(std::move(pos), false); - } return result; } }; diff --git a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp index 38dc6ffa044d..6dd7bad1e4a8 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp @@ -157,7 +157,7 @@ class TTestDbWrapper : public IDbWrapper { auto& columns = Indices[0].Columns; for (auto& [pathId, portions] : columns) { for (auto& [portionId, portionLocal] : portions) { - auto copy = portionLocal; + auto copy = NOlap::TPortionInfoConstructor::TTestCopier::Copy(portionLocal); copy.MutableRecords().clear(); for (const auto& rec : portionLocal.GetRecords()) { auto itContextLoader = LoadContexts[copy.GetAddress()].find(rec.GetAddress()); @@ -278,7 +278,7 @@ TString MakeTestBlob(i64 start = 0, i64 end = 100, ui32 step = 1) { void AddIdsToBlobs(std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { for (auto& portion : portions) { THashMap blobsData; - for (auto& b : portion.GetBlobs()) { + for (auto& b : portion.MutableBlobs()) { const auto blobId = MakeUnifiedBlobId(++step, b.GetSize()); b.RegisterBlobId(portion, blobId); blobsData.emplace(blobId, b.GetResultBlob()); @@ -481,21 +481,21 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { // select from snap before insert ui64 planStep = 1; ui64 txId = 0; - auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 0); } { // select from snap between insert (greater txId) ui64 planStep = 1; ui64 txId = 2; - auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), 
NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 0); } { // select from snap after insert (greater planStep) ui64 planStep = 2; ui64 txId = 1; - auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 1); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK[0]->NumChunks(), columnIds.size() + TIndexInfo::GetSnapshotColumnIdsSet().size() - 1); } @@ -503,7 +503,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { // select another pathId ui64 planStep = 2; ui64 txId = 1; - auto selectInfo = engine.Select(paths[1], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(paths[1], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 0); } } @@ -576,7 +576,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { // full scan ui64 txId = 1; - auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 20); } @@ -590,7 +590,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } NOlap::TPKRangesFilter pkFilter(false); Y_ABORT_UNLESS(pkFilter.Add(gt10k, nullptr, indexInfo.GetReplaceKey())); - auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter); + auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter, false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 10); } @@ -602,7 +602,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } NOlap::TPKRangesFilter pkFilter(false); Y_ABORT_UNLESS(pkFilter.Add(nullptr, lt10k, indexInfo.GetReplaceKey())); - auto 
selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter); + auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter, false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 9); } } @@ -758,7 +758,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { // full scan ui64 txId = 1; - auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 20); } @@ -767,7 +767,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { // full scan ui64 txId = 1; - auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 20); } @@ -783,7 +783,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { // full scan ui64 txId = 1; - auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 10); } } @@ -798,7 +798,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { // full scan ui64 txId = 1; - auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); + auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 10); } } diff --git a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp index 9836a72a60cc..be69c73aff28 100644 --- a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp +++ 
b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp @@ -20,7 +20,7 @@ TCompactedWriteController::TCompactedWriteController(const TActorId& dstActor, T auto* pInfo = changes.GetWritePortionInfo(i); Y_ABORT_UNLESS(pInfo); TWritePortionInfoWithBlobsResult& portionWithBlobs = *pInfo; - for (auto&& b : portionWithBlobs.GetBlobs()) { + for (auto&& b : portionWithBlobs.MutableBlobs()) { auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetResultBlob(), changes.MutableBlobsAction().GetWriting(b.GetOperator()->GetStorageId()))); b.RegisterBlobId(portionWithBlobs, task.GetBlobId()); WriteVolume += b.GetSize(); diff --git a/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp b/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp index e17acbe10690..e76affbf5549 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp @@ -16,7 +16,7 @@ void TBuildBatchesTask::ReplyError(const TString& message, const NColumnShard::T TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); auto result = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); - TActorContext::AsActorContext().Send(ParentActorId, result.release()); + TActorContext::AsActorContext().Send(Context.GetTabletActorId(), result.release()); } TConclusionStatus TBuildBatchesTask::DoExecute(const std::shared_ptr& /*taskPtr*/) { @@ -26,10 +26,10 @@ TConclusionStatus TBuildBatchesTask::DoExecute(const std::shared_ptr& /*t "cannot extract incoming batch: " + batchConclusion.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); return TConclusionStatus::Fail("cannot extract incoming batch: " + batchConclusion.GetErrorMessage()); } - WritingCounters->OnIncomingData(NArrow::GetBatchDataSize(*batchConclusion)); + 
Context.GetWritingCounters()->OnIncomingData(NArrow::GetBatchDataSize(*batchConclusion)); auto preparedConclusion = - ActualSchema->PrepareForModification(batchConclusion.DetachResult(), WriteData.GetWriteMeta().GetModificationType()); + Context.GetActualSchema()->PrepareForModification(batchConclusion.DetachResult(), WriteData.GetWriteMeta().GetModificationType()); if (preparedConclusion.IsFail()) { ReplyError("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Request); @@ -39,42 +39,43 @@ TConclusionStatus TBuildBatchesTask::DoExecute(const std::shared_ptr& /*t std::shared_ptr merger; switch (WriteData.GetWriteMeta().GetModificationType()) { case NEvWrite::EModificationType::Upsert: { - const std::vector> defaultFields = ActualSchema->GetAbsentFields(batch->schema()); + const std::vector> defaultFields = Context.GetActualSchema()->GetAbsentFields(batch->schema()); if (defaultFields.empty()) { std::shared_ptr task = - std::make_shared(TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); + std::make_shared(BufferActorId, std::move(WriteData), batch, Context); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); return TConclusionStatus::Success(); } else { - auto insertionConclusion = ActualSchema->CheckColumnsDefault(defaultFields); - auto conclusion = ActualSchema->BuildDefaultBatch(ActualSchema->GetIndexInfo().ArrowSchema()->fields(), 1, true); + auto insertionConclusion = Context.GetActualSchema()->CheckColumnsDefault(defaultFields); + auto conclusion = + Context.GetActualSchema()->BuildDefaultBatch(Context.GetActualSchema()->GetIndexInfo().ArrowSchema()->fields(), 1, true); AFL_VERIFY(!conclusion.IsFail())("error", conclusion.GetErrorMessage()); auto batchDefault = conclusion.DetachResult(); NArrow::NMerger::TSortableBatchPosition pos( batchDefault, 0, batchDefault->schema()->field_names(), batchDefault->schema()->field_names(), false); 
merger = std::make_shared( - batch, ActualSchema, insertionConclusion.IsSuccess() ? "" : insertionConclusion.GetErrorMessage(), pos); + batch, Context.GetActualSchema(), insertionConclusion.IsSuccess() ? "" : insertionConclusion.GetErrorMessage(), pos); break; } } case NEvWrite::EModificationType::Insert: { - merger = std::make_shared(batch, ActualSchema); + merger = std::make_shared(batch, Context.GetActualSchema()); break; } case NEvWrite::EModificationType::Update: { - merger = std::make_shared(batch, ActualSchema, ""); + merger = std::make_shared(batch, Context.GetActualSchema(), ""); break; } case NEvWrite::EModificationType::Replace: case NEvWrite::EModificationType::Delete: { std::shared_ptr task = - std::make_shared(TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); + std::make_shared(BufferActorId, std::move(WriteData), batch, Context); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); return TConclusionStatus::Success(); } } - std::shared_ptr task = std::make_shared( - TabletId, ParentActorId, BufferActorId, std::move(WriteData), merger, ActualSchema, ActualSnapshot, batch); + std::shared_ptr task = + std::make_shared(BufferActorId, std::move(WriteData), merger, ActualSnapshot, batch, Context); NActors::TActivationContext::AsActorContext().Register(new NDataReader::TActor(task)); return TConclusionStatus::Success(); diff --git a/ydb/core/tx/columnshard/operations/batch_builder/builder.h b/ydb/core/tx/columnshard/operations/batch_builder/builder.h index 31ca0ac7ac43..33b0281e71a6 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/builder.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/builder.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -11,12 +12,9 @@ namespace NKikimr::NOlap { class TBuildBatchesTask: public NConveyor::ITask { private: NEvWrite::TWriteData WriteData; - const ui64 TabletId; - const NActors::TActorId ParentActorId; const NActors::TActorId 
BufferActorId; - const std::shared_ptr ActualSchema; const TSnapshot ActualSnapshot; - const std::shared_ptr WritingCounters; + const TWritingContext Context; void ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); protected: @@ -27,16 +25,12 @@ class TBuildBatchesTask: public NConveyor::ITask { return "Write::ConstructBatches"; } - TBuildBatchesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, - NEvWrite::TWriteData&& writeData, const std::shared_ptr& actualSchema, const TSnapshot& actualSnapshot, - const std::shared_ptr& writingCounters) + TBuildBatchesTask( + const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const TSnapshot& actualSnapshot, const TWritingContext& context) : WriteData(std::move(writeData)) - , TabletId(tabletId) - , ParentActorId(parentActorId) , BufferActorId(bufferActorId) - , ActualSchema(actualSchema) , ActualSnapshot(actualSnapshot) - , WritingCounters(writingCounters) { + , Context(context) { } }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp b/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp index e13c7fc74eaf..25b5784144eb 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp @@ -1,6 +1,7 @@ #include "restore.h" -#include + #include +#include #include namespace NKikimr::NOlap { @@ -8,10 +9,10 @@ namespace NKikimr::NOlap { std::unique_ptr TModificationRestoreTask::DoBuildRequestInitiator() const { auto request = std::make_unique(LocalPathId, WriteData.GetWriteMeta().GetLockIdOptional()); request->ReadToSnapshot = Snapshot; - auto pkData = NArrow::TColumnOperator().VerifyIfAbsent().Extract(IncomingData, ActualSchema->GetPKColumnNames()); + auto pkData = NArrow::TColumnOperator().VerifyIfAbsent().Extract(IncomingData, 
Context.GetActualSchema()->GetPKColumnNames()); request->RangesFilter = TPKRangesFilter::BuildFromRecordBatchLines(pkData, false); - for (auto&& i : ActualSchema->GetIndexInfo().GetColumnIds(false)) { - request->AddColumn(i, ActualSchema->GetIndexInfo().GetColumnName(i)); + for (auto&& i : Context.GetActualSchema()->GetIndexInfo().GetColumnIds(false)) { + request->AddColumn(i, Context.GetActualSchema()->GetIndexInfo().GetColumnName(i)); } return request; } @@ -19,8 +20,8 @@ std::unique_ptr TModificationRestoreTa NKikimr::TConclusionStatus TModificationRestoreTask::DoOnDataChunk(const std::shared_ptr& data) { auto result = Merger->AddExistsDataOrdered(data); if (result.IsFail()) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "merge_data_problems") - ("write_id", WriteData.GetWriteMeta().GetWriteId())("tablet_id", TabletId)("message", result.GetErrorMessage()); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "merge_data_problems")("write_id", WriteData.GetWriteMeta().GetWriteId())( + "tablet_id", GetTabletId())("message", result.GetErrorMessage()); SendErrorMessage(result.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Request); } return result; @@ -28,7 +29,7 @@ NKikimr::TConclusionStatus TModificationRestoreTask::DoOnDataChunk(const std::sh void TModificationRestoreTask::DoOnError(const TString& errorMessage) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "restore_data_problems")("write_id", WriteData.GetWriteMeta().GetWriteId())( - "tablet_id", TabletId)("message", errorMessage); + "tablet_id", GetTabletId())("message", errorMessage); SendErrorMessage(errorMessage, NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); } @@ -41,31 +42,32 @@ NKikimr::TConclusionStatus TModificationRestoreTask::DoOnFinished() { } auto batchResult = Merger->BuildResultBatch(); - std::shared_ptr task = std::make_shared( - TabletId, ParentActorId, BufferActorId, std::move(WriteData), batchResult, ActualSchema); + 
std::shared_ptr task = + std::make_shared(BufferActorId, std::move(WriteData), batchResult, Context); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); return TConclusionStatus::Success(); } -TModificationRestoreTask::TModificationRestoreTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, const std::shared_ptr& actualSchema, const TSnapshot actualSnapshot, const std::shared_ptr& incomingData) - : TBase(tabletId, parentActorId) +TModificationRestoreTask::TModificationRestoreTask(const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, + const std::shared_ptr& merger, const TSnapshot actualSnapshot, const std::shared_ptr& incomingData, + const TWritingContext& context) + : TBase(context.GetTabletId(), context.GetTabletActorId()) , WriteData(std::move(writeData)) - , TabletId(tabletId) - , ParentActorId(parentActorId) , BufferActorId(bufferActorId) , Merger(merger) - , ActualSchema(actualSchema) , LocalPathId(WriteData.GetWriteMeta().GetTableId()) , Snapshot(actualSnapshot) - , IncomingData(incomingData) { - + , IncomingData(incomingData) + , Context(context) { } -void TModificationRestoreTask::SendErrorMessage(const TString& errorMessage, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { +void TModificationRestoreTask::SendErrorMessage( + const TString& errorMessage, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { auto writeDataPtr = std::make_shared(std::move(WriteData)); TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); - auto evResult = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), errorMessage, errorClass); - TActorContext::AsActorContext().Send(ParentActorId, evResult.release()); + auto evResult = + 
NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), errorMessage, errorClass); + TActorContext::AsActorContext().Send(Context.GetTabletActorId(), evResult.release()); } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/batch_builder/restore.h b/ydb/core/tx/columnshard/operations/batch_builder/restore.h index b69a856a8a58..11ab07311fa4 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/restore.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/restore.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace NKikimr::NOlap { @@ -11,14 +12,12 @@ class TModificationRestoreTask: public NDataReader::IRestoreTask { private: using TBase = NDataReader::IRestoreTask; NEvWrite::TWriteData WriteData; - const ui64 TabletId; - const NActors::TActorId ParentActorId; const NActors::TActorId BufferActorId; std::shared_ptr Merger; - const std::shared_ptr ActualSchema; const ui64 LocalPathId; const TSnapshot Snapshot; std::shared_ptr IncomingData; + const TWritingContext Context; virtual std::unique_ptr DoBuildRequestInitiator() const override; virtual TConclusionStatus DoOnDataChunk(const std::shared_ptr& data) override; @@ -27,9 +26,8 @@ class TModificationRestoreTask: public NDataReader::IRestoreTask { void SendErrorMessage(const TString& errorMessage, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); public: - TModificationRestoreTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, - NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, const std::shared_ptr& actualSchema, - const TSnapshot actualSnapshot, const std::shared_ptr& incomingData); + TModificationRestoreTask(const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, + const TSnapshot actualSnapshot, const std::shared_ptr& incomingData, const TWritingContext& context); }; 
} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/common/context.cpp b/ydb/core/tx/columnshard/operations/common/context.cpp new file mode 100644 index 000000000000..76af2c9fa917 --- /dev/null +++ b/ydb/core/tx/columnshard/operations/common/context.cpp @@ -0,0 +1,5 @@ +#include "context.h" + +namespace NKikimr::NOlap { + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/common/context.h b/ydb/core/tx/columnshard/operations/common/context.h new file mode 100644 index 000000000000..41c10d2eb009 --- /dev/null +++ b/ydb/core/tx/columnshard/operations/common/context.h @@ -0,0 +1,30 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap { + +class TWritingContext { +private: + YDB_READONLY(ui64, TabletId, 0); + YDB_READONLY(NActors::TActorId, TabletActorId, NActors::TActorId()); + YDB_READONLY_DEF(std::shared_ptr, ActualSchema); + YDB_READONLY_DEF(std::shared_ptr, StoragesManager); + YDB_READONLY_DEF(std::shared_ptr, SplitterCounters); + YDB_READONLY_DEF(std::shared_ptr, WritingCounters); + +public: + TWritingContext(const ui64 tabletId, const NActors::TActorId& tabletActorId, const std::shared_ptr& actualSchema, + const std::shared_ptr& operators, const std::shared_ptr& splitterCounters, + const std::shared_ptr& writingCounters) + : TabletId(tabletId) + , TabletActorId(tabletActorId) + , ActualSchema(actualSchema) + , StoragesManager(operators) + , SplitterCounters(splitterCounters) + , WritingCounters(writingCounters) { + } +}; +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/common/ya.make b/ydb/core/tx/columnshard/operations/common/ya.make new file mode 100644 index 000000000000..8416d5dce223 --- /dev/null +++ b/ydb/core/tx/columnshard/operations/common/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + context.cpp +) + +PEERDIR( + ydb/core/tx/conveyor/usage + ydb/core/tx/data_events + ydb/core/formats/arrow + 
ydb/core/tx/columnshard/engines/scheme/versions + ydb/core/tx/columnshard/engines/scheme + ydb/core/tx/columnshard/engines/writer +) + +END() diff --git a/ydb/core/tx/columnshard/operations/events.cpp b/ydb/core/tx/columnshard/operations/events.cpp new file mode 100644 index 000000000000..09c1ad1ae2cf --- /dev/null +++ b/ydb/core/tx/columnshard/operations/events.cpp @@ -0,0 +1,20 @@ +#include "events.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +void TInsertedPortion::Finalize(TColumnShard* shard, NTabletFlatExecutor::TTransactionContext& txc) { + AFL_VERIFY(PortionInfoConstructor); + auto* lastPortionId = shard->MutableIndexAs().GetLastPortionPointer(); + PortionInfoConstructor->SetPortionId(++*lastPortionId); + NOlap::TDbWrapper wrapper(txc.DB, nullptr); + wrapper.WriteCounter(NOlap::TColumnEngineForLogs::LAST_PORTION, *lastPortionId); + PortionInfo = PortionInfoConstructor->BuildPtr(true); + PortionInfoConstructor = nullptr; +} + +} // namespace NKikimr::NColumnShard + +namespace NKikimr::NColumnShard::NPrivateEvents::NWrite {} diff --git a/ydb/core/tx/columnshard/operations/events.h b/ydb/core/tx/columnshard/operations/events.h new file mode 100644 index 000000000000..c555eb8f9fc2 --- /dev/null +++ b/ydb/core/tx/columnshard/operations/events.h @@ -0,0 +1,103 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TInsertedPortion { +private: + YDB_READONLY_DEF(std::shared_ptr, PortionInfoConstructor); + std::shared_ptr PortionInfo; + YDB_READONLY_DEF(std::shared_ptr, PKBatch); + +public: + const std::shared_ptr& GetPortionInfo() const { + AFL_VERIFY(PortionInfo); + return PortionInfo; + } + TInsertedPortion(NOlap::TWritePortionInfoWithBlobsResult&& portion, const std::shared_ptr& pkBatch) + : PortionInfoConstructor(portion.DetachPortionConstructor()) + , PKBatch(pkBatch) { + AFL_VERIFY(PKBatch); + } + + void Finalize(TColumnShard* shard, NTabletFlatExecutor::TTransactionContext& txc); +}; + 
+class TInsertedPortions { +private: + NEvWrite::TWriteMeta WriteMeta; + YDB_ACCESSOR_DEF(std::vector, Portions); + YDB_READONLY(ui64, DataSize, 0); + YDB_READONLY_DEF(std::vector, InsertWriteIds); + +public: + const NEvWrite::TWriteMeta& GetWriteMeta() const { + return WriteMeta; + } + + void AddInsertWriteId(const NOlap::TInsertWriteId id) { + InsertWriteIds.emplace_back(id); + } + + void Finalize(TColumnShard* shard, NTabletFlatExecutor::TTransactionContext& txc); + + TInsertedPortions(const NEvWrite::TWriteMeta& writeMeta, std::vector&& portions, const ui64 dataSize) + : WriteMeta(writeMeta) + , Portions(std::move(portions)) + , DataSize(dataSize) { + AFL_VERIFY(!WriteMeta.HasLongTxId()); + for (auto&& i : Portions) { + AFL_VERIFY(i.GetPKBatch()); + } + } +}; + +class TFailedWrite { +private: + NEvWrite::TWriteMeta WriteMeta; + YDB_READONLY(ui64, DataSize, 0); + +public: + const NEvWrite::TWriteMeta& GetWriteMeta() const { + return WriteMeta; + } + + TFailedWrite(const NEvWrite::TWriteMeta& writeMeta, const ui64 dataSize) + : WriteMeta(writeMeta) + , DataSize(dataSize) { + AFL_VERIFY(!WriteMeta.HasLongTxId()); + } +}; + +} // namespace NKikimr::NColumnShard + +namespace NKikimr::NColumnShard::NPrivateEvents::NWrite { + +class TEvWritePortionResult: public TEventLocal { +private: + YDB_READONLY_DEF(NKikimrProto::EReplyStatus, WriteStatus); + YDB_READONLY_DEF(std::shared_ptr, WriteAction); + std::vector InsertedPacks; + std::vector Fails; + +public: + std::vector&& DetachInsertedPacks() { + return std::move(InsertedPacks); + } + std::vector&& DetachFails() { + return std::move(Fails); + } + + TEvWritePortionResult(const NKikimrProto::EReplyStatus writeStatus, const std::shared_ptr& writeAction, + std::vector&& portions, std::vector&& fails) + : WriteStatus(writeStatus) + , WriteAction(writeAction) + , InsertedPacks(portions) + , Fails(fails) { + } +}; + +} // namespace NKikimr::NColumnShard::NPrivateEvents::NWrite diff --git 
a/ydb/core/tx/columnshard/operations/manager.cpp b/ydb/core/tx/columnshard/operations/manager.cpp index 2fdb5d0e181b..6e4d1783745e 100644 --- a/ydb/core/tx/columnshard/operations/manager.cpp +++ b/ydb/core/tx/columnshard/operations/manager.cpp @@ -28,8 +28,8 @@ bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) { NKikimrTxColumnShard::TInternalOperationData metaProto; Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); - auto operation = std::make_shared( - writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec), granuleShardingVersionId, NEvWrite::EModificationType::Upsert); + auto operation = std::make_shared(0, writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec), + granuleShardingVersionId, NEvWrite::EModificationType::Upsert, false); operation->FromProto(metaProto); LinkInsertWriteIdToOperationWriteId(operation->GetInsertWriteIds(), operation->GetWriteId()); AFL_VERIFY(operation->GetStatus() != EOperationStatus::Draft); @@ -201,11 +201,11 @@ void TOperationsManager::LinkTransactionOnExecute(const ui64 lockId, const ui64 void TOperationsManager::LinkTransactionOnComplete(const ui64 /*lockId*/, const ui64 /*txId*/) { } -TWriteOperation::TPtr TOperationsManager::RegisterOperation( - const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) { +TWriteOperation::TPtr TOperationsManager::RegisterOperation(const ui64 pathId, const ui64 lockId, const ui64 cookie, + const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType, const bool portionsWriting) { auto writeId = BuildNextOperationWriteId(); - auto operation = std::make_shared( - writeId, lockId, cookie, EOperationStatus::Draft, AppData()->TimeProvider->Now(), granuleShardingVersionId, mType); + auto operation = std::make_shared(pathId, writeId, lockId, cookie, EOperationStatus::Draft, AppData()->TimeProvider->Now(), + granuleShardingVersionId, mType, portionsWriting); 
Y_ABORT_UNLESS(Operations.emplace(operation->GetWriteId(), operation).second); GetLockVerified(operation->GetLockId()).MutableWriteOperations().emplace_back(operation); GetLockVerified(operation->GetLockId()).AddWrite(); diff --git a/ydb/core/tx/columnshard/operations/manager.h b/ydb/core/tx/columnshard/operations/manager.h index 7912df4f968c..7d93bbf4950e 100644 --- a/ydb/core/tx/columnshard/operations/manager.h +++ b/ydb/core/tx/columnshard/operations/manager.h @@ -182,8 +182,8 @@ class TOperationsManager { return *result; } - TWriteOperation::TPtr RegisterOperation( - const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); + TWriteOperation::TPtr RegisterOperation(const ui64 pathId, const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, + const NEvWrite::EModificationType mType, const bool portionsWriting); bool RegisterLock(const ui64 lockId, const ui64 generationId) { if (LockFeatures.contains(lockId)) { return false; diff --git a/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp b/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp index d79eb2708cec..bd4666679c57 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp +++ b/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp @@ -1,9 +1,12 @@ #include "builder.h" + +#include #include -#include -#include -#include #include +#include +#include +#include +#include namespace NKikimr::NOlap { @@ -15,13 +18,15 @@ std::optional> TBuildSlicesTask:: context.SetFieldsForSpecialKeys(WriteData.GetPrimaryKeySchema()); auto splitResult = NArrow::SplitByBlobSize(OriginalBatch, context); if (splitResult.IsFail()) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", TStringBuilder() << "cannot split batch in according to limits: " + splitResult.GetErrorMessage()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)( + "event", TStringBuilder() << "cannot split batch in according to 
limits: " + splitResult.GetErrorMessage()); return {}; } auto result = splitResult.DetachResult(); if (result.size() > 1) { for (auto&& i : result) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "strange_blobs_splitting")("blob", i.DebugString())("original_size", WriteData.GetSize()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "strange_blobs_splitting")("blob", i.DebugString())( + "original_size", WriteData.GetSize()); } } return result; @@ -30,61 +35,164 @@ std::optional> TBuildSlicesTask:: void TBuildSlicesTask::ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { auto writeDataPtr = std::make_shared(std::move(WriteData)); TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); - auto result = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error( - NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); - TActorContext::AsActorContext().Send(ParentActorId, result.release()); + auto result = + NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); + TActorContext::AsActorContext().Send(Context.GetTabletActorId(), result.release()); } +class TPortionWriteController: public NColumnShard::IWriteController, + public NColumnShard::TMonitoringObjectsCounter { +public: + class TInsertPortion { + private: + TWritePortionInfoWithBlobsResult Portion; + std::shared_ptr PKBatch; + + public: + TWritePortionInfoWithBlobsResult& MutablePortion() { + return Portion; + } + const TWritePortionInfoWithBlobsResult& GetPortion() const { + return Portion; + } + TWritePortionInfoWithBlobsResult&& ExtractPortion() { + return std::move(Portion); + } + const std::shared_ptr& GetPKBatch() const { + return PKBatch; + } + TInsertPortion(TWritePortionInfoWithBlobsResult&& portion, const std::shared_ptr pkBatch) + : Portion(std::move(portion)) + , PKBatch(pkBatch) { + 
AFL_VERIFY(PKBatch); + } + }; + +private: + const std::shared_ptr Action; + std::vector Portions; + NEvWrite::TWriteMeta WriteMeta; + TActorId DstActor; + const ui64 DataSize; + void DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult) override { + std::vector portions; + std::vector fails; + for (auto&& i : Portions) { + portions.emplace_back(i.ExtractPortion(), i.GetPKBatch()); + } + NColumnShard::TInsertedPortions pack(std::move(WriteMeta), std::move(portions), DataSize); + std::vector packs = { pack }; + auto result = std::make_unique( + putResult->GetPutStatus(), Action, std::move(packs), std::move(fails)); + ctx.Send(DstActor, result.release()); + } + virtual void DoOnStartSending() override { + } + +public: + TPortionWriteController(const TActorId& dstActor, const std::shared_ptr& action, const NEvWrite::TWriteMeta& writeMeta, + std::vector&& portions, const ui64 dataSize) + : Action(action) + , Portions(std::move(portions)) + , WriteMeta(writeMeta) + , DstActor(dstActor) + , DataSize(dataSize) + { + for (auto&& p : Portions) { + for (auto&& b : p.MutablePortion().MutableBlobs()) { + auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetResultBlob(), action)); + b.RegisterBlobId(p.MutablePortion(), task.GetBlobId()); + } + } + } +}; + TConclusionStatus TBuildSlicesTask::DoExecute(const std::shared_ptr& /*taskPtr*/) { - NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletId)("parent_id", ParentActorId)); + NActors::TLogContextGuard g( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletId)("parent_id", Context.GetTabletActorId())); if (!OriginalBatch) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "ev_write_bad_data")("write_id", WriteData.GetWriteMeta().GetWriteId())("table_id", WriteData.GetWriteMeta().GetTableId()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", 
"ev_write_bad_data")("write_id", WriteData.GetWriteMeta().GetWriteId())( + "table_id", WriteData.GetWriteMeta().GetTableId()); ReplyError("no data in batch", NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); return TConclusionStatus::Fail("no data in batch"); } - const auto& indexSchema = ActualSchema->GetIndexInfo().ArrowSchema(); - auto subsetConclusion = NArrow::TColumnOperator().IgnoreOnDifferentFieldTypes().BuildSequentialSubset(OriginalBatch, indexSchema); - if (subsetConclusion.IsFail()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unadaptable schemas")("index", indexSchema->ToString())( - "problem", subsetConclusion.GetErrorMessage()); - ReplyError( - "unadaptable schema: " + subsetConclusion.GetErrorMessage(), - NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); - return TConclusionStatus::Fail("cannot reorder schema: " + subsetConclusion.GetErrorMessage()); - } - NArrow::TSchemaSubset subset = subsetConclusion.DetachResult(); + if (WriteData.GetWritePortions()) { + if (OriginalBatch->num_rows() == 0) { + std::vector portions; + std::vector fails = { NColumnShard::TFailedWrite(WriteData.GetWriteMeta(), WriteData.GetSize()) }; + auto result = std::make_unique( + NKikimrProto::EReplyStatus::OK, nullptr, std::move(portions), std::move(fails)); + NActors::TActivationContext::AsActorContext().Send(Context.GetTabletActorId(), result.release()); + } else { + auto batches = NArrow::NMerger::TRWSortableBatchPosition::SplitByBordersInIntervalPositions(OriginalBatch, + Context.GetActualSchema()->GetIndexInfo().GetPrimaryKey()->field_names(), WriteData.GetData()->GetSeparationPoints()); + std::vector portions; + for (auto&& batch : batches) { + if (!batch) { + continue; + } + auto portionConclusion = + Context.GetActualSchema()->PrepareForWrite(Context.GetActualSchema(), WriteData.GetWriteMeta().GetTableId(), batch, + WriteData.GetWriteMeta().GetModificationType(), Context.GetStoragesManager(), 
Context.GetSplitterCounters()); + if (portionConclusion.IsFail()) { + ReplyError(portionConclusion.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Request); + return portionConclusion; + } + std::shared_ptr pkBatch = + NArrow::TColumnOperator().Extract(batch, Context.GetActualSchema()->GetIndexInfo().GetPrimaryKey()->fields()); + portions.emplace_back(portionConclusion.DetachResult(), pkBatch); + } + auto writeController = std::make_shared( + Context.GetTabletActorId(), WriteData.GetBlobsAction(), WriteData.GetWriteMeta(), std::move(portions), WriteData.GetSize()); + if (WriteData.GetBlobsAction()->NeedDraftTransaction()) { + TActorContext::AsActorContext().Send( + Context.GetTabletActorId(), std::make_unique(writeController)); + } else { + TActorContext::AsActorContext().Register(NColumnShard::CreateWriteActor(TabletId, writeController, TInstant::Max())); + } + } + } else { + const auto& indexSchema = Context.GetActualSchema()->GetIndexInfo().ArrowSchema(); + auto subsetConclusion = NArrow::TColumnOperator().IgnoreOnDifferentFieldTypes().BuildSequentialSubset(OriginalBatch, indexSchema); + if (subsetConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unadaptable schemas")("index", indexSchema->ToString())( + "problem", subsetConclusion.GetErrorMessage()); + ReplyError("unadaptable schema: " + subsetConclusion.GetErrorMessage(), + NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("cannot reorder schema: " + subsetConclusion.GetErrorMessage()); + } + NArrow::TSchemaSubset subset = subsetConclusion.DetachResult(); - if (OriginalBatch->num_columns() != indexSchema->num_fields()) { - AFL_VERIFY(OriginalBatch->num_columns() < indexSchema->num_fields())("original", OriginalBatch->num_columns())( - "index", indexSchema->num_fields()); - if (HasAppData() && !AppDataVerified().FeatureFlags.GetEnableOptionalColumnsInColumnShard() && - 
WriteData.GetWriteMeta().GetModificationType() != NEvWrite::EModificationType::Delete) { - subset = NArrow::TSchemaSubset::AllFieldsAccepted(); - const std::vector& columnIdsVector = ActualSchema->GetIndexInfo().GetColumnIds(false); - const std::set columnIdsSet(columnIdsVector.begin(), columnIdsVector.end()); - auto normalized = - ActualSchema->NormalizeBatch(*ActualSchema, std::make_shared(OriginalBatch), columnIdsSet).DetachResult(); - OriginalBatch = NArrow::ToBatch(normalized->BuildTableVerified(), true); + if (OriginalBatch->num_columns() != indexSchema->num_fields()) { + AFL_VERIFY(OriginalBatch->num_columns() < indexSchema->num_fields())("original", OriginalBatch->num_columns())( + "index", indexSchema->num_fields()); + if (HasAppData() && !AppDataVerified().FeatureFlags.GetEnableOptionalColumnsInColumnShard() && + WriteData.GetWriteMeta().GetModificationType() != NEvWrite::EModificationType::Delete) { + subset = NArrow::TSchemaSubset::AllFieldsAccepted(); + const std::vector& columnIdsVector = Context.GetActualSchema()->GetIndexInfo().GetColumnIds(false); + const std::set columnIdsSet(columnIdsVector.begin(), columnIdsVector.end()); + auto normalized = + Context.GetActualSchema() + ->NormalizeBatch(*Context.GetActualSchema(), std::make_shared(OriginalBatch), columnIdsSet) + .DetachResult(); + OriginalBatch = NArrow::ToBatch(normalized->BuildTableVerified(), true); + } } - } - WriteData.MutableWriteMeta().SetWriteMiddle2StartInstant(TMonotonic::Now()); - auto batches = BuildSlices(); - WriteData.MutableWriteMeta().SetWriteMiddle3StartInstant(TMonotonic::Now()); - if (batches) { - auto writeDataPtr = std::make_shared(std::move(WriteData)); - writeDataPtr->SetSchemaSubset(std::move(subset)); - std::shared_ptr pkBatch; - if (!writeDataPtr->GetWriteMeta().HasLongTxId()) { - pkBatch = NArrow::TColumnOperator().Extract(OriginalBatch, ActualSchema->GetIndexInfo().GetPrimaryKey()->fields()); + 
WriteData.MutableWriteMeta().SetWriteMiddle2StartInstant(TMonotonic::Now()); + auto batches = BuildSlices(); + WriteData.MutableWriteMeta().SetWriteMiddle3StartInstant(TMonotonic::Now()); + if (batches) { + auto writeDataPtr = std::make_shared(std::move(WriteData)); + writeDataPtr->SetSchemaSubset(std::move(subset)); + std::shared_ptr pkBatch = + NArrow::TColumnOperator().Extract(OriginalBatch, Context.GetActualSchema()->GetIndexInfo().GetPrimaryKey()->fields()); + auto result = std::make_unique(writeDataPtr, std::move(*batches), pkBatch); + TActorContext::AsActorContext().Send(BufferActorId, result.release()); + } else { + ReplyError("Cannot slice input to batches", NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("Cannot slice input to batches"); } - auto result = std::make_unique(writeDataPtr, std::move(*batches), pkBatch); - TActorContext::AsActorContext().Send(BufferActorId, result.release()); - } else { - ReplyError("Cannot slice input to batches", NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); - return TConclusionStatus::Fail("Cannot slice input to batches"); } - return TConclusionStatus::Success(); } - -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/slice_builder/builder.h b/ydb/core/tx/columnshard/operations/slice_builder/builder.h index a22b0c7d6ca7..be0fc432c277 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder/builder.h +++ b/ydb/core/tx/columnshard/operations/slice_builder/builder.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -11,11 +12,10 @@ class TBuildSlicesTask: public NConveyor::ITask { private: NEvWrite::TWriteData WriteData; const ui64 TabletId; - const NActors::TActorId ParentActorId; const NActors::TActorId BufferActorId; std::shared_ptr OriginalBatch; std::optional> BuildSlices(); - const std::shared_ptr ActualSchema; + const TWritingContext Context; void ReplyError(const TString& message, 
const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); protected: @@ -26,14 +26,13 @@ class TBuildSlicesTask: public NConveyor::ITask { return "Write::ConstructBlobs::Slices"; } - TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, - NEvWrite::TWriteData&& writeData, const std::shared_ptr& batch, const std::shared_ptr& actualSchema) + TBuildSlicesTask(const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& batch, + const TWritingContext& context) : WriteData(std::move(writeData)) - , TabletId(tabletId) - , ParentActorId(parentActorId) + , TabletId(WriteData.GetWriteMeta().GetTableId()) , BufferActorId(bufferActorId) , OriginalBatch(batch) - , ActualSchema(actualSchema) { + , Context(context) { } }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp index 1068e7167413..10b0b564631c 100644 --- a/ydb/core/tx/columnshard/operations/write.cpp +++ b/ydb/core/tx/columnshard/operations/write.cpp @@ -7,58 +7,73 @@ #include #include #include +#include #include #include namespace NKikimr::NColumnShard { -TWriteOperation::TWriteOperation(const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, - const TInstant createdAt, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) - : Status(status) +TWriteOperation::TWriteOperation(const ui64 pathId, const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, + const EOperationStatus& status, const TInstant createdAt, const std::optional granuleShardingVersionId, + const NEvWrite::EModificationType mType, const bool writePortions) + : PathId(pathId) + , Status(status) , CreatedAt(createdAt) , WriteId(writeId) , LockId(lockId) , Cookie(cookie) , GranuleShardingVersionId(granuleShardingVersionId) - , ModificationType(mType) { + , 
ModificationType(mType) + , WritePortions(writePortions) { } -void TWriteOperation::Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, - const std::shared_ptr& schema, const TActorContext& ctx) { +void TWriteOperation::Start( + TColumnShard& owner, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, const NOlap::TWritingContext& context) { Y_ABORT_UNLESS(Status == EOperationStatus::Draft); - NEvWrite::TWriteMeta writeMeta((ui64)WriteId, tableId, source, GranuleShardingVersionId); + NEvWrite::TWriteMeta writeMeta((ui64)WriteId, GetPathId(), source, GranuleShardingVersionId); writeMeta.SetLockId(LockId); writeMeta.SetModificationType(ModificationType); + NEvWrite::TWriteData writeData(writeMeta, data, owner.TablesManager.GetPrimaryIndex()->GetReplaceKey(), + owner.StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING_OPERATOR), WritePortions); std::shared_ptr task = - std::make_shared(owner.TabletID(), ctx.SelfID, owner.BufferizationWriteActorId, - NEvWrite::TWriteData(writeMeta, data, owner.TablesManager.GetPrimaryIndex()->GetReplaceKey(), - owner.StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING_OPERATOR)), - schema, owner.GetLastTxSnapshot(), owner.Counters.GetCSCounters().WritingCounters); + std::make_shared(owner.BufferizationWriteActorId, std::move(writeData), owner.GetLastTxSnapshot(), context); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); Status = EOperationStatus::Started; } -void TWriteOperation::CommitOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { +void TWriteOperation::CommitOnExecute( + TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); TBlobGroupSelector 
dsGroupSelector(owner.Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - for (auto gWriteId : InsertWriteIds) { + if (!WritePortions) { + THashSet insertWriteIds(InsertWriteIds.begin(), InsertWriteIds.end()); auto pathExists = [&](ui64 pathId) { return owner.TablesManager.HasTable(pathId); }; - - const auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), { gWriteId }, pathExists); + const auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), insertWriteIds, pathExists); owner.Counters.GetTabletCounters()->OnWriteCommitted(counters); + } else { + for (auto&& i : InsertWriteIds) { + owner.MutableIndexAs().MutableGranuleVerified(PathId).CommitPortionOnExecute(txc, i, snapshot); + } } } void TWriteOperation::CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& /*snapshot*/) const { Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - owner.UpdateInsertTableCounters(); + if (!WritePortions) { + owner.UpdateInsertTableCounters(); + } else { + for (auto&& i : InsertWriteIds) { + owner.MutableIndexAs().MutableGranuleVerified(PathId).CommitPortionOnComplete( + i, owner.MutableIndexAs()); + } + } } void TWriteOperation::OnWriteFinish( @@ -91,12 +106,17 @@ void TWriteOperation::ToProto(NKikimrTxColumnShard::TInternalOperationData& prot proto.AddInternalWriteIds((ui64)writeId); } proto.SetModificationType((ui32)ModificationType); + proto.SetWritePortions(WritePortions); + proto.SetPathId(PathId); } void TWriteOperation::FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto) { for (auto&& writeId : proto.GetInternalWriteIds()) { InsertWriteIds.push_back(TInsertWriteId(writeId)); } + WritePortions = proto.GetWritePortions(); + PathId = proto.GetPathId(); + AFL_VERIFY(!WritePortions || PathId); if (proto.HasModificationType()) { ModificationType = (NEvWrite::EModificationType)proto.GetModificationType(); } else { @@ -110,13 +130,24 @@ void 
TWriteOperation::AbortOnExecute(TColumnShard& owner, NTabletFlatExecutor::T TBlobGroupSelector dsGroupSelector(owner.Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - THashSet writeIds; - writeIds.insert(InsertWriteIds.begin(), InsertWriteIds.end()); - owner.InsertTable->Abort(dbTable, writeIds); + if (!WritePortions) { + THashSet writeIds; + writeIds.insert(InsertWriteIds.begin(), InsertWriteIds.end()); + owner.InsertTable->Abort(dbTable, writeIds); + } else { + for (auto&& i : InsertWriteIds) { + owner.MutableIndexAs().MutableGranuleVerified(PathId).AbortPortionOnExecute(txc, i); + } + } } -void TWriteOperation::AbortOnComplete(TColumnShard& /*owner*/) const { +void TWriteOperation::AbortOnComplete(TColumnShard& owner) const { Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + if (WritePortions) { + for (auto&& i : InsertWriteIds) { + owner.MutableIndexAs().MutableGranuleVerified(PathId).AbortPortionOnComplete(i); + } + } } } // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/write.h b/ydb/core/tx/columnshard/operations/write.h index d388d09eaf3a..6e67abea8730 100644 --- a/ydb/core/tx/columnshard/operations/write.h +++ b/ydb/core/tx/columnshard/operations/write.h @@ -1,5 +1,7 @@ #pragma once +#include "common/context.h" + #include #include #include @@ -44,6 +46,8 @@ enum class EOperationBehaviour : ui32 { }; class TWriteOperation { +private: + YDB_READONLY(ui64, PathId, 0); YDB_READONLY(EOperationStatus, Status, EOperationStatus::Draft); YDB_READONLY_DEF(TInstant, CreatedAt); YDB_READONLY_DEF(TOperationWriteId, WriteId); @@ -53,16 +57,19 @@ class TWriteOperation { YDB_ACCESSOR(EOperationBehaviour, Behaviour, EOperationBehaviour::Undefined); YDB_READONLY_DEF(std::optional, GranuleShardingVersionId); YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); + bool WritePortions = false; public: using TPtr = std::shared_ptr; - TWriteOperation(const TOperationWriteId writeId, 
const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, - const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); + TWriteOperation(const ui64 pathId, const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, + const TInstant createdAt, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType, + const bool writePortions); - void Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, - const std::shared_ptr& schema, const TActorContext& ctx); - void OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const std::vector& insertWriteIds, const bool ephemeralFlag); + void Start(TColumnShard& owner, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, + const NOlap::TWritingContext& context); + void OnWriteFinish( + NTabletFlatExecutor::TTransactionContext& txc, const std::vector& insertWriteIds, const bool ephemeralFlag); void CommitOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const; void CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& snapshot) const; void AbortOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const; diff --git a/ydb/core/tx/columnshard/operations/write_data.cpp b/ydb/core/tx/columnshard/operations/write_data.cpp index 0f7440aaf5e4..9cb50e023b1f 100644 --- a/ydb/core/tx/columnshard/operations/write_data.cpp +++ b/ydb/core/tx/columnshard/operations/write_data.cpp @@ -1,12 +1,12 @@ #include "write_data.h" +#include #include - namespace NKikimr::NColumnShard { bool TArrowData::Parse(const NKikimrDataEvents::TEvWrite_TOperation& proto, const NEvWrite::IPayloadReader& payload) { - if(proto.GetPayloadFormat() != NKikimrDataEvents::FORMAT_ARROW) { + if (proto.GetPayloadFormat() != 
NKikimrDataEvents::FORMAT_ARROW) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "invalid_payload_format")("payload_format", (ui64)proto.GetPayloadFormat()); return false; } @@ -48,7 +48,7 @@ TConclusion> TArrowData::ExtractBatch() { } else { result = NArrow::DeserializeBatch(IncomingData, std::make_shared(BatchSchema->GetSchema()->fields())); } - + IncomingData = ""; return result; } @@ -70,11 +70,20 @@ bool TProtoArrowData::ParseFromProto(const NKikimrTxColumnShard::TEvWrite& proto } ArrowSchema = NArrow::DeserializeSchema(incomingDataScheme); if (!ArrowSchema) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_deserialize_data"); return false; } } OriginalDataSize = IncomingData.size(); - return !IncomingData.empty() && IncomingData.size() <= NColumnShard::TLimits::GetMaxBlobSize(); + if (IncomingData.empty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "empty_data"); + return false; + } + if (NColumnShard::TLimits::GetMaxBlobSize() < IncomingData.size() && !AppDataVerified().FeatureFlags.GetEnableWritePortionsOnInsert()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "too_big_blob"); + return false; + } + return true; } TConclusion> TProtoArrowData::ExtractBatch() { @@ -88,4 +97,4 @@ ui64 TProtoArrowData::GetSchemaVersion() const { return IndexSchema->GetVersion(); } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/ya.make b/ydb/core/tx/columnshard/operations/ya.make index c0bd3f234b78..84dbd2f5b671 100644 --- a/ydb/core/tx/columnshard/operations/ya.make +++ b/ydb/core/tx/columnshard/operations/ya.make @@ -4,6 +4,7 @@ SRCS( write.cpp write_data.cpp manager.cpp + events.cpp ) PEERDIR( @@ -15,6 +16,7 @@ PEERDIR( ydb/core/tx/columnshard/transactions/locks ydb/core/tx/columnshard/operations/batch_builder ydb/core/tx/columnshard/operations/slice_builder + ydb/core/tx/columnshard/operations/common ) END() diff --git a/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp 
b/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp index d941d548414c..d22bfb316599 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp @@ -315,11 +315,6 @@ Y_UNIT_TEST_SUITE(Normalizers) { { auto readResult = ReadAllAsBatch(runtime, tableId, NOlap::TSnapshot(11, txId), schema); UNIT_ASSERT_VALUES_EQUAL(readResult->num_rows(), 20048); - while (!csControllerGuard->GetInsertFinishedCounter().Val()) { - Cerr << csControllerGuard->GetInsertStartedCounter().Val() << Endl; - Wakeup(runtime, writer.GetSender(), TTestTxConfig::TxTablet0); - runtime.SimulateSleep(TDuration::Seconds(1)); - } } RebootTablet(runtime, TTestTxConfig::TxTablet0, writer.GetSender()); diff --git a/ydb/core/tx/data_events/columnshard_splitter.cpp b/ydb/core/tx/data_events/columnshard_splitter.cpp index 19a787167270..8d644cb1cfbc 100644 --- a/ydb/core/tx/data_events/columnshard_splitter.cpp +++ b/ydb/core/tx/data_events/columnshard_splitter.cpp @@ -1,8 +1,12 @@ #include "columnshard_splitter.h" +#include +#include + namespace NKikimr::NEvWrite { -NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplitter::DoSplitData(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, const IEvWriteDataAccessor& data) { +NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplitter::DoSplitData( + const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, const IEvWriteDataAccessor& data) { if (schemeEntry.Kind != NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { return TYdbConclusionStatus::Fail(Ydb::StatusIds::SCHEME_ERROR, "The specified path is not an column table"); } @@ -36,7 +40,8 @@ NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplit std::shared_ptr arrowScheme = ExtractArrowSchema(scheme); batch = NArrow::DeserializeBatch(data.GetSerializedData(), arrowScheme); if (!batch) { - return TYdbConclusionStatus::Fail(Ydb::StatusIds::SCHEME_ERROR, 
TString("cannot deserialize batch with schema ") + arrowScheme->ToString()); + return TYdbConclusionStatus::Fail( + Ydb::StatusIds::SCHEME_ERROR, TString("cannot deserialize batch with schema ") + arrowScheme->ToString()); } auto res = batch->ValidateFull(); @@ -55,12 +60,13 @@ NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplit return SplitImpl(batch, shardingConclusion.DetachResult()); } -NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplitter::SplitImpl(const std::shared_ptr& batch, - const std::shared_ptr& sharding) -{ +NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplitter::SplitImpl( + const std::shared_ptr& batch, const std::shared_ptr& sharding) { Y_ABORT_UNLESS(batch); - auto split = sharding->SplitByShards(batch, NColumnShard::TLimits::GetMaxBlobSize() * 0.875); + auto split = sharding->SplitByShards(batch, AppDataVerified().FeatureFlags.GetEnableWritePortionsOnInsert() + ? NOlap::NSplitter::TSplitSettings().GetExpectedPortionSize() + : NColumnShard::TLimits::GetMaxBlobSize() * 0.875); if (split.IsFail()) { return TYdbConclusionStatus::Fail(Ydb::StatusIds::SCHEME_ERROR, split.GetErrorMessage()); } @@ -69,7 +75,8 @@ NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplit const TString schemaString = NArrow::SerializeSchema(*batch->schema()); for (auto&& [shardId, chunks] : split.GetResult()) { for (auto&& c : chunks) { - result.AddShardInfo(shardId, std::make_shared(schemaString, c.GetData(), c.GetRowsCount(), sharding->GetShardInfoVerified(shardId).GetShardingVersion())); + result.AddShardInfo(shardId, std::make_shared(schemaString, c.GetData(), c.GetRowsCount(), + sharding->GetShardInfoVerified(shardId).GetShardingVersion())); } } @@ -88,4 +95,4 @@ std::shared_ptr TColumnShardShardsSplitter::ExtractArrowSchema(co return NArrow::TStatusValidator::GetValid(NArrow::MakeArrowSchema(columns)); } -} +} // namespace NKikimr::NEvWrite diff --git 
a/ydb/core/tx/data_events/write_data.cpp b/ydb/core/tx/data_events/write_data.cpp index 390667624dda..05237dbb0701 100644 --- a/ydb/core/tx/data_events/write_data.cpp +++ b/ydb/core/tx/data_events/write_data.cpp @@ -1,20 +1,22 @@ #include "write_data.h" + #include #include -#include +#include namespace NKikimr::NEvWrite { -TWriteData::TWriteData(const TWriteMeta& writeMeta, IDataContainer::TPtr data, const std::shared_ptr& primaryKeySchema, const std::shared_ptr& blobsAction) +TWriteData::TWriteData(const TWriteMeta& writeMeta, IDataContainer::TPtr data, const std::shared_ptr& primaryKeySchema, + const std::shared_ptr& blobsAction, const bool writePortions) : WriteMeta(writeMeta) , Data(data) , PrimaryKeySchema(primaryKeySchema) , BlobsAction(blobsAction) -{ + , WritePortions(writePortions) { Y_ABORT_UNLESS(Data); Y_ABORT_UNLESS(PrimaryKeySchema); Y_ABORT_UNLESS(BlobsAction); } -} +} // namespace NKikimr::NEvWrite diff --git a/ydb/core/tx/data_events/write_data.h b/ydb/core/tx/data_events/write_data.h index 0acbec1bcf98..fb9ca8fa7304 100644 --- a/ydb/core/tx/data_events/write_data.h +++ b/ydb/core/tx/data_events/write_data.h @@ -1,13 +1,15 @@ #pragma once #include "common/modification_type.h" -#include #include -#include -#include +#include +#include +#include #include #include +#include + #include namespace NKikimr::NOlap { @@ -17,9 +19,13 @@ class IBlobsWritingAction; namespace NKikimr::NEvWrite { class IDataContainer { +private: + YDB_ACCESSOR_DEF(NArrow::NMerger::TIntervalPositions, SeparationPoints); + public: using TPtr = std::shared_ptr; - virtual ~IDataContainer() {} + virtual ~IDataContainer() { + } virtual TConclusion> ExtractBatch() = 0; virtual ui64 GetSchemaVersion() const = 0; virtual ui64 GetSize() const = 0; @@ -47,6 +53,7 @@ class TWriteMeta { YDB_ACCESSOR(TMonotonic, WriteMiddle5StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle6StartInstant, TMonotonic::Now()); std::optional LockId; + public: void SetLockId(const ui64 lockId) 
{ LockId = lockId; @@ -77,8 +84,8 @@ class TWriteMeta { : WriteId(writeId) , TableId(tableId) , Source(source) - , GranuleShardingVersion(granuleShardingVersion) - {} + , GranuleShardingVersion(granuleShardingVersion) { + } }; class TWriteData { @@ -88,8 +95,11 @@ class TWriteData { YDB_READONLY_DEF(std::shared_ptr, PrimaryKeySchema); YDB_READONLY_DEF(std::shared_ptr, BlobsAction); YDB_ACCESSOR_DEF(std::optional, SchemaSubset); + YDB_READONLY(bool, WritePortions, false); + public: - TWriteData(const TWriteMeta& writeMeta, IDataContainer::TPtr data, const std::shared_ptr& primaryKeySchema, const std::shared_ptr& blobsAction); + TWriteData(const TWriteMeta& writeMeta, IDataContainer::TPtr data, const std::shared_ptr& primaryKeySchema, + const std::shared_ptr& blobsAction, const bool writePortions); const NArrow::TSchemaSubset& GetSchemaSubsetVerified() const { AFL_VERIFY(SchemaSubset); @@ -109,4 +119,4 @@ class TWriteData { } }; -} +} // namespace NKikimr::NEvWrite diff --git a/ydb/core/tx/tiering/ut/ut_tiers.cpp b/ydb/core/tx/tiering/ut/ut_tiers.cpp index 21fa01b29a6e..a8acda7b0f5e 100644 --- a/ydb/core/tx/tiering/ut/ut_tiers.cpp +++ b/ydb/core/tx/tiering/ut/ut_tiers.cpp @@ -606,9 +606,10 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { runtime.UpdateCurrentTime(now); const TInstant pkStart = now - TDuration::Days(15); - auto batch = lHelper.TestArrowBatch(0, pkStart.GetValue(), 6000); + auto batch1 = lHelper.TestArrowBatch(0, pkStart.GetValue(), 6000); + auto batch2 = lHelper.TestArrowBatch(0, pkStart.GetValue() - 100, 6000); auto batchSmall = lHelper.TestArrowBatch(0, now.GetValue(), 1); - auto batchSize = NArrow::GetBatchDataSize(batch); + auto batchSize = NArrow::GetBatchDataSize(batch1); Cerr << "Inserting " << batchSize << " bytes..." 
<< Endl; UNIT_ASSERT(batchSize > 4 * 1024 * 1024); // NColumnShard::TLimits::MIN_BYTES_TO_INSERT UNIT_ASSERT(batchSize < 8 * 1024 * 1024); @@ -617,7 +618,8 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { TAtomic unusedPrev; runtime.GetAppData().Icb->SetValue("ColumnShardControls.GranuleIndexedPortionsCountLimit", 1, unusedPrev); } - lHelper.SendDataViaActorSystem("/Root/olapStore/olapTable", batch); + lHelper.SendDataViaActorSystem("/Root/olapStore/olapTable", batch1); + lHelper.SendDataViaActorSystem("/Root/olapStore/olapTable", batch2); { const TInstant start = Now(); bool check = false; diff --git a/ydb/library/formats/arrow/arrow_helpers.cpp b/ydb/library/formats/arrow/arrow_helpers.cpp index 3244a9bf4417..78fa0c3e715d 100644 --- a/ydb/library/formats/arrow/arrow_helpers.cpp +++ b/ydb/library/formats/arrow/arrow_helpers.cpp @@ -237,12 +237,18 @@ std::vector ColumnNames(const std::shared_ptr& schema) { return out; } -std::shared_ptr MakeUI64Array(ui64 value, i64 size) { +std::shared_ptr MakeUI64Array(const ui64 value, const i64 size) { auto res = arrow::MakeArrayFromScalar(arrow::UInt64Scalar(value), size); Y_ABORT_UNLESS(res.ok()); return std::static_pointer_cast(*res); } +std::shared_ptr MakeStringArray(const TString& value, const i64 size) { + auto res = arrow::MakeArrayFromScalar(arrow::StringScalar(value), size); + Y_ABORT_UNLESS(res.ok()); + return std::static_pointer_cast(*res); +} + std::pair FindMinMaxPosition(const std::shared_ptr& array) { if (array->length() == 0) { return {-1, -1}; diff --git a/ydb/library/formats/arrow/arrow_helpers.h b/ydb/library/formats/arrow/arrow_helpers.h index b8145b8ad6b9..de9f23da43be 100644 --- a/ydb/library/formats/arrow/arrow_helpers.h +++ b/ydb/library/formats/arrow/arrow_helpers.h @@ -57,7 +57,8 @@ std::vector> MakeBuilders(const std::shared size_t reserve = 0, const std::map& sizeByColumn = {}); std::vector> Finish(std::vector>&& builders); -std::shared_ptr MakeUI64Array(ui64 value, i64 size); +std::shared_ptr 
MakeUI64Array(const ui64 value, const i64 size); +std::shared_ptr MakeStringArray(const TString& value, const i64 size); std::vector ColumnNames(const std::shared_ptr& schema); bool ReserveData(arrow::ArrayBuilder& builder, const size_t size); bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder = {}, const bool orderFieldsAreNecessary = true);