From 67edc941543fcdb5332013a150619c5ef85bef59 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 14 Oct 2024 11:28:37 +0300 Subject: [PATCH 1/2] fix splitter condition for avoid split micro-chunks --- ydb/core/tx/columnshard/splitter/batch_slice.cpp | 2 ++ ydb/core/tx/columnshard/splitter/settings.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/batch_slice.cpp index 7f6cc05c1e7b..1e33b4bf9777 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.cpp +++ b/ydb/core/tx/columnshard/splitter/batch_slice.cpp @@ -99,6 +99,8 @@ bool TGeneralSerializedSlice::GroupBlobsImpl(const NSplitter::TGroupFeatures& fe chunksInProgress.PopFront(i); hasNoSplitChanges = true; } else { + // in this case chunksInProgress[i] size >= Max - Min for case nextPartSize >= features.GetSplitSettings().GetMaxBlobSize() + // in this case chunksInProgress[i] size >= Max - 2 * Min for case nextOtherSize < features.GetSplitSettings().GetMinBlobSize() Y_ABORT_UNLESS((i64)chunksInProgress[i]->GetPackedSize() > features.GetSplitSettings().GetMinBlobSize() - partSize); Y_ABORT_UNLESS(otherSize - (features.GetSplitSettings().GetMinBlobSize() - partSize) >= features.GetSplitSettings().GetMinBlobSize()); diff --git a/ydb/core/tx/columnshard/splitter/settings.h b/ydb/core/tx/columnshard/splitter/settings.h index d370a5206047..003010bc8023 100644 --- a/ydb/core/tx/columnshard/splitter/settings.h +++ b/ydb/core/tx/columnshard/splitter/settings.h @@ -15,7 +15,7 @@ namespace NKikimr::NOlap::NSplitter { class TSplitSettings { private: static const inline i64 DefaultMaxBlobSize = 8 * 1024 * 1024; - static const inline i64 DefaultMinBlobSize = 4 * 1024 * 1024; + static const inline i64 DefaultMinBlobSize = 3 * 1024 * 1024; static const inline i64 DefaultMinRecordsCount = 10000; static const inline i64 DefaultMaxPortionSize = 6 * DefaultMaxBlobSize; YDB_ACCESSOR(i64, MaxBlobSize, DefaultMaxBlobSize); 
From b94f92995b2eec7fad8af8f0c4ccb823e48ccc0e Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 14 Oct 2024 11:30:29 +0300 Subject: [PATCH 2/2] correction --- ydb/core/tx/columnshard/splitter/settings.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ydb/core/tx/columnshard/splitter/settings.h b/ydb/core/tx/columnshard/splitter/settings.h index 003010bc8023..6f9f843cf874 100644 --- a/ydb/core/tx/columnshard/splitter/settings.h +++ b/ydb/core/tx/columnshard/splitter/settings.h @@ -14,8 +14,10 @@ namespace NKikimr::NOlap::NSplitter { class TSplitSettings { private: +// DefaultMaxBlobSize - 2 * DefaultMinBlobSize has to be enough to "guarantee" records count > 1 through blobs splitting static const inline i64 DefaultMaxBlobSize = 8 * 1024 * 1024; static const inline i64 DefaultMinBlobSize = 3 * 1024 * 1024; + static const inline i64 DefaultMinRecordsCount = 10000; static const inline i64 DefaultMaxPortionSize = 6 * DefaultMaxBlobSize; YDB_ACCESSOR(i64, MaxBlobSize, DefaultMaxBlobSize);