From 744f4e1c44d8debcdebd5a83f127cbbc28c261e4 Mon Sep 17 00:00:00 2001 From: Satya Bodapati Date: Fri, 26 Apr 2019 17:29:53 +0530 Subject: [PATCH] PS-5562 : index_merge + partitioning + MyRocks crashes the server Problem: -------- With native paritioning and index_merge, the native partition handler is cloned. One important point missed when fixing PS-5206 is that both the original and clone share the parition_info object (m_part_info). It has bitmap of partitions read and used. The cloned handler, which has reference to the shared to partition_info object, resets the partition bitmap during creation. This causes problem during the actual index scan. Fix: ---- 1. Create context to identify if the native partition handler creation is from clone or brand new. 2. When it is clone, do not reset the partition bitmaps (i.e avoid calling m_part_info->set_partition_bitmaps()). This is the core fix. 3. Additionally, we fix the cloned object creation to use clone mem_root instead of TABLE->mem_root. Optimizer can destroy the cloned handler immediately after its use. (See QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT). So the memory used by cloned handler objects is freed immediately at end of query execution. --- .../multi_con_read.result | 4 +- .../suite/rocksdb/r/multi_con_read.result | 4 +- sql/partitioning/partition_base.cc | 44 +++++++++++++++---- sql/partitioning/partition_base.h | 11 ++++- storage/rocksdb/ha_rockspart.cc | 3 +- storage/rocksdb/ha_rockspart.h | 7 ++- storage/tokudb/ha_tokupart.cc | 3 +- storage/tokudb/ha_tokupart.h | 7 ++- 8 files changed, 63 insertions(+), 20 deletions(-) diff --git a/mysql-test/suite/rocksdb/r-native-partitioning/multi_con_read.result b/mysql-test/suite/rocksdb/r-native-partitioning/multi_con_read.result index 46bbbc7dab9c..d1f1ea475711 100644 --- a/mysql-test/suite/rocksdb/r-native-partitioning/multi_con_read.result +++ b/mysql-test/suite/rocksdb/r-native-partitioning/multi_con_read.result @@ -10,12 +10,12 @@ CREATE TABLE `t1` ( PRIMARY KEY (`id`), KEY `k_1` (`k`) ) ENGINE='ROCKSDB' AUTO_INCREMENT=10001 DEFAULT CHARSET=utf8mb4 -/*!50100 PARTITION BY RANGE (`id`) +PARTITION BY RANGE (`id`) SUBPARTITION BY HASH (`id`) SUBPARTITIONS 20 (PARTITION p0 VALUES LESS THAN (333333), PARTITION p1 VALUES LESS THAN (666666), -PARTITION p3 VALUES LESS THAN MAXVALUE) */; +PARTITION p3 VALUES LESS THAN MAXVALUE); INSERT INTO t1 VALUES (20,5011,'82095294113-46718689069-93751429117-94982456004-28879831505-39496075815-87005112245-60372548581-07992589471-35458088958','78183819052-54767035794-64034000286-75682734084-27025271045'),(40,5037,'96458712584-04274358647-34600172982-61258877004-02456216612-93017353068-68948947187-65320412979-86812414667-04055433428','95674959566-59630142193-63453128243-68849021141-08672523473'),(60,3761,'51940584032-40151584646-01263150854-80980948000-73690232712-88605487014-13973299616-30051733966-92938202491-23238143422','35056562675-38773091386-28325876611-93328856424-07318410640'),(80,5030,'97366695989-67793472372-01073016448-88420943430-06742885226-53625048051-49633140042-38845136504-58561938096-76961679381','51626628663-10353574199-64993757935-88432770455-20027636424'),(100,4996,'45575584051-98066223638-50505155169-83257674849-92578957352-29933207560-84271372173-53917929075-64247206072-28852761782','69156787560-46290357747-57606466747-21639043920-96331231238'),(120,5073,'55169033460-32678558171-43672981950-72378152482-84693479612-76708194965-33191893015-16735847859-84813156526-50930934949','32512962596-41287818775-90537814331-28667161937-98732853550'); SELECT c FROM t1 WHERE id = 1 * 20 % 120; SELECT c FROM t1 WHERE id = 2 * 20 % 120; diff --git a/mysql-test/suite/rocksdb/r/multi_con_read.result b/mysql-test/suite/rocksdb/r/multi_con_read.result index 5da307d1238b..e3435af94634 100644 --- a/mysql-test/suite/rocksdb/r/multi_con_read.result +++ b/mysql-test/suite/rocksdb/r/multi_con_read.result @@ -10,12 +10,12 @@ CREATE TABLE `t1` ( PRIMARY KEY (`id`), KEY `k_1` (`k`) ) ENGINE='ROCKSDB' AUTO_INCREMENT=10001 DEFAULT CHARSET=utf8mb4 -/*!50100 PARTITION BY RANGE (`id`) +PARTITION BY RANGE (`id`) SUBPARTITION BY HASH (`id`) SUBPARTITIONS 20 (PARTITION p0 VALUES LESS THAN (333333), PARTITION p1 VALUES LESS THAN (666666), -PARTITION p3 VALUES LESS THAN MAXVALUE) */; +PARTITION p3 VALUES LESS THAN MAXVALUE); Warnings: Warning 1287 The partition engine, used by table 'test.t1', is deprecated and will be removed in a future release. Please use native partitioning instead. INSERT INTO t1 VALUES (20,5011,'82095294113-46718689069-93751429117-94982456004-28879831505-39496075815-87005112245-60372548581-07992589471-35458088958','78183819052-54767035794-64034000286-75682734084-27025271045'),(40,5037,'96458712584-04274358647-34600172982-61258877004-02456216612-93017353068-68948947187-65320412979-86812414667-04055433428','95674959566-59630142193-63453128243-68849021141-08672523473'),(60,3761,'51940584032-40151584646-01263150854-80980948000-73690232712-88605487014-13973299616-30051733966-92938202491-23238143422','35056562675-38773091386-28325876611-93328856424-07318410640'),(80,5030,'97366695989-67793472372-01073016448-88420943430-06742885226-53625048051-49633140042-38845136504-58561938096-76961679381','51626628663-10353574199-64993757935-88432770455-20027636424'),(100,4996,'45575584051-98066223638-50505155169-83257674849-92578957352-29933207560-84271372173-53917929075-64247206072-28852761782','69156787560-46290357747-57606466747-21639043920-96331231238'),(120,5073,'55169033460-32678558171-43672981950-72378152482-84693479612-76708194965-33191893015-16735847859-84813156526-50930934949','32512962596-41287818775-90537814331-28667161937-98732853550'); diff --git a/sql/partitioning/partition_base.cc b/sql/partitioning/partition_base.cc index e9ed38dbc13c..1bf0e7fc2565 100644 --- a/sql/partitioning/partition_base.cc +++ b/sql/partitioning/partition_base.cc @@ -384,7 +384,8 @@ const uint32 Partition_base::NO_CURRENT_PART_ID= NOT_A_PARTITION_ID; */ Partition_base::Partition_base(handlerton *hton, TABLE_SHARE *share) - : handler(hton, share), Partition_helper(this) + : handler(hton, share), Partition_helper(this), m_clone_base(nullptr), + m_clone_mem_root(nullptr) { DBUG_ENTER("Partition_base::Partition_base(table)"); init_handler_variables(); @@ -392,12 +393,14 @@ Partition_base::Partition_base(handlerton *hton, TABLE_SHARE *share) } Partition_base::Partition_base(handlerton *hton, TABLE_SHARE *share, - Handler_share** ha_share) - : handler(hton, share), Partition_helper(this) + Partition_base* clone_base, + MEM_ROOT* clone_mem_root) + : handler(hton, share), Partition_helper(this), + m_clone_base(clone_base), m_clone_mem_root(clone_mem_root) { DBUG_ENTER("Partition_base::Partition_base(table, ha_share)"); init_handler_variables(); - set_ha_share_ref(ha_share); + set_ha_share_ref(&share->ha_share); DBUG_VOID_RETURN; } @@ -1871,10 +1874,14 @@ bool Partition_base::init_partition_bitmaps() } bitmap_clear_all(&m_partitions_to_reset); - if (m_part_info->set_partition_bitmaps(nullptr)) - { - free_partition_bitmaps(); - DBUG_RETURN(true); + /* When Partition_base is cloned, both the clone and the original object + share partition_info object (m_part_info). Do not reset the partition + bitmaps. */ + if (!m_clone_base) { + if (m_part_info->set_partition_bitmaps(nullptr)) { + free_partition_bitmaps(); + DBUG_RETURN(true); + } } DBUG_RETURN(false); @@ -1908,6 +1915,7 @@ int Partition_base::open(const char *name, int mode, uint test_if_locked) { int error= HA_ERR_INITIALIZATION; handler **file; + handler **clone_base_file = nullptr; ulonglong check_table_flags; DBUG_ENTER("Partition_base::open"); @@ -1917,8 +1925,11 @@ int Partition_base::open(const char *name, int mode, uint test_if_locked) m_mode= mode; m_open_test_lock= test_if_locked; + MEM_ROOT *mem_root= + m_clone_mem_root != nullptr ? m_clone_mem_root : &table->mem_root; + /* The following functions must be called only after m_part_info set */ - if (initialize_partition(&table->mem_root) || init_part_share() || + if (initialize_partition(mem_root) || init_part_share() || init_with_fields()) DBUG_RETURN(true); @@ -1966,15 +1977,30 @@ int Partition_base::open(const char *name, int mode, uint test_if_locked) DBUG_ASSERT(m_part_info); file= m_file; + if (m_clone_base != nullptr) { + clone_base_file= m_clone_base->m_file; + } if (!foreach_partition([&](const partition_element *parent_part_elem, const partition_element *part_elem) -> bool { char name_buff[FN_REFLEN]; part_name(name_buff, name, parent_part_elem, part_elem); + + if (m_clone_base != nullptr) { + uint ref_length = (*clone_base_file)->ref_length; + (*file)->ref= (uchar *)alloc_root(m_clone_mem_root, + ALIGN_SIZE(ref_length) * 2); + } + if ((error= (*file)->ha_open(table, name_buff, mode, test_if_locked | HA_OPEN_NO_PSI_CALL))) return false; + ++file; + if (m_clone_base != nullptr) { + ++clone_base_file; + } + return true; })) goto err_handler; diff --git a/sql/partitioning/partition_base.h b/sql/partitioning/partition_base.h index 6b2a6a90e70f..0c1519836fa0 100644 --- a/sql/partitioning/partition_base.h +++ b/sql/partitioning/partition_base.h @@ -111,6 +111,14 @@ class Partition_base : public handler, /** keep track of partitions to call ha_reset */ MY_BITMAP m_partitions_to_reset; + /** Used when Partiton_base is cloned. Specifically it is used to retrieve + each handler ref_length */ + Partition_base* m_clone_base; + /** For clone operation, memory will be allocated from this mem_root instead + of TABLE->mem_root. The clone can be destroyed by optimizer immediately. So + this allows to release the memory used by cloned object quickly */ + MEM_ROOT* m_clone_mem_root; + public: handler *clone(const char *name, MEM_ROOT *mem_root)override = 0; /* @@ -126,7 +134,8 @@ class Partition_base : public handler, */ Partition_base(handlerton *hton, TABLE_SHARE *table); - Partition_base(handlerton *hton, TABLE_SHARE *table, Handler_share** ha_share); + Partition_base(handlerton *hton, TABLE_SHARE *table, + Partition_base *clone_base, MEM_ROOT *clone_mem_root); ~Partition_base() override; bool init_with_fields() override; diff --git a/storage/rocksdb/ha_rockspart.cc b/storage/rocksdb/ha_rockspart.cc index cff21c828339..8e679bfb5a98 100644 --- a/storage/rocksdb/ha_rockspart.cc +++ b/storage/rocksdb/ha_rockspart.cc @@ -75,7 +75,8 @@ handler *ha_rockspart::clone(const char *name, MEM_ROOT *mem_root) { if (!table) DBUG_RETURN(nullptr); - new_handler = new (mem_root) ha_rockspart(ht, table_share, m_part_info); + new_handler = + new (mem_root) ha_rockspart(ht, table_share, m_part_info, this, mem_root); if (!new_handler) DBUG_RETURN(nullptr); diff --git a/storage/rocksdb/ha_rockspart.h b/storage/rocksdb/ha_rockspart.h index e16c18916466..5d8cbbec09e5 100644 --- a/storage/rocksdb/ha_rockspart.h +++ b/storage/rocksdb/ha_rockspart.h @@ -25,8 +25,11 @@ class ha_rockspart : public native_part::Partition_base { : native_part::Partition_base(hton, table_arg){}; ha_rockspart(handlerton *hton, TABLE_SHARE *table_arg, - partition_info *part_info) - : native_part::Partition_base(hton, table_arg, &table_arg->ha_share) { + partition_info *part_info, + native_part::Partition_base *clone_base, + MEM_ROOT *clone_mem_root) + : native_part::Partition_base(hton, table_arg, clone_base, + clone_mem_root) { this->get_partition_handler()->set_part_info(part_info, true); } diff --git a/storage/tokudb/ha_tokupart.cc b/storage/tokudb/ha_tokupart.cc index d11862e9fa43..4dbb65f7894e 100644 --- a/storage/tokudb/ha_tokupart.cc +++ b/storage/tokudb/ha_tokupart.cc @@ -45,7 +45,8 @@ handler *ha_tokupart::clone(const char *name, MEM_ROOT *mem_root) { if (!table) DBUG_RETURN(nullptr); - new_handler = new (mem_root) ha_tokupart(ht, table_share, m_part_info); + new_handler = + new (mem_root) ha_tokupart(ht, table_share, m_part_info, this, mem_root); if (!new_handler) DBUG_RETURN(nullptr); diff --git a/storage/tokudb/ha_tokupart.h b/storage/tokudb/ha_tokupart.h index 519978c6ff26..928877efa2e8 100644 --- a/storage/tokudb/ha_tokupart.h +++ b/storage/tokudb/ha_tokupart.h @@ -27,8 +27,11 @@ class ha_tokupart : public native_part::Partition_base { ha_tokupart(handlerton *hton, TABLE_SHARE *table_arg, - partition_info *part_info) - : native_part::Partition_base(hton, table_arg, &table_arg->ha_share) { + partition_info *part_info, + native_part::Partition_base *clone_base, + MEM_ROOT *clone_mem_root) + : native_part::Partition_base(hton, table_arg, clone_base, + clone_mem_root) { this->get_partition_handler()->set_part_info(part_info, true); }