Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GAIAPLAT-1752] INDEX: eliminate memory allocation when garbage collecting offsets #1124

Merged
merged 10 commits into from
Dec 20, 2021
96 changes: 53 additions & 43 deletions production/db/core/src/index_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

#include "gaia_internal/db/index_builder.hpp"

#include <unordered_set>
#include <array>
#include <utility>
#include <vector>

#include "gaia/exceptions.hpp"

Expand Down Expand Up @@ -381,7 +382,7 @@ void index_builder_t::update_indexes_from_txn_log(
// table is created or dropped in the txn.
// Keep track of dropped tables.
bool has_cleared_cache = false;
std::unordered_set<gaia_type_t> dropped_types;
std::vector<gaia_type_t> dropped_types;

for (size_t i = 0; i < records.record_count; ++i)
{
Expand All @@ -403,7 +404,7 @@ void index_builder_t::update_indexes_from_txn_log(
if (log_record.operation == gaia_operation_t::remove)
{
auto table_view = table_view_t(offset_to_ptr(log_record.old_offset));
dropped_types.insert(table_view.table_type());
dropped_types.push_back(table_view.table_type());
}
}
}
Expand Down Expand Up @@ -448,7 +449,7 @@ void index_builder_t::update_indexes_from_txn_log(
// The operation is from a dropped table.
// Skip if catalog verification disabled and type not found in the catalog.
if (is_system_object(obj->type)
|| dropped_types.find(obj->type) != dropped_types.end()
|| std::find(dropped_types.begin(), dropped_types.end(), obj->type) != dropped_types.end()
|| (skip_catalog_integrity_check && type_record_id == c_invalid_gaia_id))
{
continue;
Expand All @@ -462,62 +463,71 @@ void index_builder_t::update_indexes_from_txn_log(
}

template <class T_index>
void remove_entries_with_offsets(base_index_t* base_index, const std::unordered_set<gaia_offset_t>& offsets, gaia_txn_id_t txn_id)
void remove_entries_with_offsets(base_index_t* base_index, const index_offset_buffer_t& offsets, gaia_txn_id_t txn_id)
{
auto index = static_cast<T_index*>(base_index);
index->remove_index_entry_with_offsets(offsets, txn_id);
}

void index_builder_t::gc_indexes_from_txn_log(const txn_log_t& records, bool deallocate_new_offsets)
{
std::unordered_set<gaia_offset_t> collected_offsets;
std::unordered_set<gaia_type_t> offset_types;

for (size_t i = 0; i < records.record_count; ++i)
size_t records_index = 0;
while (records_index < records.record_count)
{
const auto& log_record = records.log_records[i];
gaia_offset_t offset = deallocate_new_offsets ? log_record.new_offset : log_record.old_offset;

// If no action is needed, move on to the next log record.
if (offset != c_invalid_gaia_offset)
index_offset_buffer_t collected_offsets;
// Fill the offset buffer for garbage collection.
// Exit the loop when we either have run out of records to process or the offsets buffer is full.
for (; records_index < records.record_count && collected_offsets.size() < c_offset_buffer_size; ++records_index)
{
auto obj = offset_to_ptr(offset);
const auto& log_record = records.log_records[records_index];

// We do not index system objects, so we can move on.
if (is_system_object(obj->type))
{
continue;
}
gaia_offset_t offset = deallocate_new_offsets ? log_record.new_offset : log_record.old_offset;

collected_offsets.insert(offset);
offset_types.insert(obj->type);
}
}
// If no action is needed, move on to the next log record.
if (offset != c_invalid_gaia_offset)
{
auto obj = offset_to_ptr(offset);

// Nothing to do here.
if (collected_offsets.size() == 0)
{
return;
}
// We do not index system objects, so we can move on.
if (is_system_object(obj->type))
{
continue;
}

for (auto it : *get_indexes())
{
gaia_type_t indexed_type = it.second->table_type();
// Add the offset to the buffers and advance the buffer index.
collected_offsets.insert(offset, obj->type);
}
}

// This index does not contain any of the deleted offsets.
if (offset_types.find(indexed_type) == offset_types.end())
// When we reach this point, either we have 1) run out of records to iterate over or 2) the offsets buffer is now considered full.
// We know that 2) is false when the offsets buffer is empty and there is no garbage to collect.
// Therefore we can safely return here.
if (collected_offsets.empty())
LaurentiuCristofor marked this conversation as resolved.
Show resolved Hide resolved
{
continue;
return;
}

switch (it.second->type())
// Garbage collect the offsets in the buffer.
for (const auto& it : *get_indexes())
{
case catalog::index_type_t::range:
remove_entries_with_offsets<range_index_t>(it.second.get(), collected_offsets, records.begin_ts);
break;
case catalog::index_type_t::hash:
remove_entries_with_offsets<hash_index_t>(it.second.get(), collected_offsets, records.begin_ts);
break;
gaia_type_t indexed_type = it.second->table_type();

// This index does not contain any of the deleted offsets.
// Skip the expensive remove_entries_with_offsets operation.
if (!collected_offsets.has_type(indexed_type))
{
continue;
}

switch (it.second->type())
{
case catalog::index_type_t::range:
remove_entries_with_offsets<range_index_t>(it.second.get(), collected_offsets, records.begin_ts);
Copy link
Contributor

@LaurentiuCristofor LaurentiuCristofor Dec 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused - doesn't collected_offsets contain offsets of different types? Why would we pass the full array to an index of a specific type?

Perhaps a better approach is to build a map of types to offset arrays/vectors so that we can only pass relevant offsets in these calls. Building that map can be done with trivial changes to the code that fills the 2 arrays.

Copy link
Contributor

@senderista senderista Dec 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, if the offset array is small enough it would be reasonable to just scan the whole array (assuming it contains offset/type pairs) and filter out offsets with inapplicable types. This would be really concise with C++20 ranges but still wouldn't be much code in a for loop.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The offset array is small — limited to 32 records. Passing the type does not provide any benefit over checking straight against the offset: if the type matches, we still need to do another comparison; if it does not match, we have already done one comparison. Comparing straight against the offset is one comparison regardless.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a fragile assumption. If you rely on the size being small, then that should be statically asserted somewhere, otherwise someone can easily edit the constant value and bump it to a much larger value than you expect to have to deal with in this code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

32 is a somewhat arbitrary size set by me. I have not done any benchmarking and picked this value because it seems reasonable. I think it's fine if someone bumps it up (in fact my guess is the performance at 128 or 256 elements is acceptable). The important thing is that the array is bounded at some size.

The dangerous case for gc still remains on 'hot' indexes with many, many entries, and I have not addressed this danger. That case would require a different strategy altogether.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should add more comments to the constant declaration and, if possible, some static_asserts to document our expectations on the size of these arrays. Their length is declared in one place and the reliance on that length being small happens in a different place. It's very easy for someone to update the constant without being aware of the reliance on a small value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fact that the offset array is small is rather irrelevant given that the number of arrays you can batch here is not really bounded.

Furthermore, any comparison that you avoid once when you build your array will add overhead to the find operation executed for all indexes of a different type. The penalty of including offsets of wrong type gets multiplied by the number of indexes of different type.

I.e.: Let's say you collect 2 values each for 8 different types. You avoided 16 type comparisons during this collection. But now for each index, you'll perform a linear search through a 16 element array instead of performing a search through a 2 element array (or no search at all for indexes of different types). Your performance is 8 times worse in this example!

This is why I'd rather see an implementation done with no regard to memory allocation and then look to see if it can be optimized further to remove that memory allocation. As it is now, you're probably doing more operations doing linear array searches than you'd do allocating memory for some vectors. This is premature optimization.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that the important worst-case bound here is not on the size of a single array, but on the work done by a thread in a single GC pass. So far I've avoided bounding per-thread GC work in favor of approximating fairness by GC handoff on contention. But if there's no opportunity for contention detection and backoff in this case, we may need to design fairness explicitly into the protocol, which implies batching work across GC threads. I think I need to understand the possible worst-case scenarios better here before commenting further.

I'm not terribly worried about optimizing comparisons, given that basically any computation done in an already-resident cache line is free. Indirections from a mapping structure could easily swamp the overhead of unnecessary comparisons. But of course we don't want to scan thousands or millions of entries for a single entry of a given type. We could possibly optimize this without introducing expensive mappings, e.g. by sorting the (offset, type) pairs by type and binary searching for the first occurrence of a type. Seems like maybe this would be better discussed in a meeting, though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So to clarify, I'm not worried at all about linear searches of an array that fits into 1 or 2 cache lines. Those comparisons are effectively free (once you've taken the cache miss). Performance will definitely not be "8 times worse" for a 16-element array than a 2-element array. But I do think we could safely use much larger arrays if we sorted (offset, type) pairs by type and binary-searched for the first occurrence of a type.

break;
case catalog::index_type_t::hash:
remove_entries_with_offsets<hash_index_t>(it.second.get(), collected_offsets, records.begin_ts);
break;
}
}
}
}
Expand All @@ -531,7 +541,7 @@ void mark_index_entries(base_index_t* base_index, gaia_txn_id_t txn_id)

void index_builder_t::mark_index_entries_committed(gaia_txn_id_t txn_id)
{
for (auto it : *get_indexes())
for (const auto& it : *get_indexes())
{
// Optimization: only mark index entries committed for UNIQUE indexes, as we only look up the flags on that path.
if (it.second->is_unique())
Expand Down
25 changes: 23 additions & 2 deletions production/db/inc/index/index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#pragma once

#include <unordered_set>
#include <array>

#include "gaia_internal/db/db_types.hpp"

Expand Down Expand Up @@ -48,6 +48,27 @@ class index_writer_guard_t
T_structure& m_data;
};

// Capacity of index_offset_buffer_t's fixed-size backing arrays, i.e. the
// maximum number of offsets garbage-collected per batch.
// Deliberately small: membership checks (has_offset()/has_type()) are linear
// scans over this buffer, which is only cheap while the arrays span a few
// cache lines. Revisit those scans before raising this value significantly.
constexpr size_t c_offset_buffer_size = 32;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There shouldn't be any need to hardcode the array size if you just use std::vector.


/*
 * Fixed-capacity buffer of (offset, type) pairs collected for garbage
 * collection of index entries. Backed by two parallel std::arrays of
 * c_offset_buffer_size elements each, so filling and draining the buffer
 * performs no heap allocation.
 */

class index_offset_buffer_t
{
public:
    // Appends an (offset, type) pair at the next free slot.
    // Capacity is c_offset_buffer_size; callers are expected to check size()
    // against that capacity before inserting.
    void insert(gaia_offset_t offset, common::gaia_type_t type);

    // Linear scan of the filled prefix for the given offset.
    bool has_offset(gaia_offset_t offset) const;

    // Linear scan of the filled prefix for the given table type.
    bool has_type(common::gaia_type_t type) const;

    // True when no pairs have been inserted yet.
    bool empty() const;

    // Number of pairs currently stored.
    size_t size() const;

private:
    // Parallel arrays: m_offsets[i] pairs with m_offset_types[i].
    // Only the first m_size entries of each array are valid.
    std::array<gaia_offset_t, c_offset_buffer_size> m_offsets = {};
    std::array<common::gaia_type_t, c_offset_buffer_size> m_offset_types = {};
    size_t m_size = 0;
};

/**
* Abstract in-memory index type:
* T_structure is the underlying backing data structure of the index.
Expand All @@ -71,7 +92,7 @@ class index_t : public base_index_t

// Index structure maintenance.
void insert_index_entry(index_key_t&& key, index_record_t record);
void remove_index_entry_with_offsets(const std::unordered_set<gaia_offset_t>& offsets, gaia_txn_id_t gc_txn_id);
void remove_index_entry_with_offsets(const index_offset_buffer_t& offsets, gaia_txn_id_t gc_txn_id);

// This method will mark all entries below a specified txn_id as committed.
// This must only be called after all aborted/terminated index entries below the txn_id are garbage collected.
Expand Down
5 changes: 2 additions & 3 deletions production/db/inc/index/index.inc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
/////////////////////////////////////////////

// Index structure maintenance.

template <typename T_structure, typename T_iterator>
void index_t<T_structure, T_iterator>::insert_index_entry(index_key_t&& key, index_record_t record)
{
Expand All @@ -29,7 +28,7 @@ void index_t<T_structure, T_iterator>::insert_index_entry(index_key_t&& key, ind
}

template <typename T_structure, typename T_iterator>
void index_t<T_structure, T_iterator>::remove_index_entry_with_offsets(const std::unordered_set<gaia_offset_t>& offsets, gaia_txn_id_t gc_txn_id)
void index_t<T_structure, T_iterator>::remove_index_entry_with_offsets(const index_offset_buffer_t& offsets, gaia_txn_id_t gc_txn_id)
{
std::lock_guard lock(m_index_lock);

Expand All @@ -42,7 +41,7 @@ void index_t<T_structure, T_iterator>::remove_index_entry_with_offsets(const std

for (auto it = m_data.begin(); it != m_data.end();)
{
if (offsets.find(it->second.offset) != offsets.end())
if (it->second.txn_id <= gc_txn_id && offsets.has_offset(it->second.offset))
{
it = m_data.erase(it);
}
Expand Down
3 changes: 2 additions & 1 deletion production/db/index/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ add_library(gaia_index STATIC
src/base_index.cpp
src/index.cpp
src/hash_index.cpp
src/range_index.cpp)
src/range_index.cpp
src/index_offset_buffer.cpp)
configure_gaia_target(gaia_index)
target_include_directories(gaia_index PUBLIC ${GAIA_INDEX_INCLUDES})
target_link_libraries(gaia_index PRIVATE gaia_common gaia_payload_types)
Expand Down
45 changes: 45 additions & 0 deletions production/db/index/src/index_offset_buffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/////////////////////////////////////////////
// Copyright (c) Gaia Platform LLC
// All rights reserved.
/////////////////////////////////////////////

#include "index.hpp"

#include <algorithm>
#include <cassert>

namespace gaia
{
namespace db
{
namespace index
{

// Reports whether the given offset was collected into this buffer.
// Only the filled prefix [0, m_size) of the backing array is inspected.
bool index_offset_buffer_t::has_offset(gaia_offset_t offset) const
{
    for (size_t slot = 0; slot < m_size; ++slot)
    {
        if (m_offsets[slot] == offset)
        {
            return true;
        }
    }
    return false;
}

// Reports whether any collected entry carries the given table type.
// Only the filled prefix [0, m_size) of the backing array is inspected.
bool index_offset_buffer_t::has_type(common::gaia_type_t type) const
{
    for (size_t slot = 0; slot < m_size; ++slot)
    {
        if (m_offset_types[slot] == type)
        {
            return true;
        }
    }
    return false;
}

// Returns the number of (offset, type) pairs currently stored in the buffer.
size_t index_offset_buffer_t::size() const
{
    return m_size;
}

bool index_offset_buffer_t::empty() const
{
return m_size == 0;
}

// Appends an (offset, type) pair at the next free slot of the parallel arrays.
//
// Callers are expected to check size() against c_offset_buffer_size before
// inserting (as the batching loop in gc_indexes_from_txn_log does). Writing
// past the end of the fixed-capacity std::arrays would be undefined behavior,
// so a debug-build assert catches any caller that fails to honor the contract.
void index_offset_buffer_t::insert(gaia_offset_t offset, common::gaia_type_t type)
{
    assert(m_size < m_offsets.size() && "index_offset_buffer_t::insert() called on a full buffer!");

    m_offsets[m_size] = offset;
    m_offset_types[m_size] = type;
    ++m_size;
}
} // namespace index
} // namespace db
} // namespace gaia