Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IVF-Flat index splitting #1271

Merged
merged 33 commits into from
Mar 15, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
328a179
Initial index splitting
lowener Feb 7, 2023
a83aca4
Adapt `extend`
lowener Feb 7, 2023
843904a
Refactoring: build and extend fix
lowener Feb 9, 2023
f09b3a0
Refactor ivf flat search for index splitting
lowener Feb 10, 2023
fdc9395
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Feb 10, 2023
93d5b35
Use mdspan/mdarray aliases
lowener Feb 12, 2023
5fcf564
Add serialization
lowener Feb 13, 2023
c49bf67
Deserialize ivf_flat and style fix
lowener Feb 16, 2023
6ba87d4
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Feb 19, 2023
7e2d80b
Integrate ivf::list to ivf_flat index splitting
lowener Feb 21, 2023
74e0a8c
Update refine
lowener Feb 21, 2023
7b36742
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Feb 22, 2023
135a9b6
Use std vector for ivf flat index list
lowener Feb 24, 2023
81b2cbf
Test second variant of `ivf_flat::extend`
lowener Feb 24, 2023
fd33dbc
Use ValueT template on spec
lowener Feb 25, 2023
539fbc5
Use second variant of ivf_extend
lowener Feb 26, 2023
31815d7
Merge branch 'branch-23.04' into 23.04-flat-split
cjnolet Feb 27, 2023
33bfb82
Fix spec template
lowener Feb 28, 2023
adb96e4
Revert ValueT on ivfpq
lowener Mar 6, 2023
32936c9
Make ivf::list more flexible
achirkin Mar 7, 2023
23e0f84
Merge pull request #1 from achirkin/23.04-flat-split
lowener Mar 7, 2023
acf1888
Use new list API for ivf flat
lowener Mar 7, 2023
0283d25
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Mar 7, 2023
f790abf
Fix adaptive centers
lowener Mar 9, 2023
6a05196
Fix adaptive center norms testing
lowener Mar 9, 2023
eda7923
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Mar 9, 2023
f0a7031
Fix list spec template after merge
lowener Mar 9, 2023
a6c54a4
Add serialization overload
lowener Mar 9, 2023
85b4aa1
Fix list size override
lowener Mar 10, 2023
692af0d
Fix resizelist with interleaved format
lowener Mar 10, 2023
d892ebb
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Mar 10, 2023
a8b96a7
Fix refine list resize operation
lowener Mar 12, 2023
1870541
Merge branch 'branch-23.04' into 23.04-flat-split
lowener Mar 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions cpp/include/raft/neighbors/ivf_flat.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ void extend(raft::device_resources const& handle,
const IdxT* new_indices,
IdxT n_rows)
{
*index = extend(handle, *index, new_vectors, new_indices, n_rows);
raft::spatial::knn::ivf_flat::detail::extend(handle, index, new_vectors, new_indices, n_rows);
}

/**
Expand Down Expand Up @@ -293,11 +293,11 @@ void extend(raft::device_resources const& handle,
raft::device_matrix_view<const value_t, idx_t, row_major> new_vectors,
std::optional<raft::device_vector_view<const idx_t, idx_t>> new_indices = std::nullopt)
{
*index = extend(handle,
*index,
new_vectors.data_handle(),
new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
static_cast<idx_t>(new_vectors.extent(0)));
extend(handle,
index,
new_vectors.data_handle(),
new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
static_cast<idx_t>(new_vectors.extent(0)));
}

/** @} */
Expand Down
209 changes: 130 additions & 79 deletions cpp/include/raft/neighbors/ivf_flat_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
#include "ann_types.hpp"

#include <raft/core/device_mdarray.hpp>
#include <raft/core/host_mdarray.hpp>
#include <raft/core/mdspan_types.hpp>
#include <raft/core/error.hpp>
#include <raft/distance/distance_types.hpp>
#include <raft/matrix/init.cuh>
#include <raft/util/integer_utils.hpp>

#include <memory>
#include <optional>
#include <type_traits>

Expand All @@ -35,6 +39,14 @@ namespace raft::neighbors::ivf_flat {
/** Size of the interleaved group (see `index::data` description). */
constexpr static uint32_t kIndexGroupSize = 32;

/**
 * Sentinel value written into every slot of a list's `indices` buffer when the list is
 * allocated (see the `list_data` constructor).
 * The capacity of a list is rounded up to a multiple of `kIndexGroupSize`, so one may encounter
 * this value when accessing a record within a cluster that lies beyond the `list_sizes()` bound.
 * The value is the largest representable `IdxT` minus one.
 */
template <typename IdxT>
constexpr static IdxT kInvalidRecord = std::numeric_limits<IdxT>::max() - 1;

struct index_params : ann::index_params {
/** The number of inverted lists (clusters) */
uint32_t n_lists = 1024;
Expand Down Expand Up @@ -65,6 +77,37 @@ struct search_params : ann::search_params {
static_assert(std::is_aggregate_v<index_params>);
static_assert(std::is_aggregate_v<search_params>);

/**
 * The data for a single list (cluster).
 *
 * @tparam T     type of the stored vector elements
 * @tparam IdxT  type of the source indices
 * @tparam SizeT type used for the list size and for the extents of the buffers
 */
template <typename T, typename IdxT, typename SizeT = uint32_t>
struct list_data {
  /** Cluster data [capacity, dim]. */
  device_matrix<T, SizeT, row_major> data;
  /** Source indices [capacity]. */
  device_vector<IdxT, SizeT> indices;
  /** The actual size of the content (number of records requested at construction). */
  std::atomic<SizeT> size;

  /**
   * Allocate the storage for one list and mark every index slot as invalid.
   *
   * The allocated capacity is `n_rows` passed through `bound_by_power_of_two` and then rounded up
   * to a multiple of `kIndexGroupSize`, so it can be larger than `n_rows`.
   *
   * @param res    resources used for the allocations and for the fill operation
   * @param n_rows number of records the list must hold; stored in `size`
   * @param dim    dimensionality of the data (number of columns of `data`)
   *
   * A `std::bad_alloc` raised by the allocations is reported through `RAFT_FAIL`, with the
   * requested size and the selected capacity in the message.
   */
  list_data(raft::device_resources const& res, SizeT n_rows, uint32_t dim) : size{n_rows}
  {
    // Use the constructor argument instead of re-loading the atomic member: the value is the same
    // here and it avoids repeated atomic reads.
    const auto capacity =
      round_up_safe<SizeT>(bound_by_power_of_two<SizeT>(n_rows), kIndexGroupSize);
    try {
      data    = make_device_matrix<T, SizeT, row_major>(res, capacity, dim);
      indices = make_device_vector<IdxT, SizeT>(res, capacity);
    } catch (const std::bad_alloc& e) {
      // Exceptions are caught by const reference; the handler only reads `what()`.
      RAFT_FAIL(
        "ivf-flat: failed to allocate a big enough index list to hold all data "
        "(requested size: %zu records, selected capacity: %zu records). "
        "Allocator exception: %s",
        size_t(n_rows),
        size_t(capacity),
        e.what());
    }
    // Fill the index buffer with a pre-defined marker for easier debugging
    matrix::fill(res, indices.view(), ivf_flat::kInvalidRecord<IdxT>);
  }
};

/**
* @brief IVF-flat index.
*
Expand Down Expand Up @@ -118,59 +161,24 @@ struct index : ann::index {
* x[16, 4], x[16, 5], x[17, 4], x[17, 5], ... x[30, 4], x[30, 5], - , - ,
*
*/
inline auto data() noexcept -> device_mdspan<T, extent_2d<IdxT>, row_major>
{
return data_.view();
}
[[nodiscard]] inline auto data() const noexcept
-> device_mdspan<const T, extent_2d<size_t>, row_major>
{
return data_.view();
}

/** Inverted list indices: ids of items in the source data [size] */
inline auto indices() noexcept -> device_mdspan<IdxT, extent_1d<IdxT>, row_major>
{
return indices_.view();
}
[[nodiscard]] inline auto indices() const noexcept
-> device_mdspan<const IdxT, extent_1d<IdxT>, row_major>
{
return indices_.view();
}

/** Sizes of the lists (clusters) [n_lists] */
inline auto list_sizes() noexcept -> device_mdspan<uint32_t, extent_1d<uint32_t>, row_major>
inline auto list_sizes() noexcept -> device_vector_view<uint32_t, uint32_t>
{
return list_sizes_.view();
}
[[nodiscard]] inline auto list_sizes() const noexcept
-> device_mdspan<const uint32_t, extent_1d<uint32_t>, row_major>
-> device_vector_view<const uint32_t, uint32_t>
{
return list_sizes_.view();
}

/**
* Offsets into the lists [n_lists + 1].
* The last value contains the total length of the index.
*/
inline auto list_offsets() noexcept -> device_mdspan<IdxT, extent_1d<uint32_t>, row_major>
{
return list_offsets_.view();
}
[[nodiscard]] inline auto list_offsets() const noexcept
-> device_mdspan<const IdxT, extent_1d<uint32_t>, row_major>
{
return list_offsets_.view();
}

/** k-means cluster centers corresponding to the lists [n_lists, dim] */
inline auto centers() noexcept -> device_mdspan<float, extent_2d<uint32_t>, row_major>
inline auto centers() noexcept -> device_matrix_view<float, uint32_t, row_major>
{
return centers_.view();
}
[[nodiscard]] inline auto centers() const noexcept
-> device_mdspan<const float, extent_2d<uint32_t>, row_major>
-> device_matrix_view<const float, uint32_t, row_major>
{
return centers_.view();
}
Expand All @@ -182,28 +190,31 @@ struct index : ann::index {
* calculation.
*/
inline auto center_norms() noexcept
-> std::optional<device_mdspan<float, extent_1d<uint32_t>, row_major>>
-> std::optional<device_vector_view<float, uint32_t>>
{
if (center_norms_.has_value()) {
return std::make_optional<device_mdspan<float, extent_1d<uint32_t>, row_major>>(
return std::make_optional<device_vector_view<float, uint32_t>>(
center_norms_->view());
} else {
return std::nullopt;
}
}
[[nodiscard]] inline auto center_norms() const noexcept
-> std::optional<device_mdspan<const float, extent_1d<uint32_t>, row_major>>
-> std::optional<device_vector_view<const float, uint32_t>>
{
if (center_norms_.has_value()) {
return std::make_optional<device_mdspan<const float, extent_1d<uint32_t>, row_major>>(
return std::make_optional<device_vector_view<const float, uint32_t>>(
center_norms_->view());
} else {
return std::nullopt;
}
}

/** Total length of the index. */
[[nodiscard]] constexpr inline auto size() const noexcept -> IdxT { return indices_.extent(0); }
[[nodiscard]] constexpr inline auto size() const noexcept -> IdxT
{
return total_size_;
}
/** Dimensionality of the data. */
[[nodiscard]] constexpr inline auto dim() const noexcept -> uint32_t
{
Expand All @@ -212,7 +223,7 @@ struct index : ann::index {
/** Number of clusters/inverted lists. */
[[nodiscard]] constexpr inline auto n_lists() const noexcept -> uint32_t
{
return centers_.extent(0);
return lists_.extent(0);
}

// Don't allow copying the index for performance reasons (try avoiding copying data)
Expand All @@ -223,7 +234,7 @@ struct index : ann::index {
~index() = default;

/** Construct an empty index. It needs to be trained and then populated. */
index(raft::device_resources const& handle,
index(raft::device_resources const& res,
raft::distance::DistanceType metric,
uint32_t n_lists,
bool adaptive_centers,
Expand All @@ -232,42 +243,80 @@ struct index : ann::index {
veclen_(calculate_veclen(dim)),
metric_(metric),
adaptive_centers_(adaptive_centers),
data_(make_device_mdarray<T>(handle, make_extents<IdxT>(0, dim))),
indices_(make_device_mdarray<IdxT>(handle, make_extents<IdxT>(0))),
list_sizes_(make_device_mdarray<uint32_t>(handle, make_extents<uint32_t>(n_lists))),
list_offsets_(make_device_mdarray<IdxT>(handle, make_extents<uint32_t>(n_lists + 1))),
centers_(make_device_mdarray<float>(handle, make_extents<uint32_t>(n_lists, dim))),
center_norms_(std::nullopt)
centers_(make_device_matrix<float, uint32_t>(res, n_lists, dim)),
center_norms_(std::nullopt),
lists_{make_host_vector<std::shared_ptr<list_data<T, IdxT>>, uint32_t>(n_lists)},
list_sizes_{make_device_vector<uint32_t, uint32_t>(res, n_lists)},
tfeher marked this conversation as resolved.
Show resolved Hide resolved
data_ptrs_{make_device_vector<T*, uint32_t>(res, n_lists)},
inds_ptrs_{make_device_vector<IdxT*, uint32_t>(res, n_lists)}
{
check_consistency();
for (uint32_t i = 0; i < n_lists; i++) {
lists_(i) = std::shared_ptr<list_data<T, IdxT>>();
}
}

/** Construct an empty index. It needs to be trained and then populated. */
index(raft::device_resources const& handle, const index_params& params, uint32_t dim)
: index(handle, params.metric, params.n_lists, params.adaptive_centers, dim)
index(raft::device_resources const& res, const index_params& params, uint32_t dim)
: index(res, params.metric, params.n_lists, params.adaptive_centers, dim)
{
}

/**
 * Pointers to the inverted lists (clusters) data [n_lists].
 *
 * Returns a view of `data_ptrs_`. `recompute_internal_state()` writes into each entry the
 * `data` buffer handle of the corresponding list, or `nullptr` when the list is not allocated.
 */
inline auto data_ptrs() noexcept -> device_vector_view<T*, uint32_t>
{
return data_ptrs_.view();
}
// Const overload: the pointer entries cannot be reassigned through the returned view.
[[nodiscard]] inline auto data_ptrs() const noexcept
-> device_vector_view<T* const, uint32_t>
{
return data_ptrs_.view();
}

/**
 * Pointers to the inverted lists (clusters) indices [n_lists].
 *
 * Returns a view of `inds_ptrs_`. `recompute_internal_state()` writes into each entry the
 * `indices` buffer handle of the corresponding list, or `nullptr` when the list is not allocated.
 */
inline auto inds_ptrs() noexcept -> device_vector_view<IdxT*, uint32_t>
{
return inds_ptrs_.view();
}
// Const overload: the pointer entries cannot be reassigned through the returned view.
[[nodiscard]] inline auto inds_ptrs() const noexcept
-> device_vector_view<IdxT* const, uint32_t>
{
return inds_ptrs_.view();
}

/**
* Replace the content of the index with new uninitialized mdarrays to hold the indicated amount
* of data.
* Update the state of the dependent index members.
*/
void allocate(raft::device_resources const& handle, IdxT index_size)
void recompute_internal_state(raft::device_resources const& res)
{
data_ = make_device_mdarray<T>(handle, make_extents<IdxT>(index_size, dim()));
indices_ = make_device_mdarray<IdxT>(handle, make_extents<IdxT>(index_size));
auto stream = res.get_stream();

switch (metric_) {
case raft::distance::DistanceType::L2Expanded:
case raft::distance::DistanceType::L2SqrtExpanded:
case raft::distance::DistanceType::L2Unexpanded:
case raft::distance::DistanceType::L2SqrtUnexpanded:
center_norms_ = make_device_mdarray<float>(handle, make_extents<uint32_t>(n_lists()));
break;
default: center_norms_ = std::nullopt;
// Actualize the list pointers
auto this_lists = lists();
auto this_data_ptrs = data_ptrs();
auto this_inds_ptrs = inds_ptrs();
IdxT recompute_total_size = 0;
for (uint32_t label = 0; label < this_lists.size(); label++) {
const auto data_ptr = this_lists(label) ? this_lists(label)->data.data_handle() : nullptr;
const auto inds_ptr = this_lists(label) ? this_lists(label)->indices.data_handle() : nullptr;
const auto list_size = this_lists(label) ? IdxT(this_lists(label)->size) : 0;
copy(&this_data_ptrs(label), &data_ptr, 1, stream);
copy(&this_inds_ptrs(label), &inds_ptr, 1, stream);
recompute_total_size += list_size;
}
total_size_ = recompute_total_size;
}

check_consistency();
/**
 * Lists' data and indices [n_lists].
 *
 * Returns a host-side view of `lists_`: one `std::shared_ptr<list_data<T, IdxT>>` per list.
 * An entry may be an empty pointer (the constructor initializes every entry that way), so
 * check it before dereferencing.
 */
inline auto lists() noexcept
-> host_vector_view<std::shared_ptr<list_data<T, IdxT>>, uint32_t>
{
return lists_.view();
}
// Const overload: the shared pointers themselves cannot be reassigned through the returned view.
[[nodiscard]] inline auto lists() const noexcept
-> host_vector_view<const std::shared_ptr<list_data<T, IdxT>>, uint32_t>
{
return lists_.view();
}

private:
Expand All @@ -278,26 +327,28 @@ struct index : ann::index {
uint32_t veclen_;
raft::distance::DistanceType metric_;
bool adaptive_centers_;
device_mdarray<T, extent_2d<IdxT>, row_major> data_;
device_mdarray<IdxT, extent_1d<IdxT>, row_major> indices_;
device_mdarray<uint32_t, extent_1d<uint32_t>, row_major> list_sizes_;
device_mdarray<IdxT, extent_1d<uint32_t>, row_major> list_offsets_;
device_mdarray<float, extent_2d<uint32_t>, row_major> centers_;
std::optional<device_mdarray<float, extent_1d<uint32_t>, row_major>> center_norms_;
host_vector<std::shared_ptr<list_data<T, IdxT>>, uint32_t> lists_;
lowener marked this conversation as resolved.
Show resolved Hide resolved
device_vector<uint32_t, uint32_t> list_sizes_;
device_matrix<float, uint32_t, row_major> centers_;
std::optional<device_vector<float, uint32_t>> center_norms_;

// Computed members
device_vector<T*, uint32_t> data_ptrs_;
device_vector<IdxT*, uint32_t> inds_ptrs_;
IdxT total_size_;

/** Throw an error if the index content is inconsistent. */
void check_consistency()
{
auto n_lists = lists_.extent(0);
RAFT_EXPECTS(dim() % veclen_ == 0, "dimensionality is not a multiple of the veclen");
RAFT_EXPECTS(data_.extent(0) == indices_.extent(0), "inconsistent index size");
RAFT_EXPECTS(data_.extent(1) == IdxT(centers_.extent(1)), "inconsistent data dimensionality");
RAFT_EXPECTS(list_sizes_.extent(0) == n_lists, "inconsistent list size");
RAFT_EXPECTS(data_ptrs_.extent(0) == n_lists, "inconsistent list size");
RAFT_EXPECTS(inds_ptrs_.extent(0) == n_lists, "inconsistent list size");
RAFT_EXPECTS( //
(centers_.extent(0) == list_sizes_.extent(0)) && //
(centers_.extent(0) + 1 == list_offsets_.extent(0)) && //
(!center_norms_.has_value() || centers_.extent(0) == center_norms_->extent(0)),
"inconsistent number of lists (clusters)");
RAFT_EXPECTS(reinterpret_cast<size_t>(data_.data_handle()) % (veclen_ * sizeof(T)) == 0,
"The data storage pointer is not aligned to the vector length");
}

static auto calculate_veclen(uint32_t dim) -> uint32_t
Expand Down
Loading