Skip to content

Commit

Permalink
DONOTMERGE: Hack placing an optional index in front of multivalued ind…
Browse files Browse the repository at this point in the history
…exes.

The point is to avoid the large overhead added to computing the start
offset index when a column is for the most part sparse.
  • Loading branch information
fulmicoton committed Jun 11, 2024
1 parent 08b9fc0 commit 745c96e
Show file tree
Hide file tree
Showing 11 changed files with 529 additions and 340 deletions.
113 changes: 57 additions & 56 deletions columnar/src/column_index/merge/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,61 +150,62 @@ mod tests {
);
}

/// Merges a single multivalued column index whose two rows are emitted in
/// reverse order, and checks the start indexes of the merged index.
///
/// The input index is built from `[0, 2, 5]`, and the merge order lists
/// (segment 0, row 1) before (segment 0, row 0). Assuming those numbers are
/// start offsets, row 1 carries 3 values and row 0 carries 2, which is
/// consistent with the expected merged start indexes `[0, 3, 5]`.
#[test]
fn test_merge_index_multivalued_sorted() {
    let column_indexes: Vec<ColumnIndex> = vec![MultiValueIndex::for_test(&[0, 2, 5]).into()];
    // NOTE(review): `&[2]` presumably is the number of rows of each segment — confirm.
    let merge_row_order: MergeRowOrder = ShuffleMergeOrder::for_test(
        &[2],
        vec![
            RowAddr {
                segment_ord: 0u32,
                row_id: 1u32,
            },
            RowAddr {
                segment_ord: 0u32,
                row_id: 0u32,
            },
        ],
    )
    .into();
    let merged_column_index = merge_column_index(&column_indexes[..], &merge_row_order);
    // Any other cardinality means the merge picked the wrong index kind.
    let SerializableColumnIndex::Multivalued(start_index_iterable) = merged_column_index else {
        panic!("Expected a multivalued index")
    };
    let start_indexes: Vec<RowId> = start_index_iterable.boxed_iter().collect();
    assert_eq!(&start_indexes, &[0, 3, 5]);
}
// #[test]
// fn test_merge_index_multivalued_sorted() {
// let column_indexes: Vec<ColumnIndex> = vec![MultiValueIndex::for_test(&[0, 2, 5]).into()];
// let merge_row_order: MergeRowOrder = ShuffleMergeOrder::for_test(
// &[2],
// vec![
// RowAddr {
// segment_ord: 0u32,
// row_id: 1u32,
// },
// RowAddr {
// segment_ord: 0u32,
// row_id: 0u32,
// },
// ],
// )
// .into();
// let merged_column_index = merge_column_index(&column_indexes[..], &merge_row_order);
// let SerializableColumnIndex::Multivalued(serializable_multivalue_index) = merged_column_index else {
// panic!("Excpected a multivalued index")
// };
// serializable_multivalue_index.doc_ids_with_values_opt.
// let start_indexes: Vec<RowId> = start_index_iterable.boxed_iter().collect();
// assert_eq!(&start_indexes, &[0, 3, 5]);
// }

/// Merges three column indexes (two multivalued ones and an empty one in
/// between) under a shuffled row order, and checks the merged start indexes.
///
/// The merge order lists (segment 2, row 1), (segment 0, row 0) and
/// (segment 2, row 0). Assuming the `for_test` inputs are start offsets, those
/// rows carry 3, 2 and 1 values respectively, which is consistent with the
/// expected merged start indexes `[0, 3, 5, 6]`.
#[test]
fn test_merge_index_multivalued_sorted_several_segment() {
    let column_indexes: Vec<ColumnIndex> = vec![
        MultiValueIndex::for_test(&[0, 2, 5]).into(),
        // The middle segment contributes no rows to the merge order below.
        ColumnIndex::Empty { num_docs: 0 },
        MultiValueIndex::for_test(&[0, 1, 4]).into(),
    ];
    // NOTE(review): `&[2, 0, 2]` presumably is the number of rows of each segment — confirm.
    let merge_row_order: MergeRowOrder = ShuffleMergeOrder::for_test(
        &[2, 0, 2],
        vec![
            RowAddr {
                segment_ord: 2u32,
                row_id: 1u32,
            },
            RowAddr {
                segment_ord: 0u32,
                row_id: 0u32,
            },
            RowAddr {
                segment_ord: 2u32,
                row_id: 0u32,
            },
        ],
    )
    .into();
    let merged_column_index = merge_column_index(&column_indexes[..], &merge_row_order);
    // Any other cardinality means the merge picked the wrong index kind.
    let SerializableColumnIndex::Multivalued(start_index_iterable) = merged_column_index else {
        panic!("Expected a multivalued index")
    };
    let start_indexes: Vec<RowId> = start_index_iterable.boxed_iter().collect();
    assert_eq!(&start_indexes, &[0, 3, 5, 6]);
}
// #[test]
// fn test_merge_index_multivalued_sorted_several_segment() {
// let column_indexes: Vec<ColumnIndex> = vec![
// MultiValueIndex::for_test(&[0, 2, 5]).into(),
// ColumnIndex::Empty { num_docs: 0 },
// MultiValueIndex::for_test(&[0, 1, 4]).into(),
// ];
// let merge_row_order: MergeRowOrder = ShuffleMergeOrder::for_test(
// &[2, 0, 2],
// vec![
// RowAddr {
// segment_ord: 2u32,
// row_id: 1u32,
// },
// RowAddr {
// segment_ord: 0u32,
// row_id: 0u32,
// },
// RowAddr {
// segment_ord: 2u32,
// row_id: 0u32,
// },
// ],
// )
// .into();
// let merged_column_index = merge_column_index(&column_indexes[..], &merge_row_order);
// let SerializableColumnIndex::Multivalued(serializable_multivalue_index) = merged_column_index else {
// panic!("Excpected a multivalued index")
// };
// let start_indexes: Vec<RowId> = start_index_iterable.boxed_iter().collect();
// assert_eq!(&start_indexes, &[0, 3, 5, 6]);
// }
}
95 changes: 48 additions & 47 deletions columnar/src/column_index/merge/shuffled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,23 @@ pub fn merge_column_index_shuffled<'a>(
cardinality_after_merge: Cardinality,
shuffle_merge_order: &'a ShuffleMergeOrder,
) -> SerializableColumnIndex<'a> {
match cardinality_after_merge {
Cardinality::Full => SerializableColumnIndex::Full,
Cardinality::Optional => {
let non_null_row_ids =
merge_column_index_shuffled_optional(column_indexes, shuffle_merge_order);
SerializableColumnIndex::Optional {
non_null_row_ids,
num_rows: shuffle_merge_order.num_rows(),
}
}
Cardinality::Multivalued => {
let multivalue_start_index =
merge_column_index_shuffled_multivalued(column_indexes, shuffle_merge_order);
SerializableColumnIndex::Multivalued(multivalue_start_index)
}
}
todo!();
// match cardinality_after_merge {
// Cardinality::Full => SerializableColumnIndex::Full,
// Cardinality::Optional => {
// let non_null_row_ids =
// merge_column_index_shuffled_optional(column_indexes, shuffle_merge_order);
// SerializableColumnIndex::Optional {
// non_null_row_ids,
// num_rows: shuffle_merge_order.num_rows(),
// }
// }
// Cardinality::Multivalued => {
// let multivalue_start_index =
// merge_column_index_shuffled_multivalued(column_indexes, shuffle_merge_order);
// SerializableColumnIndex::Multivalued(multivalue_start_index)
// }
// }
}

/// Merge several column indexes into one, ordering rows according to the merge_order passed as
Expand Down Expand Up @@ -137,35 +138,35 @@ mod tests {
assert!(integrate_num_vals([3, 0, 10, 20].into_iter()).eq([0, 3, 3, 13, 33].into_iter()));
}

/// Shuffle-merges an optional column index with a full one and checks the
/// resulting optional index.
///
/// The merge order lists (segment 0, row 1) followed by (segment 1, row 0).
/// The merged index is expected to report 2 rows, with only merged row 1
/// being non-null.
#[test]
fn test_merge_column_index_optional_shuffle() {
    let column_indexes: [ColumnIndex; 2] = [
        OptionalIndex::for_test(2, &[0]).into(),
        ColumnIndex::Full,
    ];
    // (segment_ord, row_id) pairs, in the order the merged rows are emitted.
    let row_addrs: Vec<RowAddr> = [(0u32, 1u32), (1u32, 0u32)]
        .into_iter()
        .map(|(segment_ord, row_id)| RowAddr {
            segment_ord,
            row_id,
        })
        .collect();
    let shuffle_merge_order = ShuffleMergeOrder::for_test(&[2, 1], row_addrs);
    let merged = merge_column_index_shuffled(
        &column_indexes[..],
        Cardinality::Optional,
        &shuffle_merge_order,
    );
    // Anything but an optional index is a test failure.
    let (non_null_row_ids, num_rows) = match merged {
        SerializableColumnIndex::Optional {
            non_null_row_ids,
            num_rows,
        } => (non_null_row_ids, num_rows),
        _ => panic!(),
    };
    assert_eq!(num_rows, 2);
    let non_null_rows: Vec<RowId> = non_null_row_ids.boxed_iter().collect();
    assert_eq!(&non_null_rows, &[1]);
}
// #[test]
// fn test_merge_column_index_optional_shuffle() {
// let optional_index: ColumnIndex = OptionalIndex::for_test(2, &[0]).into();
// let column_indexes = [optional_index, ColumnIndex::Full];
// let row_addrs = vec![
// RowAddr {
// segment_ord: 0u32,
// row_id: 1u32,
// },
// RowAddr {
// segment_ord: 1u32,
// row_id: 0u32,
// },
// ];
// let shuffle_merge_order = ShuffleMergeOrder::for_test(&[2, 1], row_addrs);
// let serializable_index = merge_column_index_shuffled(
// &column_indexes[..],
// Cardinality::Optional,
// &shuffle_merge_order,
// );
// let SerializableColumnIndex::Optional {
// non_null_row_ids,
// num_rows,
// } = serializable_index
// else {
// panic!()
// };
// assert_eq!(num_rows, 2);
// let non_null_rows: Vec<RowId> = non_null_row_ids.boxed_iter().collect();
// assert_eq!(&non_null_rows, &[1]);
// }
}
Loading

0 comments on commit 745c96e

Please sign in to comment.