Skip to content

Commit

Permalink
Better mixed types support in aggs and fix serialization issue (#1971)
Browse files Browse the repository at this point in the history
* Better mixed types support in aggs and fix serialization issue

- Improve support for mixed types in JSON field aggregations (pick the right field, #1913)
- Resolve the issue with JSON serialization for numeric keys (fixes #1967)
- Add JSON round-trip test for term buckets
- Remove `u64_lenient`, as this is a footgun without the type
- move aggregation benchmarks

* remove shadowing
  • Loading branch information
PSeitz authored Mar 31, 2023
1 parent 571735c commit 5c380b7
Show file tree
Hide file tree
Showing 9 changed files with 811 additions and 628 deletions.
611 changes: 611 additions & 0 deletions src/aggregation/agg_bench.rs

Large diffs are not rendered by default.

38 changes: 32 additions & 6 deletions src/aggregation/agg_req_with_accessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ pub struct BucketAggregationWithAccessor {
pub(crate) column_block_accessor: ColumnBlockAccessor<u64>,
}

fn get_numeric_or_date_column_types() -> &'static [ColumnType] {
&[
ColumnType::F64,
ColumnType::U64,
ColumnType::I64,
ColumnType::DateTime,
]
}

impl BucketAggregationWithAccessor {
fn try_from_bucket(
bucket: &BucketAggregationType,
Expand All @@ -57,19 +66,31 @@ impl BucketAggregationWithAccessor {
let (accessor, field_type) = match &bucket {
BucketAggregationType::Range(RangeAggregation {
field: field_name, ..
}) => get_ff_reader_and_validate(reader, field_name)?,
}) => get_ff_reader_and_validate(
reader,
field_name,
Some(get_numeric_or_date_column_types()),
)?,
BucketAggregationType::Histogram(HistogramAggregation {
field: field_name, ..
}) => get_ff_reader_and_validate(reader, field_name)?,
}) => get_ff_reader_and_validate(
reader,
field_name,
Some(get_numeric_or_date_column_types()),
)?,
BucketAggregationType::DateHistogram(DateHistogramAggregationReq {
field: field_name,
..
}) => get_ff_reader_and_validate(reader, field_name)?,
}) => get_ff_reader_and_validate(
reader,
field_name,
Some(get_numeric_or_date_column_types()),
)?,
BucketAggregationType::Terms(TermsAggregation {
field: field_name, ..
}) => {
str_dict_column = reader.fast_fields().str(field_name)?;
get_ff_reader_and_validate(reader, field_name)?
get_ff_reader_and_validate(reader, field_name, None)?
}
};
let sub_aggregation = sub_aggregation.clone();
Expand Down Expand Up @@ -110,7 +131,11 @@ impl MetricAggregationWithAccessor {
| MetricAggregation::Min(MinAggregation { field: field_name })
| MetricAggregation::Stats(StatsAggregation { field: field_name })
| MetricAggregation::Sum(SumAggregation { field: field_name }) => {
let (accessor, field_type) = get_ff_reader_and_validate(reader, field_name)?;
let (accessor, field_type) = get_ff_reader_and_validate(
reader,
field_name,
Some(get_numeric_or_date_column_types()),
)?;

Ok(MetricAggregationWithAccessor {
accessor,
Expand Down Expand Up @@ -157,10 +182,11 @@ pub(crate) fn get_aggs_with_accessor_and_validate(
fn get_ff_reader_and_validate(
reader: &SegmentReader,
field_name: &str,
allowed_column_types: Option<&[ColumnType]>,
) -> crate::Result<(columnar::Column<u64>, ColumnType)> {
let ff_fields = reader.fast_fields();
let ff_field_with_type = ff_fields
.u64_lenient_with_type(field_name)?
.u64_lenient_for_type(allowed_column_types, field_name)?
.unwrap_or_else(|| {
(
Column::build_empty_column(reader.num_docs()),
Expand Down
Loading

0 comments on commit 5c380b7

Please sign in to comment.