
switch to ms in histogram for date type #2045

Merged May 19, 2023 · 8 commits · Changes from 7 commits
3 changes: 3 additions & 0 deletions columnar/src/columnar/column_type.rs
@@ -54,6 +54,9 @@ impl ColumnType {
     pub fn to_code(self) -> u8 {
         self as u8
     }
+    pub fn is_date_time(&self) -> bool {
+        self == &ColumnType::DateTime
+    }
 
     pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
         COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData)
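The new `is_date_time` check presumably exists so the aggregation layer can tell date columns apart and convert between the millisecond values used by date histogram requests and the nanosecond values stored in DateTime fast fields. A minimal sketch of that kind of conversion, assuming the columnar crate is importable as `tantivy_columnar` (the function itself is illustrative, not part of this diff):

```rust
use tantivy_columnar::ColumnType;

/// Illustrative sketch: scale a request-side millisecond value to the fast
/// field's native precision when the column holds DateTime values, which
/// are stored as nanosecond timestamps.
fn to_fastfield_precision(value_ms: i64, column_type: ColumnType) -> i64 {
    if column_type.is_date_time() {
        value_ms * 1_000_000 // ms -> ns
    } else {
        value_ms
    }
}
```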
2 changes: 1 addition & 1 deletion src/aggregation/agg_req.rs
@@ -39,7 +39,7 @@ use super::metric::{
 };
 
 /// The top-level aggregation request structure, which contains [`Aggregation`] and their user
-/// defined names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
+/// defined names. It is also used in bucket aggregations to define sub-aggregations.
 ///
 /// The key is the user defined name of the aggregation.
 pub type Aggregations = HashMap<String, Aggregation>;
6 changes: 4 additions & 2 deletions src/aggregation/agg_req_with_accessor.rs
@@ -32,12 +32,13 @@ impl AggregationsWithAccessor {

 pub struct AggregationWithAccessor {
     /// In general there can be buckets without fast field access, e.g. buckets that are created
-    /// based on search terms. So eventually this needs to be Option or moved.
+    /// based on search terms. That is not the case currently, but eventually this needs to be
+    /// Option or moved.
     pub(crate) accessor: Column<u64>,
     pub(crate) str_dict_column: Option<StrColumn>,
     pub(crate) field_type: ColumnType,
     /// In case there are multiple types of fast fields, e.g. string and numeric.
-    /// Only used for term aggregations
+    /// Only used for term aggregations currently.
     pub(crate) accessor2: Option<(Column<u64>, ColumnType)>,
     pub(crate) sub_aggregation: AggregationsWithAccessor,
     pub(crate) limits: ResourceLimitGuard,
@@ -105,6 +106,7 @@ impl AggregationWithAccessor {
                 (accessor, field_type)
             }
         };
+
         let sub_aggregation = sub_aggregation.clone();
         Ok(AggregationWithAccessor {
             accessor,
121 changes: 65 additions & 56 deletions src/aggregation/bucket/histogram/date_histogram.rs
@@ -67,6 +67,13 @@ pub struct DateHistogramAggregationReq {
     pub fixed_interval: Option<String>,
     /// Intervals implicitly defines an absolute grid of buckets `[interval * k, interval * (k +
     /// 1))`.
+    ///
+    /// Offset makes it possible to shift this grid into
+    /// `[offset + interval * k, offset + interval * (k + 1))`. Offset has to be in the range [0,
+    /// interval).
+    ///
+    /// The `offset` parameter has the same syntax as the `fixed_interval` parameter, but
+    /// also allows for negative values.
     pub offset: Option<String>,
     /// The minimum number of documents in a bucket to be returned. Defaults to 0.
     pub min_doc_count: Option<u64>,
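To make the shifted grid concrete, here is a small self-contained sketch (not from this diff) of how a timestamp lands in a bucket, with every quantity in milliseconds:

```rust
/// The bucket key for `value` on the grid
/// `[offset + interval * k, offset + interval * (k + 1))`, all in ms.
fn bucket_key(value: f64, interval: f64, offset: f64) -> f64 {
    ((value - offset) / interval).floor() * interval + offset
}

fn main() {
    let day = 24.0 * 60.0 * 60.0 * 1000.0; // "1d" in ms
    let offset = 3.0 * 60.0 * 60.0 * 1000.0; // "3h" in ms
    // 2015-01-01T06:00:00Z falls in the daily bucket that starts at 03:00Z.
    let value = 1_420_070_400_000.0 + 6.0 * 60.0 * 60.0 * 1000.0;
    assert_eq!(bucket_key(value, day, offset), 1_420_070_400_000.0 + offset);
}
```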
@@ -77,7 +84,7 @@ pub struct DateHistogramAggregationReq {
     /// hard_bounds only limits the buckets, to force a range set both extended_bounds and
     /// hard_bounds to the same range.
     ///
-    /// Needs to be provided as timestamp in nanosecond precision.
+    /// Needs to be provided as timestamp in millisecond precision.
     ///
     /// ## Example
     /// ```json
@@ -88,7 +95,7 @@ pub struct DateHistogramAggregationReq {
/// "interval": "1d",
/// "hard_bounds": {
/// "min": 0,
/// "max": 1420502400000000000
/// "max": 1420502400000
/// }
/// }
/// }
@@ -114,11 +121,11 @@ impl DateHistogramAggregationReq {
         self.validate()?;
         Ok(HistogramAggregation {
             field: self.field.to_string(),
-            interval: parse_into_nanoseconds(self.fixed_interval.as_ref().unwrap())? as f64,
+            interval: parse_into_milliseconds(self.fixed_interval.as_ref().unwrap())? as f64,
             offset: self
                 .offset
                 .as_ref()
-                .map(|offset| parse_offset_into_nanosecs(offset))
+                .map(|offset| parse_offset_into_milliseconds(offset))
                 .transpose()?
                 .map(|el| el as f64),
             min_doc_count: self.min_doc_count,
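For intuition, the end result of this conversion for a request with `fixed_interval: "1d"` and `offset: "-2h"` is a plain histogram over millisecond values. The test below is an illustrative sketch that assumes it sits in this module, next to the private parse helpers:

```rust
#[test]
fn date_histogram_req_converts_to_milliseconds() {
    // "1d" and "-2h" expressed on the millisecond grid:
    let interval = parse_into_milliseconds("1d").unwrap() as f64;
    let offset = parse_offset_into_milliseconds("-2h").unwrap() as f64;
    assert_eq!(interval, 86_400_000.0); // 24 * 60 * 60 * 1000
    assert_eq!(offset, -7_200_000.0); // -(2 * 60 * 60 * 1000)
}
```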
Expand Down Expand Up @@ -153,7 +160,7 @@ impl DateHistogramAggregationReq {
             ));
         }
 
-        parse_into_nanoseconds(self.fixed_interval.as_ref().unwrap())?;
+        parse_into_milliseconds(self.fixed_interval.as_ref().unwrap())?;
 
         Ok(())
     }
@@ -179,7 +186,7 @@ pub enum DateHistogramParseError {
     OutOfBounds(String),
 }
 
-fn parse_offset_into_nanosecs(input: &str) -> Result<i64, AggregationError> {
+fn parse_offset_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
     let is_sign = |byte| &[byte] == b"-" || &[byte] == b"+";
     if input.is_empty() {
         return Err(DateHistogramParseError::InvalidOffset(input.to_string()).into());
@@ -188,18 +195,18 @@ fn parse_offset_into_nanosecs(input: &str) -> Result<i64, AggregationError> {
     let has_sign = is_sign(input.as_bytes()[0]);
     if has_sign {
         let (sign, input) = input.split_at(1);
-        let val = parse_into_nanoseconds(input)?;
+        let val = parse_into_milliseconds(input)?;
         if sign == "-" {
             Ok(-val)
         } else {
             Ok(val)
         }
     } else {
-        parse_into_nanoseconds(input)
+        parse_into_milliseconds(input)
     }
 }
 
-fn parse_into_nanoseconds(input: &str) -> Result<i64, AggregationError> {
+fn parse_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
     let split_boundary = input
         .as_bytes()
         .iter()
@@ -218,17 +225,18 @@ fn parse_into_nanoseconds(input: &str) -> Result<i64, AggregationError> {
         // here and being defensive does not hurt.
         .map_err(|_err| DateHistogramParseError::NumberMissing(input.to_string()))?;
 
-    let multiplier_from_unit = match unit {
-        "ms" => 1,
-        "s" => 1000,
-        "m" => 60 * 1000,
-        "h" => 60 * 60 * 1000,
-        "d" => 24 * 60 * 60 * 1000,
+    let unit_in_ms = match unit {
+        "ms" | "milliseconds" => 1,
+        "s" | "seconds" => 1000,
+        "m" | "minutes" => 60 * 1000,
+        "h" | "hours" => 60 * 60 * 1000,
+        "d" | "days" => 24 * 60 * 60 * 1000,
         _ => return Err(DateHistogramParseError::UnitNotRecognized(unit.to_string()).into()),
     };
 
-    let val = (number * multiplier_from_unit)
-        .checked_mul(1_000_000)
+    let val = number * unit_in_ms;
+    // The field type is in nanoseconds precision, so validate the value to fit the range.
+    val.checked_mul(1_000_000)
         .ok_or_else(|| DateHistogramParseError::OutOfBounds(input.to_string()))?;
 
     Ok(val)
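The guard above is worth spelling out: the returned value stays in milliseconds, but DateTime fast fields use nanosecond precision, so `checked_mul(1_000_000)` rejects any input that would overflow `i64` once scaled to nanoseconds. A standalone sketch of that check (illustrative, not part of this diff):

```rust
/// A millisecond value is only acceptable if it still fits in i64 when
/// expressed as nanoseconds (the DateTime fast field's precision).
fn fits_when_scaled_to_ns(val_ms: i64) -> bool {
    val_ms.checked_mul(1_000_000).is_some()
}

fn main() {
    assert!(fits_when_scaled_to_ns(1_420_502_400_000)); // 2015-01-06 in ms
    assert!(!fits_when_scaled_to_ns(i64::MAX / 1_000)); // overflows as ns
}
```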
@@ -246,49 +254,50 @@ mod tests {
     use crate::Index;
 
     #[test]
-    fn test_parse_into_nanosecs() {
-        assert_eq!(parse_into_nanoseconds("1m").unwrap(), 60_000_000_000);
-        assert_eq!(parse_into_nanoseconds("2m").unwrap(), 120_000_000_000);
+    fn test_parse_into_millisecs() {
+        assert_eq!(parse_into_milliseconds("1m").unwrap(), 60_000);
+        assert_eq!(parse_into_milliseconds("2m").unwrap(), 120_000);
+        assert_eq!(parse_into_milliseconds("2minutes").unwrap(), 120_000);
         assert_eq!(
-            parse_into_nanoseconds("2y").unwrap_err(),
+            parse_into_milliseconds("2y").unwrap_err(),
             DateHistogramParseError::UnitNotRecognized("y".to_string()).into()
         );
         assert_eq!(
-            parse_into_nanoseconds("2000").unwrap_err(),
+            parse_into_milliseconds("2000").unwrap_err(),
             DateHistogramParseError::UnitMissing("2000".to_string()).into()
         );
         assert_eq!(
-            parse_into_nanoseconds("ms").unwrap_err(),
+            parse_into_milliseconds("ms").unwrap_err(),
             DateHistogramParseError::NumberMissing("ms".to_string()).into()
         );
     }
 
     #[test]
-    fn test_parse_offset_into_nanosecs() {
-        assert_eq!(parse_offset_into_nanosecs("1m").unwrap(), 60_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("+1m").unwrap(), 60_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("-1m").unwrap(), -60_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("2m").unwrap(), 120_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("+2m").unwrap(), 120_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("-2m").unwrap(), -120_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("-2ms").unwrap(), -2_000_000);
+    fn test_parse_offset_into_milliseconds() {
+        assert_eq!(parse_offset_into_milliseconds("1m").unwrap(), 60_000);
+        assert_eq!(parse_offset_into_milliseconds("+1m").unwrap(), 60_000);
+        assert_eq!(parse_offset_into_milliseconds("-1m").unwrap(), -60_000);
+        assert_eq!(parse_offset_into_milliseconds("2m").unwrap(), 120_000);
+        assert_eq!(parse_offset_into_milliseconds("+2m").unwrap(), 120_000);
+        assert_eq!(parse_offset_into_milliseconds("-2m").unwrap(), -120_000);
+        assert_eq!(parse_offset_into_milliseconds("-2ms").unwrap(), -2);
         assert_eq!(
-            parse_offset_into_nanosecs("2y").unwrap_err(),
+            parse_offset_into_milliseconds("2y").unwrap_err(),
             DateHistogramParseError::UnitNotRecognized("y".to_string()).into()
         );
         assert_eq!(
-            parse_offset_into_nanosecs("2000").unwrap_err(),
+            parse_offset_into_milliseconds("2000").unwrap_err(),
             DateHistogramParseError::UnitMissing("2000".to_string()).into()
         );
         assert_eq!(
-            parse_offset_into_nanosecs("ms").unwrap_err(),
+            parse_offset_into_milliseconds("ms").unwrap_err(),
             DateHistogramParseError::NumberMissing("ms".to_string()).into()
         );
     }
 
     #[test]
     fn test_parse_into_milliseconds_do_not_accept_non_ascii() {
-        assert!(parse_into_nanoseconds("1m").is_err());
+        assert!(parse_into_milliseconds("1m").is_err());
     }
 
     pub fn get_test_index_from_docs(
@@ -369,7 +378,7 @@ mod tests {
"buckets" : [
{
"key_as_string" : "2015-01-01T00:00:00Z",
"key" : 1420070400000000000.0,
"key" : 1420070400000.0,
"doc_count" : 4
}
]
@@ -407,7 +416,7 @@ mod tests {
"buckets" : [
{
"key_as_string" : "2015-01-01T00:00:00Z",
"key" : 1420070400000000000.0,
"key" : 1420070400000.0,
"doc_count" : 4,
"texts": {
"buckets": [
@@ -456,32 +465,32 @@ mod tests {
"buckets": [
{
"doc_count": 2,
"key": 1420070400000000000.0,
"key": 1420070400000.0,
"key_as_string": "2015-01-01T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420156800000000000.0,
"key": 1420156800000.0,
"key_as_string": "2015-01-02T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420243200000000000.0,
"key": 1420243200000.0,
"key_as_string": "2015-01-03T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420329600000000000.0,
"key": 1420329600000.0,
"key_as_string": "2015-01-04T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420416000000000000.0,
"key": 1420416000000.0,
"key_as_string": "2015-01-05T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420502400000000000.0,
"key": 1420502400000.0,
"key_as_string": "2015-01-06T00:00:00Z"
}
]
@@ -499,8 +508,8 @@ mod tests {
"field": "date",
"fixed_interval": "1d",
"extended_bounds": {
"min": 1419984000000000000.0,
"max": 1420588800000000000.0
"min": 1419984000000.0,
"max": 1420588800000.0
}
}
}
@@ -517,42 +526,42 @@ mod tests {
"buckets": [
{
"doc_count": 0,
"key": 1419984000000000000.0,
"key": 1419984000000.0,
"key_as_string": "2014-12-31T00:00:00Z"
},
{
"doc_count": 2,
"key": 1420070400000000000.0,
"key": 1420070400000.0,
"key_as_string": "2015-01-01T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420156800000000000.0,
"key": 1420156800000.0,
"key_as_string": "2015-01-02T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420243200000000000.0,
"key": 1420243200000.0,
"key_as_string": "2015-01-03T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420329600000000000.0,
"key": 1420329600000.0,
"key_as_string": "2015-01-04T00:00:00Z"
},
{
"doc_count": 0,
"key": 1420416000000000000.0,
"key": 1420416000000.0,
"key_as_string": "2015-01-05T00:00:00Z"
},
{
"doc_count": 1,
"key": 1420502400000000000.0,
"key": 1420502400000.0,
"key_as_string": "2015-01-06T00:00:00Z"
},
{
"doc_count": 0,
"key": 1.4205888e18,
"key": 1420588800000.0,
"key_as_string": "2015-01-07T00:00:00Z"
}
]
@@ -569,8 +578,8 @@ mod tests {
"field": "date",
"fixed_interval": "1d",
"hard_bounds": {
"min": 1420156800000000000.0,
"max": 1420243200000000000.0
"min": 1420156800000.0,
"max": 1420243200000.0
}
}
}
@@ -587,7 +596,7 @@ mod tests {
"buckets": [
{
"doc_count": 1,
"key": 1420156800000000000.0,
"key": 1420156800000.0,
"key_as_string": "2015-01-02T00:00:00Z"
}
]