From 022d245ea24bcc2eff50a1d7acf76aa6a56deaab Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 25 Apr 2024 23:00:21 +0800 Subject: [PATCH 1/2] use binggan for agg benchmark use binggan for agg benchmark, which includes memory consumption Output: ``` full histogram Memory: 15.8 KB Avg: 10.9322ms (+5.44%) Median: 10.8790ms (+9.28%) Min: 10.7470ms Max: 11.3263ms histogram_hard_bounds Memory: 15.5 KB Avg: 5.1939ms (+6.61%) Median: 5.1722ms (+10.98%) Min: 5.0432ms Max: 5.3910ms histogram_with_avg_sub_agg Memory: 48.7 KB Avg: 23.8165ms (+4.57%) Median: 23.7264ms (+10.06%) Min: 23.4995ms Max: 24.8107ms dense histogram Memory: 17.3 KB Avg: 15.6810ms (-8.54%) Median: 15.6174ms (-8.89%) Min: 15.4953ms Max: 16.0702ms histogram_hard_bounds Memory: 15.4 KB Avg: 10.0720ms (-7.33%) Median: 10.0572ms (-7.06%) Min: 9.8500ms Max: 10.4819ms histogram_with_avg_sub_agg Memory: 50.1 KB Avg: 33.0993ms (-7.04%) Median: 32.9499ms (-6.86%) Min: 32.8284ms Max: 34.0529ms sparse histogram Memory: 16.3 KB Avg: 19.2325ms (-0.44%) Median: 19.1211ms (-1.26%) Min: 19.0348ms Max: 19.7902ms histogram_hard_bounds Memory: 16.1 KB Avg: 18.5179ms (-0.61%) Median: 18.4552ms (-0.90%) Min: 18.3799ms Max: 19.0535ms histogram_with_avg_sub_agg Memory: 34.7 KB Avg: 21.2589ms (-0.69%) Median: 21.1867ms (-1.05%) Min: 21.0342ms Max: 21.9900ms ``` --- Cargo.toml | 6 + benches/agg_bench.rs | 368 ++++++++++++++++++++++ src/aggregation/agg_bench.rs | 585 ----------------------------------- src/aggregation/mod.rs | 2 - 4 files changed, 374 insertions(+), 587 deletions(-) create mode 100644 benches/agg_bench.rs delete mode 100644 src/aggregation/agg_bench.rs diff --git a/Cargo.toml b/Cargo.toml index 3580168e1b..38340740cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,7 @@ fnv = "1.0.7" winapi = "0.3.9" [dev-dependencies] +binggan = "0.3.0" rand = "0.8.5" maplit = "1.0.2" matches = "0.1.9" @@ -143,3 +144,8 @@ harness = false [[bench]] name = "index-bench" harness = false + +[[bench]] +name = "agg_bench" 
+harness = false + diff --git a/benches/agg_bench.rs b/benches/agg_bench.rs new file mode 100644 index 0000000000..51e698c134 --- /dev/null +++ b/benches/agg_bench.rs @@ -0,0 +1,368 @@ +use binggan::{black_box, BenchGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM}; +use rand::prelude::SliceRandom; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use rand_distr::Distribution; +use serde_json::json; +use tantivy::aggregation::agg_req::Aggregations; +use tantivy::aggregation::AggregationCollector; +use tantivy::query::{AllQuery, TermQuery}; +use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING}; +use tantivy::{doc, Index, Term}; + +#[global_allocator] +pub static GLOBAL: &PeakMemAlloc = &INSTRUMENTED_SYSTEM; + +/// Mini macro to register a function via its name +/// runner.register("average_u64", move |index| average_u64(index)); +macro_rules! register { + ($runner:expr, $func:ident) => { + $runner.register(stringify!($func), move |index| $func(index)) + }; +} + +fn main() { + let inputs = vec![ + ("full", get_test_index_bench(Cardinality::Full).unwrap()), + ( + "dense", + get_test_index_bench(Cardinality::OptionalDense).unwrap(), + ), + ( + "sparse", + get_test_index_bench(Cardinality::OptionalSparse).unwrap(), + ), + ( + "multivalue", + get_test_index_bench(Cardinality::Multivalued).unwrap(), + ), + ]; + + bench_agg(BenchGroup::new_with_inputs(inputs)); +} + +fn bench_agg(mut group: BenchGroup) { + group.set_alloc(GLOBAL); // Set the peak mem allocator. This will enable peak memory reporting. 
+ register!(group, average_u64); + register!(group, average_f64); + register!(group, average_f64_u64); + register!(group, stats_f64); + register!(group, percentiles_f64); + register!(group, terms_few); + register!(group, terms_many); + register!(group, terms_many_order_by_term); + register!(group, terms_many_with_top_hits); + register!(group, terms_many_with_avg_sub_agg); + register!(group, terms_many_json_mixed_type_with_sub_agg_card); + register!(group, range_agg); + register!(group, range_agg_with_avg_sub_agg); + register!(group, histogram); + register!(group, histogram_hard_bounds); + register!(group, histogram_with_avg_sub_agg); + register!(group, avg_and_range_with_avg_sub_agg); + + group.run(); +} + +fn exec_term_with_agg(index: &Index, agg_req: serde_json::Value) { + let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap(); + + let reader = index.reader().unwrap(); + let text_field = reader.searcher().schema().get_field("text").unwrap(); + let term_query = TermQuery::new( + Term::from_field_text(text_field, "cool"), + IndexRecordOption::Basic, + ); + let collector = get_collector(agg_req); + let searcher = reader.searcher(); + black_box(searcher.search(&term_query, &collector).unwrap()); +} + +fn average_u64(index: &Index) { + let agg_req = json!({ + "average": { "avg": { "field": "score", } } + }); + exec_term_with_agg(index, agg_req) +} +fn average_f64(index: &Index) { + let agg_req = json!({ + "average": { "avg": { "field": "score_f64", } } + }); + exec_term_with_agg(index, agg_req) +} +fn average_f64_u64(index: &Index) { + let agg_req = json!({ + "average_f64": { "avg": { "field": "score_f64" } }, + "average": { "avg": { "field": "score" } }, + }); + exec_term_with_agg(index, agg_req) +} +fn stats_f64(index: &Index) { + let agg_req = json!({ + "average_f64": { "stats": { "field": "score_f64", } } + }); + exec_term_with_agg(index, agg_req) +} + +fn percentiles_f64(index: &Index) { + let agg_req = json!({ + "mypercentiles": { + "percentiles": 
{ + "field": "score_f64", + "percents": [ 95, 99, 99.9 ] + } + } + }); + execute_agg(index, agg_req); +} +fn terms_few(index: &Index) { + let agg_req = json!({ + "my_texts": { "terms": { "field": "text_few_terms" } }, + }); + execute_agg(index, agg_req); +} +fn terms_many(index: &Index) { + let agg_req = json!({ + "my_texts": { "terms": { "field": "text_many_terms" } }, + }); + execute_agg(index, agg_req); +} +fn terms_many_order_by_term(index: &Index) { + let agg_req = json!({ + "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } }, + }); + execute_agg(index, agg_req); +} +fn terms_many_with_top_hits(index: &Index) { + let agg_req = json!({ + "my_texts": { + "terms": { "field": "text_many_terms" }, + "aggs": { + "top_hits": { "top_hits": + { + "sort": [ + { "score": "desc" } + ], + "size": 2, + "doc_value_fields": ["score_f64"] + } + } + } + }, + }); + execute_agg(index, agg_req); +} +fn terms_many_with_avg_sub_agg(index: &Index) { + let agg_req = json!({ + "my_texts": { + "terms": { "field": "text_many_terms" }, + "aggs": { + "average_f64": { "avg": { "field": "score_f64" } } + } + }, + }); + execute_agg(index, agg_req); +} +fn terms_many_json_mixed_type_with_sub_agg_card(index: &Index) { + let agg_req = json!({ + "my_texts": { + "terms": { "field": "json.mixed_type" }, + "aggs": { + "average_f64": { "avg": { "field": "score_f64" } } + } + }, + }); + execute_agg(index, agg_req); +} + +fn execute_agg(index: &Index, agg_req: serde_json::Value) { + let agg_req: Aggregations = serde_json::from_value(agg_req).unwrap(); + let collector = get_collector(agg_req); + + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + black_box(searcher.search(&AllQuery, &collector).unwrap()); +} +fn range_agg(index: &Index) { + let agg_req = json!({ + "range_f64": { "range": { "field": "score_f64", "ranges": [ + { "from": 3, "to": 7000 }, + { "from": 7000, "to": 20000 }, + { "from": 20000, "to": 30000 }, + { "from": 30000, "to": 
40000 }, + { "from": 40000, "to": 50000 }, + { "from": 50000, "to": 60000 } + ] } }, + }); + execute_agg(index, agg_req); +} +fn range_agg_with_avg_sub_agg(index: &Index) { + let agg_req = json!({ + "rangef64": { + "range": { + "field": "score_f64", + "ranges": [ + { "from": 3, "to": 7000 }, + { "from": 7000, "to": 20000 }, + { "from": 20000, "to": 30000 }, + { "from": 30000, "to": 40000 }, + { "from": 40000, "to": 50000 }, + { "from": 50000, "to": 60000 } + ] + }, + "aggs": { + "average_f64": { "avg": { "field": "score_f64" } } + } + }, + }); + execute_agg(index, agg_req); +} +fn histogram(index: &Index) { + let agg_req = json!({ + "rangef64": { + "histogram": { + "field": "score_f64", + "interval": 100 // 1000 buckets + }, + } + }); + execute_agg(index, agg_req); +} +fn histogram_hard_bounds(index: &Index) { + let agg_req = json!({ + "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 300000 } } }, + }); + execute_agg(index, agg_req); +} +fn histogram_with_avg_sub_agg(index: &Index) { + let agg_req = json!({ + "rangef64": { + "histogram": { "field": "score_f64", "interval": 100 }, + "aggs": { + "average_f64": { "avg": { "field": "score_f64" } } + } + } + }); + execute_agg(index, agg_req); +} +fn avg_and_range_with_avg_sub_agg(index: &Index) { + let agg_req = json!({ + "rangef64": { + "range": { + "field": "score_f64", + "ranges": [ + { "from": 3, "to": 7000 }, + { "from": 7000, "to": 20000 }, + { "from": 20000, "to": 60000 } + ] + }, + "aggs": { + "average_in_range": { "avg": { "field": "score" } } + } + }, + "average": { "avg": { "field": "score" } } + }); + execute_agg(index, agg_req); +} + +#[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum Cardinality { + /// All documents contain exactly one value. + /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict. + #[default] + Full = 0, + /// All documents contain at most one value. 
+ OptionalDense = 1, + /// All documents may contain any number of values. + Multivalued = 2, + /// 1 / 20 documents has a value + OptionalSparse = 3, +} + +fn get_collector(agg_req: Aggregations) -> AggregationCollector { + AggregationCollector::from_aggs(agg_req, Default::default()) +} + +fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result { + let mut schema_builder = Schema::builder(); + let text_fieldtype = tantivy::schema::TextOptions::default() + .set_indexing_options( + TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs), + ) + .set_stored(); + let text_field = schema_builder.add_text_field("text", text_fieldtype); + let json_field = schema_builder.add_json_field("json", FAST); + let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST); + let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST); + let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast(); + let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone()); + let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone()); + let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype); + let index = Index::create_from_tempdir(schema_builder.build())?; + let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"]; + + let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap(); + + let many_terms_data = (0..150_000) + .map(|num| format!("author{}", num)) + .collect::>(); + { + let mut rng = StdRng::from_seed([1u8; 32]); + let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?; + // To make the different test cases comparable we just change one doc to force the + // cardinality + if cardinality == Cardinality::OptionalDense { + index_writer.add_document(doc!())?; + } + if cardinality == Cardinality::Multivalued { + index_writer.add_document(doc!( + json_field => json!({"mixed_type": 10.0}), 
+ json_field => json!({"mixed_type": 10.0}), + text_field => "cool", + text_field => "cool", + text_field_many_terms => "cool", + text_field_many_terms => "cool", + text_field_few_terms => "cool", + text_field_few_terms => "cool", + score_field => 1u64, + score_field => 1u64, + score_field_f64 => lg_norm.sample(&mut rng), + score_field_f64 => lg_norm.sample(&mut rng), + score_field_i64 => 1i64, + score_field_i64 => 1i64, + ))?; + } + let mut doc_with_value = 1_000_000; + if cardinality == Cardinality::OptionalSparse { + doc_with_value /= 20; + } + let _val_max = 1_000_000.0; + for _ in 0..doc_with_value { + let val: f64 = rng.gen_range(0.0..1_000_000.0); + let json = if rng.gen_bool(0.1) { + // 10% are numeric values + json!({ "mixed_type": val }) + } else { + json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()}) + }; + index_writer.add_document(doc!( + text_field => "cool", + json_field => json, + text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(), + text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(), + score_field => val as u64, + score_field_f64 => lg_norm.sample(&mut rng), + score_field_i64 => val as i64, + ))?; + if cardinality == Cardinality::OptionalSparse { + for _ in 0..20 { + index_writer.add_document(doc!(text_field => "cool"))?; + } + } + } + // writing the segment + index_writer.commit()?; + } + + Ok(index) +} diff --git a/src/aggregation/agg_bench.rs b/src/aggregation/agg_bench.rs deleted file mode 100644 index 84c0bb3827..0000000000 --- a/src/aggregation/agg_bench.rs +++ /dev/null @@ -1,585 +0,0 @@ -#[cfg(all(test, feature = "unstable"))] -mod bench { - - use rand::prelude::SliceRandom; - use rand::rngs::StdRng; - use rand::{Rng, SeedableRng}; - use rand_distr::Distribution; - use serde_json::json; - use test::{self, Bencher}; - - use crate::aggregation::agg_req::Aggregations; - use crate::aggregation::AggregationCollector; - use crate::query::{AllQuery, TermQuery}; - use 
crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING}; - use crate::{Index, Term}; - - #[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)] - enum Cardinality { - /// All documents contain exactly one value. - /// `Full` is the default for auto-detecting the Cardinality, since it is the most strict. - #[default] - Full = 0, - /// All documents contain at most one value. - Optional = 1, - /// All documents may contain any number of values. - Multivalued = 2, - /// 1 / 20 documents has a value - Sparse = 3, - } - - fn get_collector(agg_req: Aggregations) -> AggregationCollector { - AggregationCollector::from_aggs(agg_req, Default::default()) - } - - fn get_test_index_bench(cardinality: Cardinality) -> crate::Result { - let mut schema_builder = Schema::builder(); - let text_fieldtype = crate::schema::TextOptions::default() - .set_indexing_options( - TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs), - ) - .set_stored(); - let text_field = schema_builder.add_text_field("text", text_fieldtype); - let json_field = schema_builder.add_json_field("json", FAST); - let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST); - let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST); - let score_fieldtype = crate::schema::NumericOptions::default().set_fast(); - let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone()); - let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone()); - let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype); - let index = Index::create_from_tempdir(schema_builder.build())?; - let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"]; - - let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap(); - - let many_terms_data = (0..150_000) - .map(|num| format!("author{}", num)) - .collect::>(); - { - let mut rng = 
StdRng::from_seed([1u8; 32]); - let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?; - // To make the different test cases comparable we just change one doc to force the - // cardinality - if cardinality == Cardinality::Optional { - index_writer.add_document(doc!())?; - } - if cardinality == Cardinality::Multivalued { - index_writer.add_document(doc!( - json_field => json!({"mixed_type": 10.0}), - json_field => json!({"mixed_type": 10.0}), - text_field => "cool", - text_field => "cool", - text_field_many_terms => "cool", - text_field_many_terms => "cool", - text_field_few_terms => "cool", - text_field_few_terms => "cool", - score_field => 1u64, - score_field => 1u64, - score_field_f64 => lg_norm.sample(&mut rng), - score_field_f64 => lg_norm.sample(&mut rng), - score_field_i64 => 1i64, - score_field_i64 => 1i64, - ))?; - } - let mut doc_with_value = 1_000_000; - if cardinality == Cardinality::Sparse { - doc_with_value /= 20; - } - let _val_max = 1_000_000.0; - for _ in 0..doc_with_value { - let val: f64 = rng.gen_range(0.0..1_000_000.0); - let json = if rng.gen_bool(0.1) { - // 10% are numeric values - json!({ "mixed_type": val }) - } else { - json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()}) - }; - index_writer.add_document(doc!( - text_field => "cool", - json_field => json, - text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(), - text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(), - score_field => val as u64, - score_field_f64 => lg_norm.sample(&mut rng), - score_field_i64 => val as i64, - ))?; - if cardinality == Cardinality::Sparse { - for _ in 0..20 { - index_writer.add_document(doc!(text_field => "cool"))?; - } - } - } - // writing the segment - index_writer.commit()?; - } - - Ok(index) - } - - use paste::paste; - #[macro_export] - macro_rules! bench_all_cardinalities { - ( $x:ident ) => { - paste! 
{ - #[bench] - fn $x(b: &mut Bencher) { - [<$x _card>](b, Cardinality::Full) - } - - #[bench] - fn [<$x _opt>](b: &mut Bencher) { - [<$x _card>](b, Cardinality::Optional) - } - - #[bench] - fn [<$x _multi>](b: &mut Bencher) { - [<$x _card>](b, Cardinality::Multivalued) - } - - #[bench] - fn [<$x _sparse>](b: &mut Bencher) { - [<$x _card>](b, Cardinality::Sparse) - } - - } - }; - } - - bench_all_cardinalities!(bench_aggregation_average_u64); - - fn bench_aggregation_average_u64_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - let text_field = reader.searcher().schema().get_field("text").unwrap(); - - b.iter(|| { - let term_query = TermQuery::new( - Term::from_field_text(text_field, "cool"), - IndexRecordOption::Basic, - ); - - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "average": { "avg": { "field": "score", } } - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&term_query, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_stats_f64); - - fn bench_aggregation_stats_f64_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - let text_field = reader.searcher().schema().get_field("text").unwrap(); - - b.iter(|| { - let term_query = TermQuery::new( - Term::from_field_text(text_field, "cool"), - IndexRecordOption::Basic, - ); - - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "average_f64": { "stats": { "field": "score_f64", } } - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&term_query, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_average_f64); - - fn bench_aggregation_average_f64_card(b: &mut Bencher, cardinality: Cardinality) { - let index 
= get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - let text_field = reader.searcher().schema().get_field("text").unwrap(); - - b.iter(|| { - let term_query = TermQuery::new( - Term::from_field_text(text_field, "cool"), - IndexRecordOption::Basic, - ); - - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "average_f64": { "avg": { "field": "score_f64", } } - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&term_query, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_percentiles_f64); - - fn bench_aggregation_percentiles_f64_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req_str = r#" - { - "mypercentiles": { - "percentiles": { - "field": "score_f64", - "percents": [ 95, 99, 99.9 ] - } - } - } "#; - let agg_req_1: Aggregations = serde_json::from_str(agg_req_str).unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_average_u64_and_f64); - - fn bench_aggregation_average_u64_and_f64_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - let text_field = reader.searcher().schema().get_field("text").unwrap(); - - b.iter(|| { - let term_query = TermQuery::new( - Term::from_field_text(text_field, "cool"), - IndexRecordOption::Basic, - ); - - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "average_f64": { "avg": { "field": "score_f64" } }, - "average": { "avg": { "field": "score" } }, - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&term_query, &collector).unwrap() - }); - } - 
- bench_all_cardinalities!(bench_aggregation_terms_few); - - fn bench_aggregation_terms_few_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req: Aggregations = serde_json::from_value(json!({ - "my_texts": { "terms": { "field": "text_few_terms" } }, - })) - .unwrap(); - - let collector = get_collector(agg_req); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_terms_many_with_top_hits_agg); - - fn bench_aggregation_terms_many_with_top_hits_agg_card( - b: &mut Bencher, - cardinality: Cardinality, - ) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req: Aggregations = serde_json::from_value(json!({ - "my_texts": { - "terms": { "field": "text_many_terms" }, - "aggs": { - "top_hits": { "top_hits": - { - "sort": [ - { "score": "desc" } - ], - "size": 2, - "doc_value_fields": ["score_f64"] - } - } - } - }, - })) - .unwrap(); - - let collector = get_collector(agg_req); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_terms_many_with_sub_agg); - - fn bench_aggregation_terms_many_with_sub_agg_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req: Aggregations = serde_json::from_value(json!({ - "my_texts": { - "terms": { "field": "text_many_terms" }, - "aggs": { - "average_f64": { "avg": { "field": "score_f64" } } - } - }, - })) - .unwrap(); - - let collector = get_collector(agg_req); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - 
bench_all_cardinalities!(bench_aggregation_terms_many_json_mixed_type_with_sub_agg); - - fn bench_aggregation_terms_many_json_mixed_type_with_sub_agg_card( - b: &mut Bencher, - cardinality: Cardinality, - ) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req: Aggregations = serde_json::from_value(json!({ - "my_texts": { - "terms": { "field": "json.mixed_type" }, - "aggs": { - "average_f64": { "avg": { "field": "score_f64" } } - } - }, - })) - .unwrap(); - - let collector = get_collector(agg_req); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_terms_many2); - - fn bench_aggregation_terms_many2_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req: Aggregations = serde_json::from_value(json!({ - "my_texts": { "terms": { "field": "text_many_terms" } }, - })) - .unwrap(); - - let collector = get_collector(agg_req); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_terms_many_order_by_term); - - fn bench_aggregation_terms_many_order_by_term_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req: Aggregations = serde_json::from_value(json!({ - "my_texts": { "terms": { "field": "text_many_terms", "order": { "_key": "desc" } } }, - })) - .unwrap(); - - let collector = get_collector(agg_req); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_range_only); - - fn bench_aggregation_range_only_card(b: &mut Bencher, cardinality: Cardinality) { - let index = 
get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "range_f64": { "range": { "field": "score_f64", "ranges": [ - { "from": 3, "to": 7000 }, - { "from": 7000, "to": 20000 }, - { "from": 20000, "to": 30000 }, - { "from": 30000, "to": 40000 }, - { "from": 40000, "to": 50000 }, - { "from": 50000, "to": 60000 } - ] } }, - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_range_with_avg); - - fn bench_aggregation_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "rangef64": { - "range": { - "field": "score_f64", - "ranges": [ - { "from": 3, "to": 7000 }, - { "from": 7000, "to": 20000 }, - { "from": 20000, "to": 30000 }, - { "from": 30000, "to": 40000 }, - { "from": 40000, "to": 50000 }, - { "from": 50000, "to": 60000 } - ] - }, - "aggs": { - "average_f64": { "avg": { "field": "score_f64" } } - } - }, - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - // hard bounds has a different algorithm, because it actually limits collection range - // - bench_all_cardinalities!(bench_aggregation_histogram_only_hard_bounds); - - fn bench_aggregation_histogram_only_hard_bounds_card( - b: &mut Bencher, - cardinality: Cardinality, - ) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "rangef64": { "histogram": { "field": "score_f64", "interval": 100, "hard_bounds": { "min": 1000, "max": 
300000 } } }, - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_histogram_with_avg); - - fn bench_aggregation_histogram_with_avg_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "rangef64": { - "histogram": { "field": "score_f64", "interval": 100 }, - "aggs": { - "average_f64": { "avg": { "field": "score_f64" } } - } - } - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_histogram_only); - - fn bench_aggregation_histogram_only_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - - b.iter(|| { - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "rangef64": { - "histogram": { - "field": "score_f64", - "interval": 100 // 1000 buckets - }, - } - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&AllQuery, &collector).unwrap() - }); - } - - bench_all_cardinalities!(bench_aggregation_avg_and_range_with_avg); - - fn bench_aggregation_avg_and_range_with_avg_card(b: &mut Bencher, cardinality: Cardinality) { - let index = get_test_index_bench(cardinality).unwrap(); - let reader = index.reader().unwrap(); - let text_field = reader.searcher().schema().get_field("text").unwrap(); - - b.iter(|| { - let term_query = TermQuery::new( - Term::from_field_text(text_field, "cool"), - IndexRecordOption::Basic, - ); - - let agg_req_1: Aggregations = serde_json::from_value(json!({ - "rangef64": { - "range": { - "field": 
"score_f64", - "ranges": [ - { "from": 3, "to": 7000 }, - { "from": 7000, "to": 20000 }, - { "from": 20000, "to": 60000 } - ] - }, - "aggs": { - "average_in_range": { "avg": { "field": "score" } } - } - }, - "average": { "avg": { "field": "score" } } - })) - .unwrap(); - - let collector = get_collector(agg_req_1); - - let searcher = reader.searcher(); - searcher.search(&term_query, &collector).unwrap() - }); - } -} diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index fbb2925dd1..cb45885ac6 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -143,8 +143,6 @@ use std::fmt::Display; #[cfg(test)] mod agg_tests; -mod agg_bench; - use core::fmt; pub use agg_limits::AggregationLimits; From 12a7b19d3f14ca8a9a149b81cf3fabd9ffa367b7 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Tue, 7 May 2024 17:21:08 +0800 Subject: [PATCH 2/2] add more bench with term as sub agg --- Cargo.toml | 2 +- benches/agg_bench.rs | 51 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 38340740cb..1862db26e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,7 +67,7 @@ fnv = "1.0.7" winapi = "0.3.9" [dev-dependencies] -binggan = "0.3.0" +binggan = "0.5.1" rand = "0.8.5" maplit = "1.0.2" matches = "0.1.9" diff --git a/benches/agg_bench.rs b/benches/agg_bench.rs index 51e698c134..d35124999d 100644 --- a/benches/agg_bench.rs +++ b/benches/agg_bench.rs @@ -1,4 +1,4 @@ -use binggan::{black_box, BenchGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM}; +use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM}; use rand::prelude::SliceRandom; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -38,10 +38,10 @@ fn main() { ), ]; - bench_agg(BenchGroup::new_with_inputs(inputs)); + bench_agg(InputGroup::new_with_inputs(inputs)); } -fn bench_agg(mut group: BenchGroup) { +fn bench_agg(mut group: InputGroup) { group.set_alloc(GLOBAL); // Set the peak mem allocator. 
This will enable peak memory reporting. register!(group, average_u64); register!(group, average_f64); @@ -56,6 +56,8 @@ fn bench_agg(mut group: BenchGroup) { register!(group, terms_many_json_mixed_type_with_sub_agg_card); register!(group, range_agg); register!(group, range_agg_with_avg_sub_agg); + register!(group, range_agg_with_term_agg_few); + register!(group, range_agg_with_term_agg_many); register!(group, histogram); register!(group, histogram_hard_bounds); register!(group, histogram_with_avg_sub_agg); @@ -217,6 +219,49 @@ fn range_agg_with_avg_sub_agg(index: &Index) { }); execute_agg(index, agg_req); } + +fn range_agg_with_term_agg_few(index: &Index) { + let agg_req = json!({ + "rangef64": { + "range": { + "field": "score_f64", + "ranges": [ + { "from": 3, "to": 7000 }, + { "from": 7000, "to": 20000 }, + { "from": 20000, "to": 30000 }, + { "from": 30000, "to": 40000 }, + { "from": 40000, "to": 50000 }, + { "from": 50000, "to": 60000 } + ] + }, + "aggs": { + "my_texts": { "terms": { "field": "text_few_terms" } }, + } + }, + }); + execute_agg(index, agg_req); +} +fn range_agg_with_term_agg_many(index: &Index) { + let agg_req = json!({ + "rangef64": { + "range": { + "field": "score_f64", + "ranges": [ + { "from": 3, "to": 7000 }, + { "from": 7000, "to": 20000 }, + { "from": 20000, "to": 30000 }, + { "from": 30000, "to": 40000 }, + { "from": 40000, "to": 50000 }, + { "from": 50000, "to": 60000 } + ] + }, + "aggs": { + "my_texts": { "terms": { "field": "text_many_terms" } }, + } + }, + }); + execute_agg(index, agg_req); +} fn histogram(index: &Index) { let agg_req = json!({ "rangef64": {