From 5e9b4a5fd5aeeb01acfcfc448fe0ec6b7eea0dec Mon Sep 17 00:00:00 2001
From: aldenhu
Date: Fri, 24 Jan 2025 13:05:20 -0800
Subject: [PATCH] LayeredMap: lazy and cached bench input data

---
 experimental/storage/layered-map/Cargo.toml   |   3 +
 .../storage/layered-map/benches/maps.rs       | 166 ++++++++++++------
 2 files changed, 120 insertions(+), 49 deletions(-)

diff --git a/experimental/storage/layered-map/Cargo.toml b/experimental/storage/layered-map/Cargo.toml
index 0bed1c63fb8ae1..ee65b678784e58 100644
--- a/experimental/storage/layered-map/Cargo.toml
+++ b/experimental/storage/layered-map/Cargo.toml
@@ -29,6 +29,9 @@ jemallocator = { workspace = true }
 proptest = { workspace = true }
 rand = { workspace = true }
 
+[lib]
+bench = false
+
 [[bench]]
 name = "sorting"
 harness = false
diff --git a/experimental/storage/layered-map/benches/maps.rs b/experimental/storage/layered-map/benches/maps.rs
index bb61742e1db6eb..fdea0904b60412 100644
--- a/experimental/storage/layered-map/benches/maps.rs
+++ b/experimental/storage/layered-map/benches/maps.rs
@@ -2,11 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 
 use aptos_crypto::HashValue;
-use aptos_experimental_layered_map::MapLayer;
+use aptos_experimental_layered_map::{LayeredMap, MapLayer};
 use criterion::{
     criterion_group, criterion_main, measurement::WallTime, BatchSize, BenchmarkGroup, Criterion,
 };
 use itertools::Itertools;
+use once_cell::sync::OnceCell;
 use rand::random;
 use std::{
     collections::{BTreeMap, HashMap},
@@ -22,25 +23,46 @@ type Value = HashValue;
 
 const K: usize = 1024;
 
-fn gen_update_batches(batch_size_k: usize, n_batches: usize) -> Vec<Vec<(Key, Value)>> {
-    repeat_with(|| {
-        repeat_with(|| (random(), random()))
-            .take(batch_size_k * K)
-            .collect_vec()
-    })
-    .take(n_batches)
-    .collect_vec()
+type BatchCache = HashMap<usize, HashMap<usize, Vec<Vec<(Key, Value)>>>>;
+
+fn gen_update_batches(
+    cache: &mut BatchCache,
+    batch_size_k: usize,
+    n_batches: usize,
+) -> &Vec<Vec<(Key, Value)>> {
+    cache
+        .entry(batch_size_k)
+        .or_default()
+        .entry(n_batches)
+        .or_insert_with(|| {
+            println!();
+            println!("Generating batch. {batch_size_k}k per batch, {n_batches} batches.");
+            let timer = std::time::Instant::now();
+            let ret = repeat_with(|| {
+                repeat_with(|| (random(), random()))
+                    .take(batch_size_k * K)
+                    .collect_vec()
+            })
+            .take(n_batches)
+            .collect_vec();
+            println!("done in {} secs.", timer.elapsed().as_secs());
+            ret
+        })
 }
 
-fn insert_in_batches(group: &mut BenchmarkGroup<WallTime>, batch_size_k: usize, n_batches: usize) {
-    let batches = gen_update_batches(batch_size_k, n_batches);
+fn insert_in_batches(
+    group: &mut BenchmarkGroup<WallTime>,
+    cache: &mut BatchCache,
+    batch_size_k: usize,
+    n_batches: usize,
+) {
     let total_updates = (batch_size_k * K * n_batches) as u64;
     group.throughput(criterion::Throughput::Elements(total_updates));
 
     let name = format!("hash_map_{n_batches}_batches_of_{batch_size_k}k_updates");
     group.bench_function(&name, |b| {
         b.iter_batched(
-            || batches.clone(),
+            || gen_update_batches(cache, batch_size_k, n_batches).clone(),
             |batches| {
                 let mut map = HashMap::new();
                 for batch in batches {
@@ -55,7 +77,7 @@ fn insert_in_batches(group: &mut BenchmarkGroup<WallTime>, batch_size_k: usize,
     let name = format!("btree_map_{n_batches}_batches_of_{batch_size_k}k_updates");
     group.bench_function(&name, |b| {
         b.iter_batched(
-            || batches.clone(),
+            || gen_update_batches(cache, batch_size_k, n_batches).clone(),
             |batches| {
                 let mut map = BTreeMap::new();
                 for batch in batches {
@@ -70,7 +92,7 @@ fn insert_in_batches(group: &mut BenchmarkGroup<WallTime>, batch_size_k: usize,
     let name = format!("layered_map_{n_batches}_batches_of_{batch_size_k}k_updates");
     group.bench_function(&name, |b| {
         b.iter_batched(
-            || batches.clone(),
+            || gen_update_batches(cache, batch_size_k, n_batches).clone(),
             |batches| {
                 let root_layer = MapLayer::new_family("bench");
                 let mut latest_layer = root_layer.clone();
@@ -88,73 +110,107 @@ fn insert_in_batches(group: &mut BenchmarkGroup<WallTime>, batch_size_k: usize,
 
 fn get(
     group: &mut BenchmarkGroup<WallTime>,
+    batch_cache: &mut BatchCache,
+    keys_cache: &mut KeysCache,
     map_size_k: usize,
-    items: &[(Key, Value)],
-    keys_to_get: &[Key],
+    existing: bool,
 ) {
-    assert_eq!(map_size_k * K, items.len());
-    group.throughput(criterion::Throughput::Elements(keys_to_get.len() as u64));
+    let n_keys_to_get = map_size_k.min(10) * K;
+
+    group.throughput(criterion::Throughput::Elements(n_keys_to_get as u64));
 
     let name = format!("hash_map_{map_size_k}k_items");
-    let map: HashMap<Key, Value> = items.iter().cloned().collect();
+    let map: OnceCell<HashMap<Key, Value>> = OnceCell::new();
+    let keys: OnceCell<Vec<Key>> = OnceCell::new();
+
     group.bench_function(&name, |b| {
         b.iter_batched(
-            || (),
-            |_| keys_to_get.iter().map(|key| map.get(key)).collect_vec(),
+            || {
+                let (items, keys_) =
+                    gen_get(batch_cache, keys_cache, map_size_k, n_keys_to_get, existing);
+                let map = map.get_or_init(|| items.iter().cloned().collect());
+                let keys = keys.get_or_init(|| keys_.clone());
+                (map, keys)
+            },
+            |(map, keys)| keys.iter().map(|key| map.get(key)).collect_vec(),
             BatchSize::SmallInput,
         )
     });
 
     let name = format!("btree_map_{map_size_k}k_items");
-    let map: BTreeMap<Key, Value> = items.iter().cloned().collect();
+    let map: OnceCell<BTreeMap<Key, Value>> = OnceCell::new();
+    let keys: OnceCell<Vec<Key>> = OnceCell::new();
+
     group.bench_function(&name, |b| {
         b.iter_batched(
-            || (),
-            |_| keys_to_get.iter().map(|key| map.get(key)).collect_vec(),
+            || {
+                let (items, keys_) =
+                    gen_get(batch_cache, keys_cache, map_size_k, n_keys_to_get, existing);
+                let map = map.get_or_init(|| items.iter().cloned().collect());
+                let keys = keys.get_or_init(|| keys_.clone());
+                (map, keys)
+            },
+            |(map, keys)| keys.iter().map(|key| map.get(key)).collect_vec(),
             BatchSize::SmallInput,
         )
     });
 
     let name = format!("layered_map_{map_size_k}k_items");
-    let root_layer = MapLayer::new_family("bench");
-    let top_layer = root_layer.view_layers_after(&root_layer).new_layer(items);
-    let map = top_layer.into_layers_view_after(root_layer);
+    let map: OnceCell<LayeredMap<Key, Value>> = OnceCell::new();
+    let keys: OnceCell<Vec<Key>> = OnceCell::new();
     group.bench_function(&name, |b| {
         b.iter_batched(
-            || (),
-            |_| keys_to_get.iter().map(|key| map.get(key)).collect_vec(),
+            || {
+                let (items, keys_) =
+                    gen_get(batch_cache, keys_cache, map_size_k, n_keys_to_get, existing);
+                let map = map.get_or_init(|| {
+                    let root_layer = MapLayer::new_family("bench");
+                    let top_layer = root_layer.view_layers_after(&root_layer).new_layer(items);
+                    top_layer.into_layers_view_after(root_layer)
+                });
+                let keys = keys.get_or_init(|| keys_.clone());
+                (map, keys)
+            },
+            |(map, keys)| keys.iter().map(|key| map.get(key)).collect_vec(),
             BatchSize::SmallInput,
         )
     });
 }
 
-fn get_existing(group: &mut BenchmarkGroup<WallTime>, map_size_k: usize) {
-    let items = gen_update_batches(map_size_k, 1).pop().unwrap();
-    let num_keys_to_get = map_size_k.min(10) * K;
-    let keys_to_get = items
-        .iter()
-        .map(|(key, _v)| *key)
-        .take(num_keys_to_get)
-        .collect_vec();
-    group.throughput(criterion::Throughput::Elements(num_keys_to_get as u64));
-
-    get(group, map_size_k, &items, &keys_to_get);
-}
+type KeysCache = HashMap<usize, HashMap<bool, Vec<Key>>>;
 
-fn get_non_existing(group: &mut BenchmarkGroup<WallTime>, map_size_k: usize) {
-    let items = gen_update_batches(map_size_k, 1).pop().unwrap();
-    let num_keys_to_get = map_size_k.min(10) * K;
-    let keys_to_get = (0..num_keys_to_get).map(|_| random()).collect_vec();
+fn gen_get<'a>(
+    batch_cache: &'a mut BatchCache,
+    keys_cache: &'a mut KeysCache,
+    map_size_k: usize,
+    n_keys_to_get: usize,
+    existing: bool,
+) -> (&'a Vec<(Key, Value)>, &'a Vec<Key>) {
+    let items = &gen_update_batches(batch_cache, map_size_k, 1)[0];
+    let keys = keys_cache
+        .entry(map_size_k)
+        .or_default()
+        .entry(existing)
+        .or_insert_with(|| {
+            if existing {
+                items.iter().map(|(k, _v)| *k).take(n_keys_to_get).collect()
+            } else {
+                repeat_with(random).take(n_keys_to_get).collect()
+            }
+        });
 
-    get(group, map_size_k, &items, &keys_to_get);
+    (items, keys)
 }
 
 fn compare_maps(c: &mut Criterion) {
+    let mut batch_cache = BatchCache::default();
+    let mut keys_cache = KeysCache::default();
+
     {
         let mut group = c.benchmark_group("insert_in_batches");
         for batch_size_k in [1, 10, 100] {
             for n_batches in [1, 8] {
-                insert_in_batches(&mut group, batch_size_k, n_batches);
+                insert_in_batches(&mut group, &mut batch_cache, batch_size_k, n_batches);
             }
         }
     }
@@ -162,14 +218,26 @@ fn compare_maps(c: &mut Criterion) {
     {
         let mut group = c.benchmark_group("get_existing");
         for map_size_k in [100, 1000, 128_000] {
-            get_existing(&mut group, map_size_k);
+            get(
+                &mut group,
+                &mut batch_cache,
+                &mut keys_cache,
+                map_size_k,
+                true,
+            );
         }
     }
 
     {
         let mut group = c.benchmark_group("get_non_existing");
         for map_size_k in [100, 1000, 128_000] {
-            get_non_existing(&mut group, map_size_k);
+            get(
+                &mut group,
+                &mut batch_cache,
+                &mut keys_cache,
+                map_size_k,
+                false,
+            );
         }
     }
 }