Add memory overhead test for the datastore (#6067)
Part of #6066

Shows that the datastore currently uses 936 bytes to store a 16-byte RowId,
an 8-byte TimeInt, and an 8-byte f64, i.e. around 29x the memory use it
should have.
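
For reference, a back-of-the-envelope check of the numbers above (an illustration only, not code from this PR):

```rust
fn main() {
    // Sizes as quoted above: 16-byte RowId + 8-byte TimeInt + 8-byte f64.
    let ideal_row_bytes = 16 + 8 + 8; // = 32
    let measured_row_bytes = 936; // per-row figure from the snapshot added in this PR
    // 936 / 32 = 29.25, i.e. roughly 29x the memory the payload itself needs.
    println!(
        "overhead: ~{:.1}x",
        measured_row_bytes as f64 / ideal_row_bytes as f64
    );
}
```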

### Checklist
* [x] I have read and agree to the [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/6067?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/6067?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!

- [PR Build Summary](https://build.rerun.io/pr/6067)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.

---------

Co-authored-by: Clement Rey <[email protected]>
emilk and teh-cmc authored Apr 23, 2024
1 parent 72e80c6 commit f69fa6a
Showing 5 changed files with 122 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

(Generated file; diff not rendered by default.)

4 changes: 3 additions & 1 deletion crates/re_data_store/Cargo.toml
@@ -30,8 +30,8 @@ deadlock_detection = ["parking_lot/deadlock_detection"]
# Rerun dependencies:
re_format.workspace = true
re_format_arrow.workspace = true
re_log_types.workspace = true
re_log = { workspace = true, features = ["setup"] }
re_log_types.workspace = true
re_tracing.workspace = true
re_types_core.workspace = true

@@ -50,10 +50,12 @@ web-time.workspace = true


[dev-dependencies]
re_format.workspace = true
re_types = { workspace = true, features = ["datagen", "testing"] }

anyhow.workspace = true
criterion.workspace = true
insta.workspace = true
mimalloc.workspace = true
rand.workspace = true
similar-asserts.workspace = true
107 changes: 107 additions & 0 deletions crates/re_data_store/tests/memory_test.rs
@@ -0,0 +1,107 @@
//! Measures the memory overhead of the data store.

use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};

thread_local! {
    static LIVE_BYTES_IN_THREAD: AtomicUsize = AtomicUsize::new(0);
}

pub struct TrackingAllocator {
    allocator: std::alloc::System,
}

#[global_allocator]
pub static GLOBAL_ALLOCATOR: TrackingAllocator = TrackingAllocator {
    allocator: std::alloc::System,
};

#[allow(unsafe_code)]
// SAFETY:
// We just do book-keeping and then let another allocator do all the actual work.
unsafe impl std::alloc::GlobalAlloc for TrackingAllocator {
    #[allow(clippy::let_and_return)]
    unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
        LIVE_BYTES_IN_THREAD.with(|bytes| bytes.fetch_add(layout.size(), Relaxed));

        // SAFETY:
        // Just deferring
        unsafe { self.allocator.alloc(layout) }
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
        LIVE_BYTES_IN_THREAD.with(|bytes| bytes.fetch_sub(layout.size(), Relaxed));

        // SAFETY:
        // Just deferring
        unsafe { self.allocator.dealloc(ptr, layout) };
    }
}

fn live_bytes() -> usize {
    LIVE_BYTES_IN_THREAD.with(|bytes| bytes.load(Relaxed))
}

/// Assumes all allocations are on the calling thread.
///
/// The reason we use thread-local counting is so that
/// the counting won't be confused by any other running threads (e.g. other tests).
fn memory_use<R>(run: impl Fn() -> R) -> usize {
    let used_bytes_start = live_bytes();
    let ret = run();
    let bytes_used = live_bytes() - used_bytes_start;
    drop(ret);
    bytes_used
}

// ----------------------------------------------------------------------------

use re_data_store::{DataStore, DataStoreConfig};
use re_log_types::{DataRow, RowId, TimePoint, TimeType, Timeline};
use re_types::components::{InstanceKey, Scalar};
use re_types_core::Loggable as _;

/// The memory overhead of storing many scalars in the store.
#[test]
fn scalar_memory_overhead() {
    re_log::setup_logging();

    const NUM_SCALARS: usize = 1024 * 1024;

    let total_mem_use = memory_use(|| {
        let mut store = DataStore::new(
            re_log_types::StoreId::random(re_log_types::StoreKind::Recording),
            InstanceKey::name(),
            DataStoreConfig::default(),
        );

        for i in 0..NUM_SCALARS {
            let entity_path = re_log_types::entity_path!("scalar");
            let timepoint =
                TimePoint::default().with(Timeline::new("log_time", TimeType::Time), i as i64);
            let num_instances = 1;
            let row = DataRow::from_cells1_sized(
                RowId::new(),
                entity_path,
                timepoint,
                num_instances,
                vec![Scalar(i as f64)],
            )
            .unwrap();
            store.insert_row(&row).unwrap();
        }

        store
    });

    insta::assert_debug_snapshot!(
        "scalars_on_one_timeline",
        [
            format!("{NUM_SCALARS} scalars"),
            format!("{} in total", re_format::format_bytes(total_mem_use as _)),
            format!(
                "{} per row",
                re_format::format_bytes(total_mem_use as f64 / NUM_SCALARS as f64)
            ),
        ]
    );
}
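
As an aside, a minimal sketch of how the same `memory_use` helper behaves on a plain allocation (a hypothetical test, not part of this diff): for a `Vec` of a million bytes it should report at least the payload size, since the tracking allocator counts every live allocation on the calling thread.

```rust
// Hypothetical sanity check, reusing the memory_use helper defined above.
#[test]
fn vec_memory_use_sanity() {
    let bytes = memory_use(|| vec![0_u8; 1024 * 1024]);
    // The Vec's heap buffer alone is 1 MiB; the reported figure should be at least that.
    assert!(bytes >= 1024 * 1024);
}
```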
@@ -0,0 +1,10 @@
---
source: crates/re_data_store/tests/memory_test.rs
assertion_line: 96
expression: "[format!(\"{NUM_SCALARS} scalars\"),\n format!(\"{} in total\", re_format::format_bytes(total_mem_use as _)),\n format!(\"{} per row\",\n re_format::format_bytes(total_mem_use as f64 / NUM_SCALARS as\n f64))]"
---
[
"1048576 scalars",
"936 MiB in total",
"936 B per row",
]
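
A note on reading the snapshot: the total and per-row figures coming out to the same number is not a typo. The test inserts `1024 * 1024` rows, which is exactly the number of bytes in a MiB, so N bytes per row works out to roughly N MiB in total (modulo the rounding done by `re_format::format_bytes`). A quick check of the arithmetic:

```rust
fn main() {
    let num_rows = 1024 * 1024; // NUM_SCALARS
    let bytes_per_row = 936;
    // num_rows happens to equal the number of bytes in a MiB,
    // so N bytes per row corresponds to N MiB in total.
    let total_mib = (num_rows * bytes_per_row) / (1024 * 1024);
    assert_eq!(total_mib, bytes_per_row); // 936
}
```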
2 changes: 1 addition & 1 deletion pixi.toml
@@ -186,7 +186,7 @@ meilisearch = "meilisearch --db-path=./meilisearch/data.ms --dump-dir=./meilisea
download-design-tokens = "curl --fail https://rerun-docs.netlify.app/api/tokens | jq > crates/re_ui/data/design_tokens.json"

# Update the results of `insta` snapshot regression tests
rs-update-insta-tests = "cargo test && cargo insta review"
rs-update-insta-tests = "cargo test ; cargo insta review"

# Upload image to gcloud storage.
upload-image = "python scripts/upload_image.py"
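Presumably the switch from `&&` to `;` here is so that `cargo insta review` still runs when `cargo test` exits non-zero, which is exactly the case when snapshots have changed and need reviewing.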
