diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index dc0ec46fa20f..088359ff3a5b 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,3 +1,13 @@
+### WHAT
+copilot:summary
+​
+
+### WHY
+
+
+### HOW
+copilot:walkthrough
+
 ### Checklist
 * [ ] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
 * [ ] I've included a screenshot or gif (if applicable)
diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml
index 62624d0e0bd8..aed2272b2b4b 100644
--- a/.github/workflows/labels.yml
+++ b/.github/workflows/labels.yml
@@ -29,4 +29,4 @@ jobs:
         with:
           mode: minimum
           count: 1
-          labels: "πŸ“Š analytics, πŸͺ³ bug, πŸ§‘β€πŸ’» dev experience, πŸ“– documentation, πŸ’¬ discussion, examples, πŸ“‰ performance, 🐍 python API, ⛃ re_datastore, πŸ“Ί re_viewer, πŸ”Ί re_renderer, β›΄ release, πŸ¦€ rust SDK, πŸ”¨ testing, ui, πŸ•ΈοΈ web"
+          labels: "πŸ“Š analytics, πŸͺ³ bug, πŸ§‘β€πŸ’» dev experience, dependencies, πŸ“– documentation, πŸ’¬ discussion, examples, πŸ“‰ performance, 🐍 python API, ⛃ re_datastore, πŸ“Ί re_viewer, πŸ”Ί re_renderer, 🚜 refactor, β›΄ release, πŸ¦€ rust SDK, πŸ”¨ testing, ui, πŸ•ΈοΈ web"
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 80c2f14c1005..491c5353550a 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -63,7 +63,159 @@ jobs:
       just py-requirements
 
   # ---------------------------------------------------------------------------
 
+  # We need one wheel-build to be special so the other builds (namely mac arm) can use its rrd
+  # This copy-paste is awful, but we'll refactor the build soon.
+  wheels-linux:
+    if: github.event_name == 'push' || github.event.inputs.force_build_wheel
+    name: Build Python Wheels (Linux)
+    runs-on: ubuntu-latest
+    container:
+      image: rerunio/ci_docker:0.6
+    steps:
+      - uses: actions/checkout@v3
+
+      # These should already be in the docker container, but run for good measure. A no-op install
+      # should be fast, and this way things don't break if we add new packages without rebuilding
+      # docker
+      - name: Cache APT Packages
+        uses: awalsh128/cache-apt-pkgs-action@v1.2.2
+        with:
+          packages: ${{ env.UBUNTU_REQUIRED_PKGS }}
+          version: 2.0 # Increment this to pull newer packages
+          execute_install_scripts: true
+
+      - name: Set up cargo cache
+        uses: Swatinem/rust-cache@v2
+        with:
+          env-vars: CARGO CC CFLAGS CXX CMAKE RUST CACHE_KEY
+          # Don't update the cache -- it will be updated by the lint job
+          # TODO(jleibs): this job will likely run before rust.yml updates
+          # the cache. Better cross-job sequencing would be nice here
+          save-if: False
+
+      # These should already be in the docker container, but run for good measure. A no-op install
+      # should be fast, and this way things don't break if we add new packages without rebuilding
+      # docker
+      - run: pip install -r rerun_py/requirements-build.txt
+
+      # ----------------------------------------------------------------------------------
+
+      - name: Patch Cargo.toml for pre-release
+        if: github.ref == 'refs/heads/main'
+        # After patching the pre-release version, run cargo update.
+        # This updates the Cargo.lock file with the new version numbers and keeps the wheel build from failing
+        run: |
+          python3 scripts/version_util.py --patch_prerelease
+          cargo update -w
+
+      - name: Version check for tagged-release
+        if: startsWith(github.ref, 'refs/tags/v')
+        # This call to version_util.py will assert version from Cargo.toml matches git tagged version vX.Y.Z
+        run: |
+          python3 scripts/version_util.py --check_version
+
+      - name: Store the expected version
+        # Find the current cargo version and store it in the GITHUB_ENV var: `expected_version`
+        shell: bash
+        run: |
+          echo "expected_version=$(python3 scripts/version_util.py --bare_cargo_version)" >> $GITHUB_ENV
+
+      - name: Build Wheel
+        uses: PyO3/maturin-action@v1
+        with:
+          maturin-version: "0.14.10"
+          manylinux: manylinux_2_31
+          container: off
+          command: build
+          args: |
+            --manifest-path rerun_py/Cargo.toml
+            --release
+            --target x86_64-unknown-linux-gnu
+            --no-default-features
+            --features pypi
+            --out pre-dist
+
+      - name: Install wheel dependencies
+        # First we install the dependencies manually so we can use `--no-index` when installing the wheel.
+        # This needs to be a separate step for some reason or the following step fails
+        # TODO(jleibs): pull these deps from pyproject.toml
+        # TODO(jleibs): understand why deps can't be installed in the same step as the wheel
+        shell: bash
+        run: |
+          pip install deprecated numpy>=1.23 pyarrow==10.0.1
+
+      - name: Install built wheel
+        # Now install the wheel using a specific version and --no-index to guarantee we get the version from
+        # the pre-dist folder. Note we don't use --force-reinstall here because --no-index means it wouldn't
+        # find the dependencies to reinstall them.
+        shell: bash
+        run: |
+          pip uninstall rerun-sdk
+          pip install rerun-sdk==${{ env.expected_version }} --no-index --find-links pre-dist
+
+      - name: Verify built wheel version
+        shell: bash
+        run: |
+          python3 -m rerun --version
+          which rerun
+          rerun --version
+
+      - name: Run unit tests
+        shell: bash
+        run: cd rerun_py/tests && pytest
+
+      - name: Run e2e test
+        shell: bash
+        run: RUST_LOG=debug scripts/run_python_e2e_test.py --no-build # rerun-sdk is already built and installed
+
+      - name: Unpack the wheel
+        shell: bash
+        run: |
+          mkdir unpack-dist
+          wheel unpack pre-dist/*.whl --dest unpack-dist
+
+      - name: Get the folder name
+        shell: bash
+        run: |
+          echo "pkg_folder=$(ls unpack-dist)" >> $GITHUB_ENV
+
+      - name: Cache RRD dataset
+        id: dataset
+        uses: actions/cache@v3
+        with:
+          path: examples/python/colmap/dataset/
+          # TODO(jleibs): Derive this key from the invocation below
+          key: colmap-dataset-colmap-fiat-v0
+
+      - name: Generate Embedded RRD file
+        shell: bash
+        # If you change the line below you should almost definitely change the `key:` line above by giving it a new, unique name
+        run: |
+          mkdir rrd
+          pip install -r examples/python/colmap/requirements.txt
+          python3 examples/python/colmap/main.py --dataset colmap_fiat --resize 800x600 --save rrd/colmap_fiat.rrd
+          cp rrd/colmap_fiat.rrd unpack-dist/${{ env.pkg_folder }}/rerun_sdk/rerun_demo/colmap_fiat.rrd
+
+      - name: Repack the wheel
+        shell: bash
+        run: |
+          mkdir dist
+          wheel pack unpack-dist/${{ env.pkg_folder }} --dest dist/
+
+      - name: Upload wheels
+        uses: actions/upload-artifact@v3
+        with:
+          name: wheels
+          path: dist
+
+      # All platforms are currently creating the same rrd file, upload one of them
+      - name: Save RRD artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: rrd
+          path: rrd
+
+  # ---------------------------------------------------------------------------
+
   matrix-setup:
     # Building all the wheels is expensive, so we only run this job when we push (to main or release tags),
     # or if the job was manually triggered with `force_build_wheel` set to true.
@@ -85,42 +237,30 @@ jobs:
       - id: set-matrix
        shell: bash
+        # TODO(jleibs): figure out why tests are failing to complete on `x86_64-apple-darwin`
+        # See: https://github.com/rerun-io/rerun/pull/1853
        run: |
          matrix=()
-          matrix+=('{"platform": "macos", "target": "x86_64-apple-darwin", "runs_on": "macos-latest"},')
-          matrix+=('{"platform": "macos", "target": "aarch64-apple-darwin", "runs_on": "macos-latest"},')
-          matrix+=('{"platform": "windows", "target": "x86_64-pc-windows-msvc", "runs_on": "windows-latest-8-cores"},')
-          matrix+=('{"platform": "linux", "target": "x86_64-unknown-linux-gnu", "runs_on": "ubuntu-latest-16-cores", container: {"image": "rerunio/ci_docker:0.6"}}')
+          matrix+=('{"platform": "macos", "target": "x86_64-apple-darwin", "run_tests": false, "runs_on": "macos-latest" },')
+          matrix+=('{"platform": "macos", "target": "aarch64-apple-darwin", "run_tests": false, "runs_on": "macos-latest" },') # NOTE: we can't run tests on arm since our macos runner is x86_64
+          matrix+=('{"platform": "windows", "target": "x86_64-pc-windows-msvc", "run_tests": true, "runs_on": "windows-latest-8-cores"},')
          echo "Matrix values: ${matrix[@]}"
          echo "matrix={\"include\":[${matrix[@]}]}" >> $GITHUB_OUTPUT
 
   wheels:
-    name: Build Python Wheels
-    needs: [lint, matrix-setup]
+    name: Build Remaining Python Wheels
+    needs: [lint, matrix-setup, wheels-linux]
     strategy:
       matrix: ${{fromJson(needs.matrix-setup.outputs.matrix)}}
     runs-on: ${{ matrix.runs_on }}
-    container: ${{ matrix.container }}
-
     steps:
       - uses: actions/checkout@v3
 
-      # These should already be in the docker container, but run for good measure. A no-op install
-      # should be fast, and this way things don't break if we add new packages without rebuilding
-      # docker
-      - name: Cache APT Packages
-        if: matrix.platform == 'linux'
-        uses: awalsh128/cache-apt-pkgs-action@v1.2.2
-        with:
-          packages: ${{ env.UBUNTU_REQUIRED_PKGS }}
-          version: 2.0 # Increment this to pull newer packages
-          execute_install_scripts: true
-
       - name: Set up cargo cache
         uses: Swatinem/rust-cache@v2
         with:
@@ -133,7 +273,6 @@ jobs:
       # The pip-cache setup logic doesn't work in the ubuntu docker container
       # That's probably fine since we bake these deps into the container already
       - name: Setup python
-        if: matrix.platform != 'linux'
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
@@ -194,6 +333,12 @@ jobs:
        run: |
          python3 scripts/version_util.py --check_version
 
+      - name: Store the expected version
+        # Find the current cargo version and store it in the GITHUB_ENV var: `expected_version`
+        shell: bash
+        run: |
+          echo "expected_version=$(python3 scripts/version_util.py --bare_cargo_version)" >> $GITHUB_ENV
+
       - name: Build Wheel
        uses: PyO3/maturin-action@v1
        with:
@@ -210,12 +355,36 @@ jobs:
            --out pre-dist
 
       - name: Install built wheel
+        if: ${{ matrix.run_tests }}
+        # First we install the dependencies manually so we can use `--no-index` when installing the wheel.
+        # Then install the wheel using a specific version and --no-index to guarantee we get the version from
+        # the pre-dist folder. Note we don't use --force-reinstall here because --no-index means it wouldn't
+        # find the dependencies to reinstall them.
+        # TODO(jleibs): pull these deps from pyproject.toml
+        shell: bash
+        run: |
+          pip uninstall rerun-sdk
+          pip install deprecated numpy>=1.23 pyarrow==10.0.1
+          pip install rerun-sdk==${{ env.expected_version }} --no-index --find-links pre-dist
+
+      - name: Verify built wheel version
+        if: ${{ matrix.run_tests }}
+        shell: bash
        run: |
-          pip install rerun-sdk --find-links pre-dist --force-reinstall
+          python3 -m rerun --version
+          which rerun
+          rerun --version
 
-      - name: Run tests
+      - name: Run unit tests
+        if: ${{ matrix.run_tests }}
+        shell: bash
        run: cd rerun_py/tests && pytest
 
+      - name: Run e2e test
+        if: ${{ matrix.run_tests }}
+        shell: bash
+        run: RUST_LOG=debug scripts/run_python_e2e_test.py --no-build # rerun-sdk is already built and installed
+
       - name: Unpack the wheel
        shell: bash
        run: |
@@ -227,21 +396,16 @@ jobs:
        run: |
          echo "pkg_folder=$(ls unpack-dist)" >> $GITHUB_ENV
 
-      - name: Cache RRD dataset
-        id: dataset
-        uses: actions/cache@v3
+      - name: Download RRD
+        uses: actions/download-artifact@v3
        with:
-          path: examples/python/colmap/dataset/
-          # TODO(jleibs): Derive this key from the invocation below
-          key: colmap-dataset-colmap-fiat-v0
+          name: rrd
+          path: rrd
 
-      - name: Generate Embedded RRD file
+      - name: Insert the rrd
        shell: bash
        # If you change the line below you should almost definitely change the `key:` line above by giving it a new, unique name
        run: |
-          mkdir rrd
-          pip install -r examples/python/colmap/requirements.txt
-          python3 examples/python/colmap/main.py --dataset colmap_fiat --resize 800x600 --save rrd/colmap_fiat.rrd
          cp rrd/colmap_fiat.rrd unpack-dist/${{ env.pkg_folder }}/rerun_sdk/rerun_demo/colmap_fiat.rrd
 
       - name: Repack the wheel
@@ -256,14 +420,6 @@ jobs:
          name: wheels
          path: dist
 
-      # All platforms are currently creating the same rrd file, upload one of them
-      - name: Save RRD artifact
-        if: matrix.platform == 'linux'
-        uses: actions/upload-artifact@v3
-        with:
-          name: rrd
-          path: rrd
-
   # ---------------------------------------------------------------------------
 
   upload_rrd:
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 9821e24ad139..f48703b53395 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -38,6 +38,7 @@ permissions:
   contents: write
 
 jobs:
+  # See the benchmarks at https://ref.rerun.io/dev/bench/
   rs-benchmarks:
     name: Rust Criterion benchmarks
@@ -70,12 +71,13 @@ jobs:
            --all-features \
            -p re_arrow_store \
            -p re_data_store \
-            -p re_log_types \
+            -p re_log_encoding \
            -p re_query \
            -p re_tuid \
            -- --output-format=bencher | tee output.txt
 
       - name: Store benchmark result
+        # https://github.com/benchmark-action/github-action-benchmark
        uses: benchmark-action/github-action-benchmark@v1
        with:
          name: Rust Benchmark
@@ -87,7 +89,7 @@ jobs:
          comment-on-alert: true
          alert-threshold: "150%"
          fail-on-alert: true
-          comment-always: true
+          comment-always: false # Generates too much GitHub notification spam
 
          # Save results and push to GitHub only on main
          save-data-file: ${{ github.ref == 'refs/heads/main' }}
@@ -137,18 +139,20 @@ jobs:
          command: cranky
          args: --all-targets --all-features -- --deny warnings
 
-      - name: Check no default features
+      # --------------------------------------------------------------------------------
+      # Check a few important permutations of the feature flags for our `rerun` library:
+      - name: Check rerun with `--no-default-features`
        uses: actions-rs/cargo@v1
        with:
-          command: check
-          args: --locked --no-default-features --features __ci --lib
+          command: cranky
+          args: --locked -p rerun --no-default-features
 
-      # Check a few important permutations of the feature flags for our `rerun` library:
-      - name: Check rerun with --features sdk
+      - name: Check rerun with `--features sdk`
        uses: actions-rs/cargo@v1
        with:
-          command: check
-          args: --locked --no-default-features --features sdk
+          command: cranky
+          args: --locked -p rerun --no-default-features --features sdk
+      # --------------------------------------------------------------------------------
 
       - name: Test doc-tests
        uses: actions-rs/cargo@v1
@@ -339,29 +343,26 @@ jobs:
     env:
       RUSTFLAGS: ${{env.RUSTFLAGS}}
       RUSTDOCFLAGS: ${{env.RUSTDOCFLAGS}}
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: cargo deny aarch64-apple-darwin check
-        uses: actions-rs/cargo@v1
-        with:
-          command: deny
-          args: --log-level=error --all-features --target aarch64-apple-darwin check
 
-      - name: cargo deny wasm32-unknown-unknown check
-        uses: actions-rs/cargo@v1
-        with:
-          command: deny
-          args: --log-level=error --all-features --target wasm32-unknown-unknown check
-
-      - name: cargo deny x86_64-pc-windows-msvc
-        uses: actions-rs/cargo@v1
-        with:
-          command: deny
-          args: --log-level=error --all-features --target x86_64-pc-windows-msvc check
+    # TODO(emilk): remove this matrix when https://github.com/EmbarkStudios/cargo-deny/issues/324 is resolved
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - i686-pc-windows-gnu
+          - i686-pc-windows-msvc
+          - i686-unknown-linux-gnu
+          - wasm32-unknown-unknown
+          - x86_64-apple-darwin
+          - x86_64-pc-windows-gnu
+          - x86_64-pc-windows-msvc
+          - x86_64-unknown-linux-gnu
+          - x86_64-unknown-redox
 
-      - name: cargo deny x86_64-unknown-linux-musl check
-        uses: actions-rs/cargo@v1
+    steps:
+      - uses: actions/checkout@v3
+      - uses: EmbarkStudios/cargo-deny-action@v1
        with:
-          command: deny
-          args: --log-level=error --all-features --target x86_64-unknown-linux-musl check
+          command: check
+          log-level: error
+          arguments: --all-features --target ${{ matrix.platform }}
diff --git a/.vscode/settings.json b/.vscode/settings.json
index ed03ba4db684..c52ea2c7178d 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -25,6 +25,7 @@
     "andreas",
     "bbox",
     "bindgroup",
+    "colormap",
     "emath",
     "framebuffer",
     "hoverable",
diff --git a/Cargo.toml b/Cargo.toml
index baa3b459875f..6a433c6954ee 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -33,6 +33,7 @@ re_error = { path = "crates/re_error", version = "0.4.0" }
 re_format = { path = "crates/re_format", version = "0.4.0" }
 re_int_histogram = { path = "crates/re_int_histogram", version = "0.4.0" }
 re_log = { path = "crates/re_log", version = "0.4.0" }
+re_log_encoding = { path = "crates/re_log_encoding", version = "0.4.0" }
 re_log_types = { path = "crates/re_log_types", version = "0.4.0" }
 re_memory = { path = "crates/re_memory", version = "0.4.0" }
 re_query = { path = "crates/re_query", version = "0.4.0" }
@@ -79,8 +80,10 @@ polars-core = "0.27.1"
 polars-lazy = "0.27.1"
 polars-ops = "0.27.1"
 puffin = "0.14"
+smallvec = { version = "1.0", features = ["const_generics", "union"] }
 thiserror = "1.0"
 time = { version = "0.3", features = ["wasm-bindgen"] }
+tinyvec = { version = "1.6", features = ["alloc", "rustc_1_55"] }
 tokio = "1.24"
 wgpu = { version = "0.15.1", default-features = false }
 wgpu-core = { version = "0.15.1", default-features = false }
@@ -88,7 +91,8 @@ wgpu-hal = { version = "0.15.4", default-features = false }
 
 [profile.dev]
-opt-level = 1 # Make debug builds run faster
+opt-level = 1   # Make debug builds run faster
+panic = "abort" # This leads to better optimizations and smaller binaries (and is the default in Wasm anyways).
 
 # Optimize all dependencies even in debug builds (does not affect workspace packages):
 [profile.dev.package."*"]
 opt-level = 2
@@ -96,6 +100,7 @@
 
 [profile.release]
 # debug = true # good for profilers
+panic = "abort" # This leads to better optimizations and smaller binaries (and is the default in Wasm anyways).
 
 [profile.bench]
 debug = true
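A note on the `panic = "abort"` switch introduced in both profiles above: once unwinding is disabled, `std::panic::catch_unwind` can no longer intercept a panic, which is why the clippy configuration later in this diff adds it to `disallowed-methods`. A minimal sketch of the behavior difference (not part of the diff, plain std only):

```rust
fn main() {
    // Under the default `panic = "unwind"`, this returns `Err` and the program
    // keeps running. With `panic = "abort"` -- as configured in the profiles
    // above -- the process terminates on the spot instead, so the result
    // handling below is dead code and `catch_unwind` is just misleading.
    let result = std::panic::catch_unwind(|| {
        panic!("boom");
    });
    println!("only reachable with unwinding enabled: {result:?}");
}
```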
diff --git a/README.md b/README.md
index 2f5740a31b4c..bae9d9f0f30c 100644
--- a/README.md
+++ b/README.md
@@ -60,9 +60,8 @@ _Expect breaking changes!_ Some shortcomings:
 * Big point clouds (1M+) are slow ([#1136](https://github.com/rerun-io/rerun/issues/1136))
 * The data you want to visualize must fit in RAM.
-  - See for how to bound memory use
+  - See for how to bound memory use
   - We plan on having a disk-based data store some time in the future
-  - Additionally, Rerun is using more memory than it should at the moment ([#1242](https://github.com/rerun-io/rerun/pull/1242))
 * The Rust library takes a long time to compile
   - We have way too many big dependencies, and we are planning on improving the situation ([#1316](https://github.com/rerun-io/rerun/pull/1316))
diff --git a/clippy.toml b/clippy.toml
index c72661614556..4da41d009fbc 100644
--- a/clippy.toml
+++ b/clippy.toml
@@ -27,6 +27,8 @@ disallowed-methods = [
   "std::thread::spawn", # Use `std::thread::Builder` and name the thread
 
   "sha1::Digest::new", # SHA1 is cryptographically broken
+
+  "std::panic::catch_unwind", # We compile with `panic = "abort"`
 ]
 
 # https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_names
diff --git a/crates/re_arrow_store/Cargo.toml b/crates/re_arrow_store/Cargo.toml
index 8487f656b17f..ca67b770ea1b 100644
--- a/crates/re_arrow_store/Cargo.toml
+++ b/crates/re_arrow_store/Cargo.toml
@@ -25,6 +25,10 @@ deadlock_detection = ["parking_lot/deadlock_detection"]
 ## Integration with `polars`, to efficiently use the datastore with dataframes.
 polars = ["dep:polars-core", "dep:polars-ops"]
 
+## When set, only run the core set of benchmark suites.
+## Commonly set implicitly by --all-features, e.g. on CI.
+core_benchmarks_only = []
+
 [dependencies]
 
 # Rerun dependencies:
@@ -34,17 +38,14 @@ re_log.workspace = true
 
 # External dependencies:
 ahash.workspace = true
-anyhow.workspace = true
-arrow2 = { workspace = true, features = [
-  "compute_concatenate",
-  "compute_aggregate",
-] }
+arrow2 = { workspace = true, features = ["compute_concatenate"] }
+arrow2_convert.workspace = true
 document-features = "0.2"
 indent = "0.1"
 itertools = { workspace = true }
 nohash-hasher = "0.2"
 parking_lot.workspace = true
-static_assertions = "1.1"
+smallvec.workspace = true
 thiserror.workspace = true
 
 # Native dependencies:
@@ -69,6 +70,7 @@ polars-ops = { workspace = true, optional = true, features = [
 ]
 
 [dev-dependencies]
+anyhow.workspace = true
 criterion = "0.4"
 mimalloc.workspace = true
 polars-core = { workspace = true, features = [
@@ -81,7 +83,7 @@ polars-core = { workspace = true, features = [
   "sort_multiple",
 ] }
 rand = "0.8"
-
+tinyvec.workspace = true
 
 [lib]
 bench = false
@@ -111,3 +113,15 @@ required-features = ["polars"]
 [[bench]]
 name = "data_store"
 harness = false
+
+[[bench]]
+name = "arrow2"
+harness = false
+
+[[bench]]
+name = "arrow2_convert"
+harness = false
+
+[[bench]]
+name = "vectors"
+harness = false
diff --git a/crates/re_arrow_store/benches/arrow2.rs b/crates/re_arrow_store/benches/arrow2.rs
new file mode 100644
index 000000000000..77f682c57dc0
--- /dev/null
+++ b/crates/re_arrow_store/benches/arrow2.rs
@@ -0,0 +1,364 @@
+//! Keeping track of performance issues/regressions in `arrow2` that directly affect us.
+
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+use std::sync::Arc;
+
+use arrow2::{
+    array::{Array, PrimitiveArray, StructArray, UnionArray},
+    compute::aggregate::estimated_bytes_size,
+};
+use criterion::{criterion_group, Criterion};
+use itertools::Itertools;
+use re_log_types::{
+    component_types::{InstanceKey, Point2D, Rect2D},
+    datagen::{build_some_instances, build_some_point2d, build_some_rects},
+    external::arrow2_convert::serialize::TryIntoArrow,
+    DataCell, SerializableComponent,
+};
+
+// ---
+
+criterion_group!(benches, erased_clone, estimated_size_bytes);
+
+#[cfg(not(feature = "core_benchmarks_only"))]
+criterion::criterion_main!(benches);
+
+// Don't run these benchmarks on CI: they measure the performance of third-party libraries.
+#[cfg(feature = "core_benchmarks_only")]
+fn main() {}
+
+// ---
+
+#[cfg(not(debug_assertions))]
+const NUM_ROWS: usize = 10_000;
+#[cfg(not(debug_assertions))]
+const NUM_INSTANCES: usize = 100;
+
+// `cargo test` also runs the benchmark setup code, so make sure they run quickly:
+#[cfg(debug_assertions)]
+const NUM_ROWS: usize = 1;
+#[cfg(debug_assertions)]
+const NUM_INSTANCES: usize = 1;
+
+// ---
+
+#[derive(Debug, Clone, Copy)]
+enum ArrayKind {
+    /// E.g. an array of `InstanceKey`.
+    Primitive,
+
+    /// E.g. an array of `Point2D`.
+    Struct,
+
+    /// E.g. an array of `Rect2D`.
+    StructLarge,
+}
+
+impl std::fmt::Display for ArrayKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            ArrayKind::Primitive => "primitive",
+            ArrayKind::Struct => "struct",
+            ArrayKind::StructLarge => "struct_large",
+        })
+    }
+}
+
+fn erased_clone(c: &mut Criterion) {
+    let kind = [
+        ArrayKind::Primitive,
+        ArrayKind::Struct,
+        ArrayKind::StructLarge,
+    ];
+
+    for kind in kind {
+        let mut group = c.benchmark_group(format!(
+            "arrow2/size_bytes/{kind}/rows={NUM_ROWS}/instances={NUM_INSTANCES}"
+        ));
+        group.throughput(criterion::Throughput::Elements(NUM_ROWS as _));
+
+        match kind {
+            ArrayKind::Primitive => {
+                let data = build_some_instances(NUM_INSTANCES);
+                bench_arrow(&mut group, data.as_slice());
+                bench_native(&mut group, data.as_slice());
+            }
+            ArrayKind::Struct => {
+                let data = build_some_point2d(NUM_INSTANCES);
+                bench_arrow(&mut group, data.as_slice());
+                bench_native(&mut group, data.as_slice());
+            }
+            ArrayKind::StructLarge => {
+                let data = build_some_rects(NUM_INSTANCES);
+                bench_arrow(&mut group, data.as_slice());
+                bench_native(&mut group, data.as_slice());
+            }
+        }
+    }
+
+    // TODO(cmc): Use cells once `cell.size_bytes()` has landed (#1727)
+    fn bench_arrow<T: SerializableComponent>(
+        group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
+        data: &[T],
+    ) {
+        let arrays: Vec<Box<dyn Array>> = (0..NUM_ROWS)
+            .map(|_| TryIntoArrow::try_into_arrow(data).unwrap())
+            .collect_vec();
+
+        let total_size_bytes = arrays
+            .iter()
+            .map(|array| estimated_bytes_size(&**array) as u64)
+            .sum::<u64>();
+        assert!(total_size_bytes as usize >= NUM_ROWS * NUM_INSTANCES * std::mem::size_of::<T>());
+
+        group.bench_function("array", |b| {
+            b.iter(|| {
+                let sz = arrays
+                    .iter()
+                    .map(|array| estimated_bytes_size(&**array) as u64)
+                    .sum::<u64>();
+                assert_eq!(total_size_bytes, sz);
+                sz
+            });
+        });
+    }
+
+    fn bench_native<T: Clone + 'static>(
+        group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
+        data: &[T],
+    ) {
+        let vecs = (0..NUM_ROWS).map(|_| data.to_vec()).collect_vec();
+
+        let total_size_bytes = vecs
+            .iter()
+            .map(|vec| std::mem::size_of_val(vec.as_slice()) as u64)
+            .sum::<u64>();
+        assert!(total_size_bytes as usize >= NUM_ROWS * NUM_INSTANCES * std::mem::size_of::<T>());
+
+        {
+            let vecs = (0..NUM_ROWS).map(|_| data.to_vec()).collect_vec();
+            group.bench_function("vec", |b| {
+                b.iter(|| {
+                    let sz = vecs
+                        .iter()
+                        .map(|vec| std::mem::size_of_val(vec.as_slice()) as u64)
+                        .sum::<u64>();
+                    assert_eq!(total_size_bytes, sz);
+                    sz
+                });
+            });
+        }
+
+        trait SizeOf {
+            fn size_of(&self) -> usize;
+        }
+
+        impl<T> SizeOf for Vec<T> {
+            fn size_of(&self) -> usize {
+                std::mem::size_of_val(self.as_slice())
+            }
+        }
+
+        {
+            let vecs: Vec<Box<dyn SizeOf>> = (0..NUM_ROWS)
+                .map(|_| Box::new(data.to_vec()) as Box<dyn SizeOf>)
+                .collect_vec();
+
+            group.bench_function("vec/erased", |b| {
+                b.iter(|| {
+                    let sz = vecs.iter().map(|vec| vec.size_of() as u64).sum::<u64>();
+                    assert_eq!(total_size_bytes, sz);
+                    sz
+                });
+            });
+        }
+    }
+}
+
+fn estimated_size_bytes(c: &mut Criterion) {
+    let kind = [
+        ArrayKind::Primitive,
+        ArrayKind::Struct,
+        ArrayKind::StructLarge,
+    ];
+
+    for kind in kind {
+        let mut group = c.benchmark_group(format!(
+            "arrow2/erased_clone/{kind}/rows={NUM_ROWS}/instances={NUM_INSTANCES}"
+        ));
+        group.throughput(criterion::Throughput::Elements(NUM_ROWS as _));
+
+        fn generate_cells(kind: ArrayKind) -> Vec<DataCell> {
+            match kind {
+                ArrayKind::Primitive => (0..NUM_ROWS)
+                    .map(|_| DataCell::from_native(build_some_instances(NUM_INSTANCES).as_slice()))
+                    .collect(),
+                ArrayKind::Struct => (0..NUM_ROWS)
+                    .map(|_| DataCell::from_native(build_some_point2d(NUM_INSTANCES).as_slice()))
+                    .collect(),
+                ArrayKind::StructLarge => (0..NUM_ROWS)
+                    .map(|_| DataCell::from_native(build_some_rects(NUM_INSTANCES).as_slice()))
+                    .collect(),
+            }
+        }
+
+        {
+            {
+                let cells = generate_cells(kind);
+                let total_instances = cells.iter().map(|cell| cell.num_instances()).sum::<u32>();
+                assert_eq!(total_instances, (NUM_ROWS * NUM_INSTANCES) as u32);
+
+                group.bench_function("cell/arc_erased", |b| {
+                    b.iter(|| {
+                        let cells = cells.clone();
+                        assert_eq!(
+                            total_instances,
+                            cells.iter().map(|cell| cell.num_instances()).sum::<u32>()
+                        );
+                        cells
+                    });
+                });
+            }
+
+            {
+                let cells = generate_cells(kind).into_iter().map(Arc::new).collect_vec();
+                let total_instances = cells.iter().map(|cell| cell.num_instances()).sum::<u32>();
+                assert_eq!(total_instances, (NUM_ROWS * NUM_INSTANCES) as u32);
+
+                group.bench_function("cell/wrapped_in_arc", |b| {
+                    b.iter(|| {
+                        let cells = cells.clone();
+                        assert_eq!(
+                            total_instances,
+                            cells.iter().map(|cell| cell.num_instances()).sum::<u32>()
+                        );
+                        cells
+                    });
+                });
+            }
+
+            {
+                let cells = generate_cells(kind);
+                let arrays = cells.iter().map(|cell| cell.to_arrow()).collect_vec();
+                let total_instances = arrays.iter().map(|array| array.len() as u32).sum::<u32>();
+                assert_eq!(total_instances, (NUM_ROWS * NUM_INSTANCES) as u32);
+
+                group.bench_function("array", |b| {
+                    b.iter(|| {
+                        let arrays = arrays.clone();
+                        assert_eq!(
+                            total_instances,
+                            arrays.iter().map(|array| array.len() as u32).sum::<u32>()
+                        );
+                        arrays
+                    });
+                });
+            }
+
+            match kind {
+                ArrayKind::Primitive => {
+                    bench_downcast_first::<PrimitiveArray<u64>>(&mut group, kind);
+                }
+                ArrayKind::Struct => bench_downcast_first::<StructArray>(&mut group, kind),
+                ArrayKind::StructLarge => bench_downcast_first::<UnionArray>(&mut group, kind),
+            }
+
+            fn bench_downcast_first<A: Array + Clone + 'static>(
+                group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
+                kind: ArrayKind,
+            ) {
+                let cells = generate_cells(kind);
+                let arrays = cells
+                    .iter()
+                    .map(|cell| {
+                        cell.as_arrow_ref()
+                            .as_any()
+                            .downcast_ref::<A>()
+                            .unwrap()
+                            .clone()
+                    })
+                    .collect_vec();
+                let total_instances = arrays.iter().map(|array| array.len() as u32).sum::<u32>();
+                assert_eq!(total_instances, (NUM_ROWS * NUM_INSTANCES) as u32);
+
+                group.bench_function("array/downcast_first", |b| {
+                    b.iter(|| {
+                        let arrays = arrays.clone();
+                        assert_eq!(
+                            total_instances,
+                            arrays.iter().map(|array| array.len() as u32).sum::<u32>()
+                        );
+                        arrays
+                    });
+                });
+            }
+        }
+
+        {
+            fn generate_points() -> Vec<Vec<Point2D>> {
+                (0..NUM_ROWS)
+                    .map(|_| build_some_point2d(NUM_INSTANCES))
+                    .collect()
+            }
+
+            fn generate_keys() -> Vec<Vec<InstanceKey>> {
+                (0..NUM_ROWS)
+                    .map(|_| build_some_instances(NUM_INSTANCES))
+                    .collect()
+            }
+
+            fn generate_rects() -> Vec<Vec<Rect2D>> {
+                (0..NUM_ROWS)
+                    .map(|_| build_some_rects(NUM_INSTANCES))
+                    .collect()
+            }
+
+            match kind {
+                ArrayKind::Primitive => bench_std(&mut group, generate_keys()),
+                ArrayKind::Struct => bench_std(&mut group, generate_points()),
+                ArrayKind::StructLarge => bench_std(&mut group, generate_rects()),
+            }
+
+            fn bench_std<T: Clone>(
+                group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
+                data: Vec<Vec<T>>,
+            ) {
+                {
+                    let vecs = data.clone();
+                    let total_instances = vecs.iter().map(|vec| vec.len() as u32).sum::<u32>();
+                    assert_eq!(total_instances, (NUM_ROWS * NUM_INSTANCES) as u32);
+
+                    group.bench_function("vec/full_copy", |b| {
+                        b.iter(|| {
+                            let vecs = vecs.clone();
+                            assert_eq!(
+                                total_instances,
+                                vecs.iter().map(|vec| vec.len() as u32).sum::<u32>()
+                            );
+                            vecs
+                        });
+                    });
+                }
+
+                {
+                    let vecs = data.into_iter().map(Arc::new).collect_vec();
+                    let total_instances = vecs.iter().map(|vec| vec.len() as u32).sum::<u32>();
+                    assert_eq!(total_instances, (NUM_ROWS * NUM_INSTANCES) as u32);
+
+                    group.bench_function("vec/wrapped_in_arc", |b| {
+                        b.iter(|| {
+                            let vecs = vecs.clone();
+                            assert_eq!(
+                                total_instances,
+                                vecs.iter().map(|vec| vec.len() as u32).sum::<u32>()
+                            );
+                            vecs
+                        });
+                    });
+                }
+            }
+        }
+    }
+}
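For readers unfamiliar with the `estimated_bytes_size` call that the benchmark above exercises: it walks an arrow2 array (value buffers, offsets, validity bitmaps) and returns its estimated heap footprint. A minimal standalone sketch using the same arrow2 APIs as the benchmark (not part of the diff):

```rust
use arrow2::{array::PrimitiveArray, compute::aggregate::estimated_bytes_size};

fn main() {
    // Build a primitive array and ask arrow2 for its estimated heap size --
    // this is the same call the size-measuring benchmark above times in a loop.
    let array = PrimitiveArray::from_vec((0..100u64).collect());
    let size = estimated_bytes_size(&array);
    // At minimum the values buffer itself must be accounted for:
    assert!(size >= 100 * std::mem::size_of::<u64>());
    println!("estimated size: {size} bytes");
}
```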
diff --git a/crates/re_arrow_store/benches/arrow2_convert.rs b/crates/re_arrow_store/benches/arrow2_convert.rs
new file mode 100644
index 000000000000..e0d4ce95abdc
--- /dev/null
+++ b/crates/re_arrow_store/benches/arrow2_convert.rs
@@ -0,0 +1,147 @@
+//! Keeping track of performance issues/regressions in `arrow2_convert` that directly affect us.
+
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+use arrow2::{array::PrimitiveArray, datatypes::PhysicalType, types::PrimitiveType};
+use criterion::{criterion_group, Criterion};
+use re_log_types::{
+    component_types::InstanceKey, external::arrow2_convert::deserialize::TryIntoCollection,
+    Component as _, DataCell,
+};
+
+// ---
+
+criterion_group!(benches, serialize, deserialize);
+
+#[cfg(not(feature = "core_benchmarks_only"))]
+criterion::criterion_main!(benches);
+
+// Don't run these benchmarks on CI: they measure the performance of third-party libraries.
+#[cfg(feature = "core_benchmarks_only")]
+fn main() {}
+
+// ---
+
+#[cfg(not(debug_assertions))]
+const NUM_INSTANCES: usize = 100_000;
+
+// `cargo test` also runs the benchmark setup code, so make sure they run quickly:
+#[cfg(debug_assertions)]
+const NUM_INSTANCES: usize = 1;
+
+// ---
+
+fn serialize(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!(
+        "arrow2_convert/serialize/primitive/instances={NUM_INSTANCES}"
+    ));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        group.bench_function("arrow2_convert", |b| {
+            b.iter(|| {
+                let cell = DataCell::from_component::<InstanceKey>(0..NUM_INSTANCES as u64);
+                assert_eq!(NUM_INSTANCES as u32, cell.num_instances());
+                assert_eq!(
+                    cell.datatype().to_physical_type(),
+                    PhysicalType::Primitive(PrimitiveType::UInt64)
+                );
+                cell
+            });
+        });
+    }
+
+    {
+        group.bench_function("arrow2/from_values", |b| {
+            b.iter(|| {
+                let values = PrimitiveArray::from_values(0..NUM_INSTANCES as u64).boxed();
+                let cell = crate::DataCell::from_arrow(InstanceKey::name(), values);
+                assert_eq!(NUM_INSTANCES as u32, cell.num_instances());
+                assert_eq!(
+                    cell.datatype().to_physical_type(),
+                    PhysicalType::Primitive(PrimitiveType::UInt64)
+                );
+                cell
+            });
+        });
+    }
+
+    {
+        group.bench_function("arrow2/from_vec", |b| {
+            b.iter(|| {
+                // NOTE: We do the `collect()` here on purpose!
+                //
+                // All of these APIs have to allocate an array under the hood, except `from_vec`
+                // which is O(1) (it just unsafely reuses the vec's data pointer).
+                // We need to measure the collection in order to have a leveled playing field.
+                let values = PrimitiveArray::from_vec((0..NUM_INSTANCES as u64).collect()).boxed();
+                let cell = crate::DataCell::from_arrow(InstanceKey::name(), values);
+                assert_eq!(NUM_INSTANCES as u32, cell.num_instances());
+                assert_eq!(
+                    cell.datatype().to_physical_type(),
+                    PhysicalType::Primitive(PrimitiveType::UInt64)
+                );
+                cell
+            });
+        });
+    }
+}
+
+fn deserialize(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!(
+        "arrow2_convert/deserialize/primitive/instances={NUM_INSTANCES}"
+    ));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    let cell = DataCell::from_component::<InstanceKey>(0..NUM_INSTANCES as u64);
+    let data = cell.to_arrow();
+
+    {
+        group.bench_function("arrow2_convert", |b| {
+            b.iter(|| {
+                let keys: Vec<InstanceKey> = data.as_ref().try_into_collection().unwrap();
+                assert_eq!(NUM_INSTANCES, keys.len());
+                assert_eq!(
+                    InstanceKey(NUM_INSTANCES as u64 / 2),
+                    keys[NUM_INSTANCES / 2]
+                );
+                keys
+            });
+        });
+    }
+
+    {
+        group.bench_function("arrow2/validity_checks", |b| {
+            b.iter(|| {
+                let data = data.as_any().downcast_ref::<PrimitiveArray<u64>>().unwrap();
+                let keys: Vec<InstanceKey> = data
+                    .into_iter()
+                    .filter_map(|v| v.copied().map(InstanceKey))
+                    .collect();
+                assert_eq!(NUM_INSTANCES, keys.len());
+                assert_eq!(
+                    InstanceKey(NUM_INSTANCES as u64 / 2),
+                    keys[NUM_INSTANCES / 2]
+                );
+                keys
+            });
+        });
+    }
+
+    {
+        group.bench_function("arrow2/validity_bypass", |b| {
+            b.iter(|| {
+                let data = data.as_any().downcast_ref::<PrimitiveArray<u64>>().unwrap();
+                assert!(data.validity().is_none());
+                let keys: Vec<InstanceKey> = data.values_iter().copied().map(InstanceKey).collect();
+                assert_eq!(NUM_INSTANCES, keys.len());
+                assert_eq!(
+                    InstanceKey(NUM_INSTANCES as u64 / 2),
+                    keys[NUM_INSTANCES / 2]
+                );
+                keys
+            });
+        });
+    }
+}
diff --git a/crates/re_arrow_store/benches/data_store.rs b/crates/re_arrow_store/benches/data_store.rs
index cb8517114743..6e9aeda7a922 100644
--- a/crates/re_arrow_store/benches/data_store.rs
+++ b/crates/re_arrow_store/benches/data_store.rs
@@ -1,17 +1,21 @@
 #[global_allocator]
 static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
-use arrow2::array::{Array, UnionArray};
+use arrow2::array::UnionArray;
 use criterion::{criterion_group, criterion_main, Criterion};
 
-use re_arrow_store::{DataStore, DataStoreConfig, LatestAtQuery, RangeQuery, TimeInt, TimeRange};
+use re_arrow_store::{
+    DataStore, DataStoreConfig, GarbageCollectionTarget, LatestAtQuery, RangeQuery, TimeInt,
+    TimeRange,
+};
 use re_log_types::{
     component_types::{InstanceKey, Rect2D},
     datagen::{build_frame_nr, build_some_instances, build_some_rects},
-    Component as _, ComponentName, DataRow, DataTable, EntityPath, MsgId, TimeType, Timeline,
+    Component as _, ComponentName, DataCell, DataRow, DataTable, EntityPath, RowId, TableId,
+    TimeType, Timeline,
 };
 
-criterion_group!(benches, insert, latest_at, latest_at_missing, range);
+criterion_group!(benches, insert, latest_at, latest_at_missing, range, gc);
 criterion_main!(benches);
 
 // ---
@@ -27,10 +31,32 @@
 const NUM_ROWS: i64 = 1;
 #[cfg(debug_assertions)]
 const NUM_INSTANCES: i64 = 1;
 
+fn packed() -> &'static [bool] {
+    #[cfg(feature = "core_benchmarks_only")]
+    {
+        &[false]
+    }
+    #[cfg(not(feature = "core_benchmarks_only"))]
+    {
+        &[false, true]
+    }
+}
+
+fn num_rows_per_bucket() -> &'static [u64] {
+    #[cfg(feature = "core_benchmarks_only")]
+    {
+        &[]
+    }
+    #[cfg(not(feature = "core_benchmarks_only"))]
+    {
+        &[0, 2, 32, 2048]
+    }
+}
+
 // --- Benchmarks ---
 
 fn insert(c: &mut Criterion) {
-    for packed in [false, true] {
+    for &packed in packed() {
         let mut group = c.benchmark_group(format!(
             "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/packed={packed}/insert"
         ));
@@ -45,17 +71,13 @@ fn insert(c: &mut Criterion) {
             b.iter(|| insert_table(Default::default(), InstanceKey::name(), &table));
         });
 
-        // Emulate more or less buckets
-        let num_rows_per_bucket = [0, 2, 32, 2048];
-        for num_rows_per_bucket in num_rows_per_bucket {
+        // Emulate more or less buckets
+        for &num_rows_per_bucket in num_rows_per_bucket() {
             group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| {
                 b.iter(|| {
                     insert_table(
                         DataStoreConfig {
-                            index_bucket_nb_rows: num_rows_per_bucket,
-                            component_bucket_nb_rows: num_rows_per_bucket,
-                            index_bucket_size_bytes: u64::MAX,
-                            component_bucket_size_bytes: u64::MAX,
+                            indexed_bucket_num_rows: num_rows_per_bucket,
                             ..Default::default()
                         },
                         InstanceKey::name(),
@@ -68,7 +90,7 @@ fn insert(c: &mut Criterion) {
 }
 
 fn latest_at(c: &mut Criterion) {
-    for packed in [false, true] {
+    for &packed in packed() {
         let mut group = c.benchmark_group(format!(
             "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/packed={packed}/latest_at"
         ));
@@ -80,10 +102,11 @@ fn latest_at(c: &mut Criterion) {
         group.bench_function("default", |b| {
             let store = insert_table(Default::default(), InstanceKey::name(), &table);
             b.iter(|| {
-                let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]);
-                let rects = results[0]
+                let cells = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]);
+                let rects = cells[0]
                     .as_ref()
                     .unwrap()
+                    .as_arrow_ref()
                     .as_any()
                    .downcast_ref::<UnionArray>()
                    .unwrap();
@@ -92,14 +115,10 @@ fn latest_at(c: &mut Criterion) {
         });
 
        // Emulate more or less buckets
-        let num_rows_per_bucket = [0, 2, 32, 2048];
-        for num_rows_per_bucket in num_rows_per_bucket {
+        for &num_rows_per_bucket in num_rows_per_bucket() {
             let store = insert_table(
                 DataStoreConfig {
-                    index_bucket_nb_rows: num_rows_per_bucket,
-                    component_bucket_nb_rows: num_rows_per_bucket,
-                    index_bucket_size_bytes: u64::MAX,
-                    component_bucket_size_bytes: u64::MAX,
+                    indexed_bucket_num_rows: num_rows_per_bucket,
                     ..Default::default()
                 },
                 InstanceKey::name(),
                 &table,
             );
             group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| {
                 b.iter(|| {
-                    let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]);
-                    let rects = results[0]
+                    let cells = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]);
+                    let rects = cells[0]
                         .as_ref()
                         .unwrap()
+                        .as_arrow_ref()
                         .as_any()
                        .downcast_ref::<UnionArray>()
                        .unwrap();
@@ -122,7 +142,7 @@ fn latest_at(c: &mut Criterion) {
 }
 
 fn latest_at_missing(c: &mut Criterion) {
-    for packed in [false, true] {
+    for &packed in packed() {
         let mut group = c.benchmark_group(format!(
             "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/packed={packed}/latest_at_missing"
         ));
@@ -157,14 +177,10 @@ fn latest_at_missing(c: &mut Criterion) {
         });
 
        // Emulate more or less buckets
-        let num_rows_per_bucket = [0, 2, 32, 2048];
-        for num_rows_per_bucket in num_rows_per_bucket {
+        for &num_rows_per_bucket in num_rows_per_bucket() {
             let store = insert_table(
                 DataStoreConfig {
-                    index_bucket_nb_rows: num_rows_per_bucket,
-                    component_bucket_nb_rows: num_rows_per_bucket,
-                    index_bucket_size_bytes: u64::MAX,
-                    component_bucket_size_bytes: u64::MAX,
+                    indexed_bucket_num_rows: num_rows_per_bucket,
                     ..Default::default()
                 },
                 InstanceKey::name(),
@@ -198,7 +214,7 @@ fn latest_at_missing(c: &mut Criterion) {
 }
 
 fn range(c: &mut Criterion) {
-    for packed in [false, true] {
+    for &packed in packed() {
         let mut group = c.benchmark_group(format!(
             "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/packed={packed}/range"
         ));
@@ -214,14 +230,10 @@ fn range(c: &mut Criterion) {
         });
 
        // Emulate more or less buckets
-        let num_rows_per_bucket = [0, 2, 32, 2048];
-        for num_rows_per_bucket in num_rows_per_bucket {
+        for &num_rows_per_bucket in num_rows_per_bucket() {
             let store = insert_table(
                 DataStoreConfig {
-                    index_bucket_nb_rows: num_rows_per_bucket,
-                    component_bucket_nb_rows: num_rows_per_bucket,
-                    index_bucket_size_bytes: u64::MAX,
-                    component_bucket_size_bytes: u64::MAX,
+                    indexed_bucket_num_rows: num_rows_per_bucket,
                     ..Default::default()
                 },
                 InstanceKey::name(),
@@ -229,14 +241,15 @@ fn range(c: &mut Criterion) {
             );
             group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| {
                 b.iter(|| {
-                    let msgs = range_data(&store, [Rect2D::name()]);
-                    for (cur_time, (time, results)) in msgs.enumerate() {
+                    let rows = range_data(&store, [Rect2D::name()]);
+                    for (cur_time, (time, cells)) in rows.enumerate() {
                         let time = time.unwrap();
                         assert_eq!(cur_time as i64, time.as_i64());
 
-                        let rects = results[0]
+                        let rects = cells[0]
                             .as_ref()
                             .unwrap()
+                            .as_arrow_ref()
                             .as_any()
                            .downcast_ref::<UnionArray>()
                            .unwrap();
@@ -248,14 +261,56 @@ fn range(c: &mut Criterion) {
     }
 }
 
+fn gc(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!(
+        "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/gc"
+    ));
+    group.throughput(criterion::Throughput::Elements(
+        (NUM_INSTANCES * NUM_ROWS) as _,
+    ));
+
+    let mut table = build_table(NUM_INSTANCES as usize, false);
+    table.compute_all_size_bytes();
+
+    // Default config
+    group.bench_function("default", |b| {
+        let store = insert_table(Default::default(), InstanceKey::name(), &table);
+        b.iter(|| {
+            let mut store = store.clone();
+            let (_, stats_diff) = store.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0 / 3.0));
+            stats_diff
+        });
+    });
+
+    // Emulate more or less buckets
+    for &num_rows_per_bucket in num_rows_per_bucket() {
+        group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| {
+            let store = insert_table(
+                DataStoreConfig {
+                    indexed_bucket_num_rows: num_rows_per_bucket,
+                    ..Default::default()
+                },
+                InstanceKey::name(),
+                &table,
+            );
+            b.iter(|| {
+                let mut store = store.clone();
+                let (_, stats_diff) =
+                    store.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0 / 3.0));
+                stats_diff
+            });
+        });
+    }
+}
+
 // --- Helpers ---
 
 fn build_table(n: usize, packed: bool) -> DataTable {
     let mut table = DataTable::from_rows(
-        MsgId::ZERO,
+        TableId::ZERO,
         (0..NUM_ROWS).map(move |frame_idx| {
             DataRow::from_cells2(
-                MsgId::random(),
+                RowId::random(),
                 "rects",
                 [build_frame_nr(frame_idx.into())],
                 n as _,
@@ -267,7 +322,7 @@ fn build_table(n: usize, packed: bool) -> DataTable {
     // Do a serialization roundtrip to pack everything in contiguous memory.
     if packed {
         let (schema, columns) = table.serialize().unwrap();
-        table = DataTable::deserialize(MsgId::ZERO, &schema, &columns).unwrap();
+        table = DataTable::deserialize(TableId::ZERO, &schema, &columns).unwrap();
     }
 
     table
@@ -287,26 +342,25 @@
 fn latest_data_at<const N: usize>(
     store: &DataStore,
     primary: ComponentName,
     secondaries: &[ComponentName; N],
-) -> [Option<Box<dyn Array>>; N] {
+) -> [Option<DataCell>; N] {
     let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence);
     let timeline_query = LatestAtQuery::new(timeline_frame_nr, (NUM_ROWS / 2).into());
     let ent_path = EntityPath::from("rects");
 
-    let row_indices = store
+    store
         .latest_at(&timeline_query, &ent_path, primary, secondaries)
-        .unwrap_or_else(|| [(); N].map(|_| None));
-    store.get(secondaries, &row_indices)
+        .map_or_else(|| [(); N].map(|_| None), |(_, cells)| cells)
 }
 
 fn range_data<const N: usize>(
     store: &DataStore,
     components: [ComponentName; N],
-) -> impl Iterator<Item = (Option<TimeInt>, [Option<Box<dyn Array>>; N])> + '_ {
+) -> impl Iterator<Item = (Option<TimeInt>, [Option<DataCell>; N])> + '_ {
     let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence);
     let query = RangeQuery::new(timeline_frame_nr, TimeRange::new(0.into(), NUM_ROWS.into()));
     let ent_path = EntityPath::from("rects");
 
     store
         .range(&query, &ent_path, components)
-        .map(move |(time, _, row_indices)| (time, store.get(&components, &row_indices)))
+        .map(move |(time, _, cells)| (time, cells))
 }
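The helpers above capture the API shift this diff makes throughout the store: reads now hand back `DataCell`s directly instead of opaque row indices that had to be resolved through a separate `store.get` call. A hedged sketch of what a caller looks like after the change, modeled on the benchmark helpers above (the function itself is hypothetical, not an excerpt from the crate):

```rust
use re_arrow_store::{DataStore, LatestAtQuery};
use re_log_types::{component_types::Rect2D, Component as _, DataCell, EntityPath, TimeType, Timeline};

fn query_rects(store: &DataStore) -> Option<DataCell> {
    let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence);
    let query = LatestAtQuery::new(timeline_frame_nr, 0.into());
    let ent_path = EntityPath::from("rects");

    // One-stop query: `latest_at` now returns the row id plus the cells
    // themselves, so there is no second `store.get(...)` lookup step anymore.
    let (_row_id, [cell]) =
        store.latest_at(&query, &ent_path, Rect2D::name(), &[Rect2D::name()])?;
    cell
}
```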
diff --git a/crates/re_arrow_store/benches/vectors.rs b/crates/re_arrow_store/benches/vectors.rs
new file mode 100644
index 000000000000..9bb175d6bbda
--- /dev/null
+++ b/crates/re_arrow_store/benches/vectors.rs
@@ -0,0 +1,337 @@
+//! Keeping track of performance issues/regressions for common vector operations.
+
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+use criterion::{criterion_group, Criterion};
+
+use smallvec::SmallVec;
+use tinyvec::TinyVec;
+
+// ---
+
+criterion_group!(benches, sort, split, swap, swap_opt);
+
+#[cfg(not(feature = "core_benchmarks_only"))]
+criterion::criterion_main!(benches);
+
+// Don't run these benchmarks on CI: they measure the performance of third-party libraries.
+#[cfg(feature = "core_benchmarks_only")]
+fn main() {}
+
+// ---
+
+#[cfg(not(debug_assertions))]
+const NUM_INSTANCES: usize = 10_000;
+#[cfg(not(debug_assertions))]
+const SMALLVEC_SIZE: usize = 4;
+
+// `cargo test` also runs the benchmark setup code, so make sure they run quickly:
+#[cfg(debug_assertions)]
+const NUM_INSTANCES: usize = 1;
+#[cfg(debug_assertions)]
+const SMALLVEC_SIZE: usize = 1;
+
+// --- Benchmarks ---
+
+fn split(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!("vector_ops/split_off/instances={NUM_INSTANCES}"));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        fn split_off<T: Copy, const N: usize>(
+            data: &mut SmallVec<[T; N]>,
+            split_idx: usize,
+        ) -> SmallVec<[T; N]> {
+            if split_idx >= data.len() {
+                return SmallVec::default();
+            }
+
+            let second_half = SmallVec::from_slice(&data[split_idx..]);
+            data.truncate(split_idx);
+            second_half
+        }
+
+        let data: SmallVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function(format!("smallvec/n={SMALLVEC_SIZE}/manual"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = split_off(&mut data, NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+
+    {
+        let data: TinyVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = data.split_off(NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+
+    {
+        fn split_off<T: Copy + Default, const N: usize>(
+            data: &mut TinyVec<[T; N]>,
+            split_idx: usize,
+        ) -> TinyVec<[T; N]> {
+            if split_idx >= data.len() {
+                return TinyVec::default();
+            }
+
+            let second_half = TinyVec::from(&data[split_idx..]);
+            data.truncate(split_idx);
+            second_half
+        }
+
+        let data: TinyVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}/manual"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = split_off(&mut data, NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+
+    {
+        let data: Vec<i64> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function("vec", |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = data.split_off(NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+
+    {
+        fn split_off<T: Clone>(data: &mut Vec<T>, split_idx: usize) -> Vec<T> {
+            if split_idx >= data.len() {
+                return Vec::default();
+            }
+
+            let second_half = Vec::from(&data[split_idx..]);
+            data.truncate(split_idx);
+            second_half
+        }
+
+        let data: Vec<i64> = (0..NUM_INSTANCES as i64).collect();
+
+        group.bench_function("vec/manual", |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                let second_half = split_off(&mut data, NUM_INSTANCES / 2);
+                assert_eq!(NUM_INSTANCES, data.len() + second_half.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, second_half[0]);
+                (data, second_half)
+            });
+        });
+    }
+}
+
+fn sort(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!("vector_ops/sort/instances={NUM_INSTANCES}"));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        let data: SmallVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).rev().collect();
+
+        group.bench_function(format!("smallvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                data.sort_unstable();
+                assert_eq!(NUM_INSTANCES, data.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, data[NUM_INSTANCES / 2]);
+                data
+            });
+        });
+    }
+
+    {
+        let data: TinyVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).rev().collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                data.sort_unstable();
+                assert_eq!(NUM_INSTANCES, data.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, data[NUM_INSTANCES / 2]);
+                data
+            });
+        });
+    }
+
+    {
+        let data: Vec<i64> = (0..NUM_INSTANCES as i64).rev().collect();
+
+        group.bench_function("vec", |b| {
+            b.iter(|| {
+                let mut data = data.clone();
+                data.sort_unstable();
+                assert_eq!(NUM_INSTANCES, data.len());
+                assert_eq!(NUM_INSTANCES as i64 / 2, data[NUM_INSTANCES / 2]);
+                data
+            });
+        });
+    }
+}
+
+fn swap(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!("vector_ops/swap/instances={NUM_INSTANCES}"));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        let data: SmallVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+        let swaps: SmallVec<[usize; SMALLVEC_SIZE]> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function(format!("smallvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap];
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    (NUM_INSTANCES as i64 / 2).max(1) - 1,
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+
+    {
+        let data: TinyVec<[i64; SMALLVEC_SIZE]> = (0..NUM_INSTANCES as i64).collect();
+        let swaps: TinyVec<[usize; SMALLVEC_SIZE]> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap];
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    (NUM_INSTANCES as i64 / 2).max(1) - 1,
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+
+    {
+        let data: Vec<i64> = (0..NUM_INSTANCES as i64).collect();
+        let swaps: Vec<usize> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function("vec", |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap];
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    (NUM_INSTANCES as i64 / 2).max(1) - 1,
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+}
+
+fn swap_opt(c: &mut Criterion) {
+    let mut group = c.benchmark_group(format!("vector_ops/swap_opt/instances={NUM_INSTANCES}"));
+    group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
+
+    {
+        let data: SmallVec<[Option<i64>; SMALLVEC_SIZE]> =
+            (0..NUM_INSTANCES as i64).map(Some).collect();
+        let swaps: SmallVec<[usize; SMALLVEC_SIZE]> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function(format!("smallvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let mut data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap].take();
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    Some((NUM_INSTANCES as i64 / 2).max(1) - 1),
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+
+    {
+        let data: TinyVec<[Option<i64>; SMALLVEC_SIZE]> =
+            (0..NUM_INSTANCES as i64).map(Some).collect();
+        let swaps: TinyVec<[usize; SMALLVEC_SIZE]> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function(format!("tinyvec/n={SMALLVEC_SIZE}"), |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let mut data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap].take();
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    Some((NUM_INSTANCES as i64 / 2).max(1) - 1),
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+
+    {
+        let data: Vec<Option<i64>> = (0..NUM_INSTANCES as i64).map(Some).collect();
+        let swaps: Vec<usize> = (0..NUM_INSTANCES).rev().collect();
+
+        group.bench_function("vec", |b| {
+            b.iter(|| {
+                let mut data1 = data.clone();
+                let mut data2 = data.clone();
+                for &swap in &swaps {
+                    data1[NUM_INSTANCES - swap - 1] = data2[swap].take();
+                }
+                assert_eq!(NUM_INSTANCES, data1.len());
+                assert_eq!(NUM_INSTANCES, data2.len());
+                assert_eq!(
+                    Some((NUM_INSTANCES as i64 / 2).max(1) - 1),
+                    data1[NUM_INSTANCES / 2]
+                );
+                (data1, data2)
+            });
+        });
+    }
+}
diff --git a/crates/re_arrow_store/src/arrow_util.rs b/crates/re_arrow_store/src/arrow_util.rs
index d54d08fd26b4..ef119bd51b31 100644
--- a/crates/re_arrow_store/src/arrow_util.rs
+++ b/crates/re_arrow_store/src/arrow_util.rs
@@ -51,7 +51,14 @@ impl ArrayExt for dyn Array {
     ///
     /// Nested types are expanded and cleaned recursively
     fn clean_for_polars(&self) -> Box<dyn Array> {
-        match self.data_type() {
+        let datatype = self.data_type();
+        let datatype = if let DataType::Extension(_, inner, _) = datatype {
+            (**inner).clone()
+        } else {
+            datatype.clone()
+        };
+
+        match &datatype {
             DataType::List(field) => {
                 // Recursively clean the contents
                 let typed_arr = self.as_any().downcast_ref::<ListArray<i32>>().unwrap();
diff --git a/crates/re_arrow_store/src/lib.rs b/crates/re_arrow_store/src/lib.rs
index 3f2e5454caaa..0ac2be79a28e 100644
--- a/crates/re_arrow_store/src/lib.rs
+++ b/crates/re_arrow_store/src/lib.rs
@@ -16,6 +16,8 @@
 mod arrow_util;
 mod store;
+mod store_arrow;
+mod store_dump;
 mod store_format;
 mod store_gc;
 mod store_read;
@@ -33,17 +35,15 @@ pub mod polars_util;
 pub mod test_util;
 
 pub use self::arrow_util::ArrayExt;
-pub use self::store::{
-    DataStore, DataStoreConfig, IndexBucket, IndexRowNr, IndexTable, RowIndex, RowIndexKind,
-};
+pub use self::store::{DataStore, DataStoreConfig};
 pub use self::store_gc::GarbageCollectionTarget;
 pub use self::store_read::{LatestAtQuery, RangeQuery};
-pub use self::store_stats::DataStoreStats;
+pub use self::store_stats::{DataStoreRowStats, DataStoreStats};
 pub use self::store_write::{WriteError, WriteResult};
 
 pub(crate) use self::store::{
-    ComponentBucket, ComponentTable, IndexBucketIndices, PersistentComponentTable,
-    PersistentIndexTable, SecondaryIndex, TimeIndex,
+    ClusterCellCache, DataTypeRegistry, IndexedBucket, IndexedBucketInner, IndexedTable,
+    MetadataRegistry, PersistentIndexedTable,
 };
 
 // Re-exports
diff --git a/crates/re_arrow_store/src/polars_util.rs b/crates/re_arrow_store/src/polars_util.rs
index d98a6c59e081..4e4beabbb69c 100644
--- a/crates/re_arrow_store/src/polars_util.rs
+++ b/crates/re_arrow_store/src/polars_util.rs
@@ -1,8 +1,7 @@
-use arrow2::array::Array;
 use itertools::Itertools;
 use polars_core::{prelude::*, series::Series};
 use polars_ops::prelude::*;
 
-use re_log_types::{ComponentName, EntityPath, TimeInt};
+use re_log_types::{ComponentName, DataCell, EntityPath, RowId, TimeInt};
 
 use crate::{ArrayExt, DataStore, LatestAtQuery, RangeQuery};
 
@@ -38,12 +37,11 @@
     let cluster_key = store.cluster_key();
 
     let components = &[cluster_key, primary];
-    let row_indices = store
+    let (_, cells) = store
         .latest_at(query, ent_path, primary, components)
-        .unwrap_or([None; 2]);
-    let results = store.get(components, &row_indices);
+        .unwrap_or((RowId::ZERO, [(); 2].map(|_| None)));
 
-    dataframe_from_results(components, results)
+    dataframe_from_cells(&cells)
 }
 
 /// Queries any number of components and their cluster keys from their respective point-of-views,
@@ -161,12 +159,11 @@ pub fn range_components<'a, const N: usize>(
         .chain(
             store
                 .range(query, ent_path, components)
-                .map(move |(time, _, row_indices)| {
-                    let results = store.get(&components, &row_indices);
+                .map(move |(time, _, cells)| {
                     (
                         time,
-                        row_indices[primary_col].is_some(), // is_primary
-                        dataframe_from_results(&components, results),
+                        cells[primary_col].is_some(), // is_primary
+                        dataframe_from_cells(&cells),
                     )
                 }),
         )
@@ -200,16 +197,19 @@ pub fn range_components<'a, const N: usize>(
 
 // --- Joins ---
 
-pub fn dataframe_from_results<const N: usize>(
-    components: &[ComponentName; N],
-    results: [Option<Box<dyn Array>>; N],
+// TODO(#1619): none of this mess should be here
+
+pub fn dataframe_from_cells<const N: usize>(
+    cells: &[Option<DataCell>; N],
 ) -> SharedResult<DataFrame> {
-    let series: Result<Vec<_>, _> = components
+    let series: Result<Vec<_>, _> = cells
         .iter()
-        .zip(results)
-        .filter_map(|(component, col)| col.map(|col| (component, col)))
-        .map(|(&component, col)| {
-            Series::try_from((component.as_str(), col.as_ref().clean_for_polars()))
+        .flatten()
+        .map(|cell| {
+            Series::try_from((
+                cell.component_name().as_str(),
+                cell.as_arrow_ref().clean_for_polars(),
+            ))
         })
         .collect();
diff --git a/crates/re_arrow_store/src/store.rs b/crates/re_arrow_store/src/store.rs
index 4d92abdd80a7..1edffd96b7c2 100644
--- a/crates/re_arrow_store/src/store.rs
+++ b/crates/re_arrow_store/src/store.rs
@@ -1,156 +1,38 @@
-use std::collections::{BTreeMap, HashMap, VecDeque};
-use std::num::NonZeroU64;
+use std::collections::BTreeMap;
 use std::sync::atomic::AtomicU64;
 
-use arrow2::array::{Array, Int64Array};
-use arrow2::datatypes::{DataType, TimeUnit};
+use ahash::HashMap;
+use arrow2::datatypes::DataType;
+use smallvec::SmallVec;
+
 use nohash_hasher::{IntMap, IntSet};
 use parking_lot::RwLock;
 
 use re_log_types::{
-    ComponentName, EntityPath, EntityPathHash, MsgId, TimeInt, TimePoint, TimeRange, Timeline,
+    ComponentName, DataCell, DataCellColumn, EntityPath, EntityPathHash, ErasedTimeVec,
+    NumInstancesVec, RowId, RowIdVec, SizeBytes, TimeInt, TimePoint, TimeRange, Timeline,
 };
 
-// --- Indices & offsets ---
-
-/// A vector of times. Our primary column, always densely filled.
-pub type TimeIndex = Vec<TimeInt>;
-
-/// A vector of references into the component tables. None = null.
-// TODO(cmc): keeping a separate validity might be a better option, maybe.
-pub type SecondaryIndex = Vec<Option<RowIndex>>;
-static_assertions::assert_eq_size!(u64, Option<RowIndex>);
-
-// TODO(#639): We desperately need to work on the terminology here:
-//
-// - `TimeIndex` is a vector of `TimeInt`s.
-//   It's the primary column and it's always dense.
-//   It's used to search the datastore by time.
-//
-// - `ComponentIndex` (currently `SecondaryIndex`) is a vector of `ComponentRowNr`s.
-//   It's the secondary column and is sparse.
-// It's used to search the datastore by component once the search by time is complete. -// -// - `ComponentRowNr` (currently `RowIndex`) is a row offset into a component table. -// It only makes sense when associated with a component name. -// It is absolute. -// It's used to fetch actual data from the datastore. -// -// - `IndexRowNr` is a row offset into an index bucket. -// It only makes sense when associated with an entity path and a specific time. -// It is relative per bucket. -// It's used to tiebreak results with an identical time, should you need too. - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -#[repr(u64)] -pub enum RowIndexKind { - Temporal = 0, - Timeless = 1, -} - -/// An opaque type that directly refers to a row of data within the datastore, iff it is -/// associated with a component name. -/// -/// See [`DataStore::latest_at`], [`DataStore::range`] & [`DataStore::get`]. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct RowIndex(pub(crate) NonZeroU64); - -impl RowIndex { - const KIND_MASK: u64 = 0x8000_0000_0000_0000; - - /// Panics if `v` is 0. - /// In debug, panics if `v` has its most significant bit set. - pub(crate) fn from_u63(kind: RowIndexKind, v: u64) -> Self { - debug_assert!(v & Self::KIND_MASK == 0); - - let v = v | ((kind as u64) << 63); - Self(v.try_into().unwrap()) - } - - pub(crate) fn as_u64(self) -> u64 { - self.0.get() & !Self::KIND_MASK - } - - pub(crate) fn kind(self) -> RowIndexKind { - match self.0.get() & Self::KIND_MASK > 0 { - false => RowIndexKind::Temporal, - true => RowIndexKind::Timeless, - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct IndexRowNr(pub(crate) u64); - // --- Data store --- -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct DataStoreConfig { - /// The maximum size of a component bucket before triggering a split. + /// The maximum number of rows in an indexed bucket before triggering a split. /// Does not apply to timeless data. /// - /// ⚠ When configuring this threshold, do keep in mind that component tables are shared - /// across all timelines and all entities! - /// - /// This effectively controls how fine grained the garbage collection of components is. - /// The lower the size, the more fine-grained the garbage collection is, at the cost of more - /// metadata overhead. - /// - /// Note that this cannot split a single huge row: if a user inserts a single row that's - /// larger than the threshold, then that bucket will become larger than the threshold, and - /// we will split from there on. - /// - /// See [`Self::DEFAULT`] for defaults. - pub component_bucket_size_bytes: u64, - - /// The maximum number of rows in a component bucket before triggering a split. - /// Does not apply to timeless data. - /// - /// ⚠ When configuring this threshold, do keep in mind that component tables are shared - /// across all timelines and all entities! - /// - /// This effectively controls how fine grained the garbage collection of components is. - /// The lower the number, the more fine-grained the garbage collection is, at the cost of more - /// metadata overhead. - /// - /// Note: since component buckets aren't sorted, the number of rows isn't necessarily a great - /// metric to use as a threshold, although we do expose it if only for symmetry. - /// Prefer using [`Self::component_bucket_size_bytes`], or both. - /// - /// See [`Self::DEFAULT`] for defaults. 
- pub component_bucket_nb_rows: u64, - - /// The maximum size of an index bucket before triggering a split. - /// Does not apply to timeless data. - /// - /// ⚠ When configuring this threshold, do keep in mind that index tables are always scoped + /// ⚠ When configuring this threshold, do keep in mind that indexed tables are always scoped /// to a specific timeline _and_ a specific entity. /// - /// This effectively controls two aspects of the runtime: - /// - how fine grained the garbage collection of indices is, - /// - and how many rows will have to be sorted in the worst case when an index gets out - /// of order. - /// The lower the size, the more fine-grained the garbage collection is and smaller the - /// number of rows to sort gets, at the cost of more metadata overhead. + /// This effectively puts an upper bound on the number of rows that need to be sorted when an + /// indexed bucket gets out of order (e.g. because of new insertions or a GC pass). + /// This is a tradeoff: less rows means faster sorts at the cost of more metadata overhead. + /// In particular: + /// - Query performance scales inversely logarithmically to this number (i.e. it gets better + /// the higher this number gets). + /// - GC performance scales quadratically with this number (i.e. it gets better the lower this + /// number gets). /// /// See [`Self::DEFAULT`] for defaults. - pub index_bucket_size_bytes: u64, - - /// The maximum number of rows in an index bucket before triggering a split. - /// Does not apply to timeless data. - /// - /// ⚠ When configuring this threshold, do keep in mind that index tables are always scoped - /// to a specific timeline _and_ a specific entity. - /// - /// This effectively controls two aspects of the runtime: - /// - how fine grained the garbage collection of indices is, - /// - and how many rows will have to be sorted in the worst case when an index gets out - /// of order. - /// The lower the size, the more fine-grained the garbage collection is and smaller the - /// number of rows to sort gets, at the cost of more metadata overhead. - /// - /// See [`Self::DEFAULT`] for defaults. - pub index_bucket_nb_rows: u64, + pub indexed_bucket_num_rows: u64, /// If enabled, will store the ID of the write request alongside the inserted data. /// @@ -158,15 +40,17 @@ pub struct DataStoreConfig { /// `u64` value stored per row. /// /// Enabled by default in debug builds. - /// - /// See [`DataStore::insert_id_key`]. pub store_insert_ids: bool, - /// Should soon-to-be inactive buckets be compacted before being archived? - pub enable_compaction: bool, + /// If enabled, the store will throw an error if and when it notices that a single component + /// type maps to more than one arrow datatype. + /// + /// Enabled by default in debug builds. + pub enable_typecheck: bool, } impl Default for DataStoreConfig { + #[inline] fn default() -> Self { Self::DEFAULT } @@ -174,21 +58,103 @@ impl Default for DataStoreConfig { impl DataStoreConfig { pub const DEFAULT: Self = Self { - component_bucket_size_bytes: 32 * 1024 * 1024, // 32MiB - component_bucket_nb_rows: u64::MAX, - index_bucket_size_bytes: 32 * 1024, // 32kiB - index_bucket_nb_rows: 1024, - store_insert_ids: cfg!(debug_assertions), - // TODO(cmc): Compaction is disabled until we implement batching. - // See https://github.com/rerun-io/rerun/pull/1535 for rationale. + // NOTE: Empirical testing has shown that 512 is a good balance between sorting + // and binary search costs with the current GC implementation. 
-        //
-        // This has no noticeable impact on performance.
-        enable_compaction: false,
+        //
+        // Garbage collection costs are entirely driven by the number of buckets around; the size
+        // of the data itself has no impact.
+        indexed_bucket_num_rows: 512,
+        store_insert_ids: cfg!(debug_assertions),
+        enable_typecheck: cfg!(debug_assertions),
     };
 }
 
 // ---
 
+pub type InsertIdVec = SmallVec<[u64; 4]>;
+
+/// Keeps track of datatype information for all component types that have been written to the store
+/// so far.
+///
+/// See also [`DataStore::lookup_datatype`].
+#[derive(Debug, Default, Clone)]
+pub struct DataTypeRegistry(pub IntMap<ComponentName, DataType>);
+
+impl std::ops::Deref for DataTypeRegistry {
+    type Target = IntMap<ComponentName, DataType>;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl std::ops::DerefMut for DataTypeRegistry {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+/// Keeps track of arbitrary per-row metadata.
+#[derive(Debug, Clone)]
+pub struct MetadataRegistry<T: Clone> {
+    pub registry: BTreeMap<RowId, T>,
+
+    /// Cached heap size, because the registry gets very, very large.
+    pub heap_size_bytes: u64,
+}
+
+impl<T: Clone + SizeBytes> Default for MetadataRegistry<T> {
+    fn default() -> Self {
+        let mut this = Self {
+            registry: Default::default(),
+            heap_size_bytes: 0,
+        };
+        this.heap_size_bytes = this.heap_size_bytes(); // likely zero, just future proofing
+        this
+    }
+}
+
+impl<T: Clone> std::ops::Deref for MetadataRegistry<T> {
+    type Target = BTreeMap<RowId, T>;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.registry
+    }
+}
+
+impl<T: Clone> std::ops::DerefMut for MetadataRegistry<T> {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.registry
+    }
+}
+
+/// Used to cache auto-generated cluster cells (`[0]`, `[0, 1]`, `[0, 1, 2]`, ...) so that they
+/// can be properly deduplicated on insertion.
+#[derive(Debug, Default, Clone)]
+pub struct ClusterCellCache(pub IntMap<u32, DataCell>);
+
+impl std::ops::Deref for ClusterCellCache {
+    type Target = IntMap<u32, DataCell>;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl std::ops::DerefMut for ClusterCellCache {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+// ---
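Because `MetadataRegistry` caches its own heap size, every mutation has to keep that cache in sync. A sketch of what an insertion helper could look like under the definitions above (a hypothetical helper, not part of this diff; it assumes both `RowId` and `T` implement `SizeBytes`):

```rust
impl<T: Clone + SizeBytes> MetadataRegistry<T> {
    /// Hypothetical helper: insert while keeping the cached heap size in sync.
    pub fn upsert(&mut self, row_id: RowId, data: T) {
        let added_size = row_id.total_size_bytes() + data.total_size_bytes();
        // Only grow the cached size for brand new entries; replacing an entry of
        // the same type is assumed to be roughly size-neutral in this sketch.
        if self.registry.insert(row_id, data).is_none() {
            self.heap_size_bytes += added_size;
        }
    }
}
```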
 /// A complete data store: covers all timelines, all entities, everything.
 ///
 /// ## Debugging
@@ -218,34 +184,30 @@ pub struct DataStore {
     /// The configuration of the data store (e.g. bucket sizes).
     pub(crate) config: DataStoreConfig,
 
-    /// Maps `MsgId`s to some metadata (just timepoints at the moment).
-    ///
-    /// `BTreeMap` because of garbage collection.
-    pub(crate) messages: BTreeMap<MsgId, TimePoint>,
-
-    /// Used to cache auto-generated cluster components, i.e. `[0]`, `[0, 1]`, `[0, 1, 2]`, etc
-    /// so that they can be properly deduplicated.
-    pub(crate) cluster_comp_cache: IntMap<u32, RowIndex>,
+    /// Keeps track of datatype information for all component types that have been written to
+    /// the store so far.
+    ///
+    /// See also [`Self::lookup_datatype`].
+    pub(crate) type_registry: DataTypeRegistry,
 
-    /// Dedicated index tables for timeless data. Never garbage collected.
+    /// Keeps track of arbitrary per-row metadata.
     ///
-    /// See also `Self::indices`.
-    pub(crate) timeless_indices: IntMap<EntityPathHash, PersistentIndexTable>,
+    /// Only used to map `RowId`s to their original [`TimePoint`]s at the moment.
+    pub(crate) metadata_registry: MetadataRegistry<TimePoint>,
 
-    /// Dedicated component tables for timeless data. Never garbage collected.
-    ///
-    /// See also `Self::components`.
-    pub(crate) timeless_components: IntMap<ComponentName, PersistentComponentTable>,
+    /// Used to cache auto-generated cluster cells (`[0]`, `[0, 1]`, `[0, 1, 2]`, ...)
+    /// so that they can be properly deduplicated on insertion.
+    pub(crate) cluster_cell_cache: ClusterCellCache,
 
-    /// Maps an entity to its index, for a specific timeline.
+    /// All temporal [`IndexedTable`]s for all entities on all timelines.
     ///
-    /// An index maps specific points in time to rows in component tables.
-    pub(crate) indices: HashMap<(Timeline, EntityPathHash), IndexTable>,
+    /// See also [`Self::timeless_tables`].
+    pub(crate) tables: HashMap<(Timeline, EntityPathHash), IndexedTable>,
 
-    /// Maps a component name to its associated table, for all timelines and all entities.
+    /// All timeless indexed tables for all entities. Never garbage collected.
     ///
-    /// A component table holds all the values ever inserted for a given component.
-    pub(crate) components: IntMap<ComponentName, ComponentTable>,
+    /// See also [`Self::tables`].
+    pub(crate) timeless_tables: IntMap<EntityPathHash, PersistentIndexedTable>,
 
     /// Monotonically increasing ID for insertions.
     pub(crate) insert_id: u64,
@@ -257,28 +219,45 @@ pub struct DataStore {
     pub(crate) gc_id: u64,
 }
 
+impl Clone for DataStore {
+    fn clone(&self) -> Self {
+        Self {
+            cluster_key: self.cluster_key,
+            config: self.config.clone(),
+            type_registry: self.type_registry.clone(),
+            metadata_registry: self.metadata_registry.clone(),
+            cluster_cell_cache: self.cluster_cell_cache.clone(),
+            tables: self.tables.clone(),
+            timeless_tables: self.timeless_tables.clone(),
+            insert_id: self.insert_id,
+            query_id: self
+                .query_id
+                .load(std::sync::atomic::Ordering::Relaxed)
+                .into(),
+            gc_id: self.gc_id,
+        }
+    }
+}
+
 impl DataStore {
     /// See [`Self::cluster_key`] for more information about the cluster key.
     pub fn new(cluster_key: ComponentName, config: DataStoreConfig) -> Self {
         Self {
             cluster_key,
             config,
-            cluster_comp_cache: Default::default(),
-            messages: Default::default(),
-            indices: Default::default(),
-            components: Default::default(),
-            timeless_indices: Default::default(),
-            timeless_components: Default::default(),
+            cluster_cell_cache: Default::default(),
+            metadata_registry: Default::default(),
+            type_registry: Default::default(),
+            tables: Default::default(),
+            timeless_tables: Default::default(),
            insert_id: 0,
            query_id: AtomicU64::new(0),
            gc_id: 0,
         }
     }
 
-    /// The column name used for storing insert requests' IDs alongside the data.
-    ///
-    /// The insert IDs are stored as-is directly into the index tables, this is _not_ an
-    /// indirection into an associated component table!
+    /// The column name used for storing insert requests' IDs alongside the data when manipulating
+    /// dataframes.
     ///
     /// See [`DataStoreConfig::store_insert_ids`].
     pub fn insert_id_key() -> ComponentName {
@@ -290,156 +269,111 @@ impl DataStore {
         self.cluster_key
     }
 
-    /// Lookup the arrow `DataType` of a `Component`
-    pub fn lookup_data_type(&self, component: &ComponentName) -> Option<&DataType> {
-        self.components.get(component).map(|c| &c.datatype)
+    /// See [`DataStoreConfig`] for more information about configuration.
+    pub fn config(&self) -> &DataStoreConfig {
+        &self.config
     }
-}
 
-// --- Persistent Indices ---
+    /// Lookup the arrow [`DataType`] of a [`re_log_types::Component`] in the internal
+    /// `DataTypeRegistry`.
+    pub fn lookup_datatype(&self, component: &ComponentName) -> Option<&DataType> {
+        self.type_registry.get(component)
+    }
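Taken together, a consumer of this API would configure and create a store roughly like this. This is a sketch using only items visible in this diff; `InstanceKey` as the cluster key mirrors the `datastore_internal_repr` test further down:

```rust
use re_arrow_store::{DataStore, DataStoreConfig};
use re_log_types::{component_types::InstanceKey, Component as _};

fn main() {
    let config = DataStoreConfig {
        // Smaller buckets: cheaper sorts on the write path, more bucket metadata overall.
        indexed_bucket_num_rows: 128,
        ..DataStoreConfig::DEFAULT
    };
    let store = DataStore::new(InstanceKey::name(), config);

    // Nothing has been written yet, so no datatype is registered.
    assert!(store.lookup_datatype(&InstanceKey::name()).is_none());
}
```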
-/// A `PersistentIndexTable` maps specific entries to rows in persistent component tables.
-///
-/// See also `DataStore::IndexTable`.
-#[derive(Debug)]
-pub struct PersistentIndexTable {
-    /// The entity this table is related to, for debugging purposes.
-    pub(crate) ent_path: EntityPath,
+    /// The oldest time for which we have any data.
+    ///
+    /// Ignores timeless data.
+    ///
+    /// Useful to call after a gc.
+    pub fn oldest_time_per_timeline(&self) -> BTreeMap<Timeline, TimeInt> {
+        crate::profile_function!();
 
-    /// Carrying the cluster key around to help with assertions and sanity checks all over the
-    /// place.
-    pub(crate) cluster_key: ComponentName,
+        let mut oldest_time_per_timeline = BTreeMap::default();
+
+        for index in self.tables.values() {
+            if let Some(bucket) = index.buckets.values().next() {
+                let entry = oldest_time_per_timeline
+                    .entry(bucket.timeline)
+                    .or_insert(TimeInt::MAX);
+                if let Some(time) = bucket.inner.read().col_time.first() {
+                    *entry = TimeInt::min(*entry, (*time).into());
+                }
+            }
+        }
 
-    /// The number of rows in the table: all indices should always be exactly of that length.
-    pub(crate) num_rows: u64,
+        oldest_time_per_timeline
+    }
 
-    /// All component indices for this bucket.
+    /// Returns a read-only iterator over the raw indexed tables.
     ///
-    /// One index per component: new components (and as such, new indices) can be added at any
-    /// time!
-    /// When that happens, they will be retro-filled with nulls until they are [`Self::num_rows`]
-    /// long.
-    pub(crate) indices: IntMap<ComponentName, SecondaryIndex>,
-
-    /// Track all of the components that have been written to.
-    pub(crate) all_components: IntSet<ComponentName>,
+    /// Do _not_ use this to try and assert the internal state of the datastore.
+    pub fn iter_indices(
+        &self,
+    ) -> impl ExactSizeIterator<Item = ((Timeline, EntityPath), &IndexedTable)> {
+        self.tables.iter().map(|((timeline, _), table)| {
+            ((*timeline, table.ent_path.clone() /* shallow */), table)
+        })
+    }
 }
 
-// --- Indices ---
-
-/// An `IndexTable` maps specific points in time to rows in component tables.
+/// A simple example to look at the internal representation of a [`DataStore`].
/// -/// Example of a time-based index table (`MAX_ROWS_PER_BUCKET=2`): +/// Run with: /// ```text -/// IndexTable { -/// timeline: log_time -/// entity: this/that -/// size: 3 buckets for a total of 152 B across 5 total rows -/// buckets: [ -/// IndexBucket { -/// index time bound: >= +0.000s -/// size: 64 B across 2 rows -/// - log_time: from 19:37:35.713798Z to 19:37:35.713798Z (all inclusive) -/// data (sorted=true): -/// +-------------------------------+--------------+---------------+--------------------+ -/// | log_time | rerun.rect2d | rerun.point2d | rerun.instance_key | -/// +-------------------------------+--------------+---------------+--------------------+ -/// | 2022-12-20 19:37:35.713798552 | | 2 | 2 | -/// | 2022-12-20 19:37:35.713798552 | 4 | | 2 | -/// +-------------------------------+--------------+---------------+--------------------+ -/// -/// } -/// IndexBucket { -/// index time bound: >= 19:37:36.713798Z -/// size: 64 B across 2 rows -/// - log_time: from 19:37:36.713798Z to 19:37:36.713798Z (all inclusive) -/// data (sorted=true): -/// +-------------------------------+--------------+--------------------+---------------+ -/// | log_time | rerun.rect2d | rerun.instance_key | rerun.point2d | -/// +-------------------------------+--------------+--------------------+---------------+ -/// | 2022-12-20 19:37:36.713798552 | 1 | 2 | | -/// | 2022-12-20 19:37:36.713798552 | | 4 | | -/// +-------------------------------+--------------+--------------------+---------------+ -/// -/// } -/// IndexBucket { -/// index time bound: >= 19:37:37.713798Z -/// size: 24 B across 1 rows -/// - log_time: from 19:37:37.713798Z to 19:37:37.713798Z (all inclusive) -/// data (sorted=true): -/// +-------------------------------+--------------+--------------------+ -/// | log_time | rerun.rect2d | rerun.instance_key | -/// +-------------------------------+--------------+--------------------+ -/// | 2022-12-20 19:37:37.713798552 | 2 | 3 | -/// +-------------------------------+--------------+--------------------+ -/// -/// } -/// ] -/// } +/// cargo test -p re_arrow_store -- --nocapture datastore_internal_repr /// ``` +#[test] +fn datastore_internal_repr() { + use re_log_types::{component_types::InstanceKey, Component as _, DataTable}; + + let mut store = DataStore::new( + InstanceKey::name(), + DataStoreConfig { + indexed_bucket_num_rows: 0, + store_insert_ids: true, + enable_typecheck: true, + }, + ); + + let timeless = DataTable::example(true); + eprintln!("{timeless}"); + store.insert_table(&timeless).unwrap(); + + let temporal = DataTable::example(false); + eprintln!("{temporal}"); + store.insert_table(&temporal).unwrap(); + + store.sanity_check().unwrap(); + eprintln!("{store}"); +} + +// --- Temporal --- + +/// An `IndexedTable` is an ever-growing, arbitrary large [`re_log_types::DataTable`] that is +/// optimized for time-based insertions and queries (which means a lot of bucketing). 
/// -/// Example of a sequence-based index table (`MAX_ROWS_PER_BUCKET=2`): -/// ```text -/// IndexTable { -/// timeline: frame_nr -/// entity: this/that -/// size: 3 buckets for a total of 256 B across 8 total rows -/// buckets: [ -/// IndexBucket { -/// index time bound: >= #0 -/// size: 96 B across 3 rows -/// - frame_nr: from #41 to #41 (all inclusive) -/// data (sorted=true): -/// +----------+---------------+--------------+--------------------+ -/// | frame_nr | rerun.point2d | rerun.rect2d | rerun.instance_key | -/// +----------+---------------+--------------+--------------------+ -/// | 41 | | | 1 | -/// | 41 | 1 | | 2 | -/// | 41 | | 3 | 2 | -/// +----------+---------------+--------------+--------------------+ -/// -/// } -/// IndexBucket { -/// index time bound: >= #42 -/// size: 96 B across 3 rows -/// - frame_nr: from #42 to #42 (all inclusive) -/// data (sorted=true): -/// +----------+--------------+--------------------+---------------+ -/// | frame_nr | rerun.rect2d | rerun.instance_key | rerun.point2d | -/// +----------+--------------+--------------------+---------------+ -/// | 42 | 1 | 2 | | -/// | 42 | | 4 | | -/// | 42 | | 2 | 2 | -/// +----------+--------------+--------------------+---------------+ -/// -/// } -/// IndexBucket { -/// index time bound: >= #43 -/// size: 64 B across 2 rows -/// - frame_nr: from #43 to #44 (all inclusive) -/// data (sorted=true): -/// +----------+--------------+---------------+--------------------+ -/// | frame_nr | rerun.rect2d | rerun.point2d | rerun.instance_key | -/// +----------+--------------+---------------+--------------------+ -/// | 43 | 4 | | 2 | -/// | 44 | | 3 | 2 | -/// +----------+--------------+---------------+--------------------+ +/// See also [`IndexedBucket`]. /// -/// } -/// ] -/// } +/// Run the following command to display a visualization of the store's internal datastructures and +/// better understand how everything fits together: +/// ```text +/// cargo test -p re_arrow_store -- --nocapture datastore_internal_repr /// ``` -/// -/// See also: [`IndexBucket`]. -#[derive(Debug)] -pub struct IndexTable { +// +// TODO(#1524): inline visualization once it's back to a manageable state +#[derive(Debug, Clone)] +pub struct IndexedTable { /// The timeline this table operates in, for debugging purposes. - pub(crate) timeline: Timeline, + pub timeline: Timeline, /// The entity this table is related to, for debugging purposes. - pub(crate) ent_path: EntityPath, + pub ent_path: EntityPath, + + /// Carrying the cluster key around to help with assertions and sanity checks all over the + /// place. + pub cluster_key: ComponentName, - /// The actual buckets, where the indices are stored. + /// The actual buckets, where the data is stored. /// /// The keys of this `BTreeMap` represent the lower bounds of the time-ranges covered by /// their associated buckets, _as seen from an indexing rather than a data standpoint_! @@ -447,307 +381,198 @@ pub struct IndexTable { /// This means that e.g. for the initial bucket, this will always be `-∞`, as from an /// indexing standpoint, all reads and writes with a time `t >= -∞` should go there, even /// though the bucket doesn't actually contains data with a timestamp of `-∞`! - pub(crate) buckets: BTreeMap, - - /// Carrying the cluster key around to help with assertions and sanity checks all over the - /// place. - pub(crate) cluster_key: ComponentName, + pub buckets: BTreeMap, /// Track all of the components that have been written to. 
     ///
-    /// Note that this set will never be purged and will continue to return
-    /// components that may have been set in the past even if all instances of
-    /// that component have since been purged to free up space.
-    pub(crate) all_components: IntSet<ComponentName>,
+    /// Note that this set will never be purged and will continue to return components that may
+    /// have been set in the past even if all instances of that component have since been purged
+    /// to free up space.
+    pub all_components: IntSet<ComponentName>,
+
+    /// The number of rows stored in this table, across all of its buckets.
+    pub buckets_num_rows: u64,
+
+    /// The size of both the control & component data stored in this table, across all of its
+    /// buckets, in bytes.
+    ///
+    /// This is a best-effort approximation, adequate for most purposes (stats,
+    /// triggering GCs, ...).
+    pub buckets_size_bytes: u64,
 }
 
-impl IndexTable {
-    pub fn entity_path(&self) -> &EntityPath {
-        &self.ent_path
+impl IndexedTable {
+    pub fn new(cluster_key: ComponentName, timeline: Timeline, ent_path: EntityPath) -> Self {
+        let bucket = IndexedBucket::new(cluster_key, timeline);
+        let buckets_size_bytes = bucket.total_size_bytes();
+        Self {
+            timeline,
+            ent_path,
+            buckets: [(i64::MIN.into(), bucket)].into(),
+            cluster_key,
+            all_components: Default::default(),
+            buckets_num_rows: 0,
+            buckets_size_bytes,
+        }
     }
 }
 
-/// An `IndexBucket` holds a size-delimited (data size and/or number of rows) chunk of a
-/// [`IndexTable`].
-///
-/// - The data size limit is for garbage collection purposes.
-/// - The number of rows limit is to bound sorting costs on the read path.
-///
-/// See [`IndexTable`] to get an idea of what an `IndexBucket` looks like in practice.
+/// An `IndexedBucket` holds a chunk of rows from an [`IndexedTable`]
+/// (see [`DataStoreConfig::indexed_bucket_num_rows`]).
 #[derive(Debug)]
-pub struct IndexBucket {
+pub struct IndexedBucket {
     /// The timeline the bucket's parent table operates in, for debugging purposes.
-    pub(crate) timeline: Timeline,
-
-    pub(crate) indices: RwLock<IndexBucketIndices>,
+    pub timeline: Timeline,
 
     /// Carrying the cluster key around to help with assertions and sanity checks all over the
     /// place.
-    pub(crate) cluster_key: ComponentName,
+    pub cluster_key: ComponentName,
+
+    // To simplify interior mutability.
+    pub inner: RwLock<IndexedBucketInner>,
 }
 
-/// Just the indices, to simplify interior mutability.
-#[derive(Debug)]
-pub struct IndexBucketIndices {
-    /// Whether the indices (all of them!) are currently sorted.
+impl Clone for IndexedBucket {
+    fn clone(&self) -> Self {
+        Self {
+            timeline: self.timeline,
+            cluster_key: self.cluster_key,
+            inner: RwLock::new(self.inner.read().clone()),
+        }
+    }
+}
+
+impl IndexedBucket {
+    fn new(cluster_key: ComponentName, timeline: Timeline) -> Self {
+        Self {
+            timeline,
+            inner: RwLock::new(IndexedBucketInner::default()),
+            cluster_key,
+        }
+    }
+}
+
+/// See [`IndexedBucket`]; this is a helper struct to simplify interior mutability.
+#[derive(Debug, Clone)]
+pub struct IndexedBucketInner {
+    /// Are the rows in this table chunk sorted?
     ///
-    /// Querying an `IndexBucket` will always trigger a sort if the indices aren't already sorted.
-    pub(crate) is_sorted: bool,
+    /// Querying an [`IndexedBucket`] will always trigger a sort if the rows within aren't already
+    /// sorted.
+    pub is_sorted: bool,
 
-    /// The time range covered by the primary time index.
+    /// The time range covered by the primary time column (see [`Self::col_time`]).
     ///
-    /// This is the actual time range that's covered by the indexed data!
-    /// For an empty bucket, this defaults to [+∞,-∞].
-    pub(crate) time_range: TimeRange,
+    /// For an empty bucket, this defaults to `[+∞,-∞]`.
+    pub time_range: TimeRange,
 
-    // The primary time index, which is guaranteed to be dense, and "drives" all other indices.
-    //
-    // All secondary indices are guaranteed to follow the same sort order and be the same length.
-    pub(crate) times: TimeIndex,
+    // The primary time column, which is what drives the ordering of every other column.
+    pub col_time: ErasedTimeVec,
 
-    /// All secondary indices for this bucket (i.e. everything but time).
+    /// The entire column of insertion IDs, if enabled in [`DataStoreConfig`].
+    ///
+    /// Keeps track of insertion order from the point-of-view of the [`DataStore`].
+    pub col_insert_id: InsertIdVec,
+
+    /// The entire column of `RowId`s.
+    ///
+    /// Keeps track of the unique identifier for each row that was generated by the clients.
+    pub col_row_id: RowIdVec,
+
+    /// The entire column of `num_instances`.
+    ///
+    /// Keeps track of the expected number of instances in each row.
+    pub col_num_instances: NumInstancesVec,
+
+    /// All the rows for all the component columns.
+    ///
+    /// The cells are optional since not all rows will have data for every single component
+    /// (i.e. the table is sparse).
+    pub columns: IntMap<ComponentName, DataCellColumn>,
+
+    /// The size of both the control & component data stored in this bucket, heap and stack
+    /// included, in bytes.
+    ///
+    /// This is a best-effort approximation, adequate for most purposes (stats,
+    /// triggering GCs, ...).
     ///
-    /// One index per component: new components (and as such, new indices) can be added at any
-    /// time!
-    /// When that happens, they will be retro-filled with nulls so that they share the same
-    /// length as the primary index ([`Self::times`]).
-    pub(crate) indices: IntMap<ComponentName, SecondaryIndex>,
+    /// We cache this because there can be many, many buckets.
+    pub size_bytes: u64,
 }
 
-impl Default for IndexBucketIndices {
+impl Default for IndexedBucketInner {
     fn default() -> Self {
-        Self {
+        let mut this = Self {
             is_sorted: true,
             time_range: TimeRange::new(i64::MAX.into(), i64::MIN.into()),
-            times: Default::default(),
-            indices: Default::default(),
-        }
-    }
-}
-
-impl IndexBucket {
-    /// Returns an (name, [`Int64Array`]) with a logical type matching the timeline.
-    pub fn times(&self) -> (String, Int64Array) {
-        crate::profile_function!();
-
-        let times = Int64Array::from_vec(self.indices.read().times.clone());
-        let logical_type = match self.timeline.typ() {
-            re_log_types::TimeType::Time => DataType::Timestamp(TimeUnit::Nanosecond, None),
-            re_log_types::TimeType::Sequence => DataType::Int64,
+            col_time: Default::default(),
+            col_insert_id: Default::default(),
+            col_row_id: Default::default(),
+            col_num_instances: Default::default(),
+            columns: Default::default(),
+            size_bytes: 0, // NOTE: computed below
         };
-        (self.timeline.name().to_string(), times.to(logical_type))
+        this.compute_size_bytes();
+        this
     }
 }
 
-// --- Persistent Components ---
-
-/// A `PersistentComponentTable` holds all the timeless values ever inserted for a given component.
-///
-/// See also `DataStore::ComponentTable`.
-#[derive(Debug)]
-pub struct PersistentComponentTable {
-    /// Name of the underlying component, for debugging purposes.
-    pub(crate) name: ComponentName,
-
-    /// Type of the underlying component.
-    pub(crate) datatype: DataType,
-
-    /// All the data for this table: many rows of a single column.
-    ///
-    /// Each chunk is a list of arrays of structs, i.e.
`ListArray`: - /// - the list layer corresponds to the different rows, - /// - the array layer corresponds to the different instances within a single row, - /// - and finally the struct layer holds the components themselves. - /// E.g.: - /// ```text - /// [ - /// [{x: 8.687487, y: 1.9590926}, {x: 2.0559108, y: 0.1494348}, {x: 7.09219, y: 0.9616637}], - /// [{x: 7.158843, y: 0.68897724}, {x: 8.934421, y: 2.8420508}], - /// ] - /// ``` - /// - /// This can contain any number of chunks, depending on how the data was inserted (e.g. single - /// insertions vs. batches). - /// - /// Note that, as of today, we do not actually support batched insertion nor do we support - /// chunks of non-unit length (batches are inserted on a per-row basis internally). - /// As a result, chunks always contain one and only one row's worth of data, at least until - /// the bucket is compacted one or more times. - /// See also #589. - // - // TODO(cmc): compact timeless tables once in a while - pub(crate) chunks: Vec>, - - /// The total number of rows present in this bucket, across all chunks. - pub(crate) total_rows: u64, - - /// The size of this bucket in bytes, across all chunks. - /// - /// Accurately computing the size of arrow arrays is surprisingly costly, which is why we - /// cache this. - pub(crate) total_size_bytes: u64, -} - -// --- Components --- +// --- Timeless --- -/// A `ComponentTable` holds all the values ever inserted for a given component (provided they -/// are still alive, i.e. not GC'd). +/// The timeless specialization of an [`IndexedTable`]. /// -/// Example of a component table holding instances: +/// Run the following command to display a visualization of the store's internal datastructures and +/// better understand how everything fits together: /// ```text -/// ComponentTable { -/// name: rerun.instance_key -/// size: 2 buckets for a total of 128 B across 5 total rows -/// buckets: [ -/// ComponentBucket { -/// size: 64 B across 3 rows -/// row range: from 0 to 0 (all inclusive) -/// archived: true -/// time ranges: -/// - frame_nr: from #41 to #41 (all inclusive) -/// +------------------------------------------------------------------+ -/// | rerun.instance_key | -/// +------------------------------------------------------------------+ -/// | [] | -/// | [2382325256275464629, 9801782006807296871, 13644487945655724411] | -/// | [0, 1, 2] | -/// +------------------------------------------------------------------+ -/// } -/// ComponentBucket { -/// size: 64 B across 2 rows -/// row range: from 3 to 4 (all inclusive) -/// archived: false -/// time ranges: -/// - frame_nr: from #42 to #42 (all inclusive) -/// - log_time: from 19:37:36.713798Z to 19:37:37.713798Z (all inclusive) -/// +-------------------------------------------------------------------+ -/// | rerun.instance_key | -/// +-------------------------------------------------------------------+ -/// | [8907162807054976021, 14953141369327162382, 15742885776230395882] | -/// | [165204472818569687, 3210188998985913268, 13675065411448304501] | -/// +-------------------------------------------------------------------+ -/// } -/// ] -/// } +/// cargo test -p re_arrow_store -- --nocapture datastore_internal_repr /// ``` -/// -/// Example of a component-table holding 2D positions: -/// ```text -/// ComponentTable { -/// name: rerun.point2d -/// size: 2 buckets for a total of 96 B across 4 total rows -/// buckets: [ -/// ComponentBucket { -/// size: 64 B across 3 rows -/// row range: from 0 to 0 (all inclusive) -/// archived: true -/// 
time ranges: -/// - log_time: from 19:37:35.713798Z to 19:37:35.713798Z (all inclusive) -/// - frame_nr: from #41 to #42 (all inclusive) -/// +-------------------------------------------------------------------+ -/// | rerun.point2d | -/// +-------------------------------------------------------------------+ -/// | [] | -/// | [{x: 2.4033058, y: 8.535466}, {x: 4.051945, y: 7.6194324} | -/// | [{x: 1.4975989, y: 6.17476}, {x: 2.4128711, y: 1.853013} | -/// +-------------------------------------------------------------------+ -/// } -/// ComponentBucket { -/// size: 32 B across 1 rows -/// row range: from 3 to 3 (all inclusive) -/// archived: false -/// time ranges: -/// - frame_nr: from #44 to #44 (all inclusive) -/// +-------------------------------------------------------------------+ -/// | rerun.point2d | -/// +-------------------------------------------------------------------+ -/// | [{x: 0.6296742, y: 6.7517242}, {x: 2.3393118, y: 8.770799} | -/// +-------------------------------------------------------------------+ -/// } -/// ] -/// } -/// ``` -#[derive(Debug)] -pub struct ComponentTable { - /// Name of the underlying component. - pub(crate) name: ComponentName, - - /// Type of the underlying component. - pub(crate) datatype: DataType, - - /// The actual buckets, where the component data is stored. - /// - /// Component buckets are append-only, they can never be written to in an out of order - /// fashion. - /// As such, a double-ended queue covers all our needs: - /// - popping from the front for garbage collection - /// - pushing to the back for insertions - /// - binary search for queries - pub(crate) buckets: VecDeque, +// +// TODO(#1524): inline visualization once it's back to a manageable state +// TODO(#1807): timeless should be row-id ordered too then +#[derive(Debug, Clone)] +pub struct PersistentIndexedTable { + /// The entity this table is related to, for debugging purposes. + pub ent_path: EntityPath, + + /// Carrying the cluster key around to help with assertions and sanity checks all over the + /// place. + pub cluster_key: ComponentName, + + /// The entire column of insertion IDs, if enabled in [`DataStoreConfig`]. + /// + /// Keeps track of insertion order from the point-of-view of the [`DataStore`]. + pub col_insert_id: InsertIdVec, + + /// The entire column of `RowId`s. + /// + /// Keeps track of the unique identifier for each row that was generated by the clients. + pub col_row_id: RowIdVec, + + /// The entire column of `num_instances`. + /// + /// Keeps track of the expected number of instances in each row. + pub col_num_instances: NumInstancesVec, + + /// All the rows for all the component columns. + /// + /// The cells are optional since not all rows will have data for every single component + /// (i.e. the table is sparse). + pub columns: IntMap, } -/// A `ComponentBucket` holds a size-delimited (data size) chunk of a [`ComponentTable`]. -#[derive(Debug)] -pub struct ComponentBucket { - /// The component's name, for debugging purposes. - pub(crate) name: ComponentName, - - /// The offset of this bucket in the global table. - pub(crate) row_offset: u64, - - /// Has this bucket been archived yet? - /// - /// For every `ComponentTable`, there can only be one active bucket at a time (i.e. the bucket - /// that is currently accepting write requests), all the others are archived. - /// When the currently active bucket is full, it is archived in turn, and a new bucket is - /// created to take its place. 
-    ///
-    /// Archiving a bucket is a good opportunity to run some maintenance tasks on it, e.g.
-    /// compaction (concatenating all chunks down to a single one).
-    /// Currently, an archived bucket is guaranteed to have these properties:
-    /// - the bucket is full (it has reached the maximum allowed length and/or size),
-    /// - the bucket has been compacted,
-    /// - the bucket is only used for reads.
-    pub(crate) archived: bool,
-
-    /// The time ranges (plural!) covered by this bucket.
-    /// Buckets are never sorted over time, so these time ranges can grow arbitrarily large.
-    ///
-    /// These are only used for garbage collection.
-    pub(crate) time_ranges: HashMap<Timeline, TimeRange>,
-
-    /// All the data for this bucket: many rows of a single column.
-    ///
-    /// Each chunk is a list of arrays of structs, i.e. `ListArray`:
-    /// - the list layer corresponds to the different rows,
-    /// - the array layer corresponds to the different instances within a single row,
-    /// - and finally the struct layer holds the components themselves.
-    /// E.g.:
-    /// ```text
-    /// [
-    ///   [{x: 8.687487, y: 1.9590926}, {x: 2.0559108, y: 0.1494348}, {x: 7.09219, y: 0.9616637}],
-    ///   [{x: 7.158843, y: 0.68897724}, {x: 8.934421, y: 2.8420508}],
-    /// ]
-    /// ```
-    ///
-    /// During the active lifespan of the bucket, this can contain any number of chunks,
-    /// depending on how the data was inserted (e.g. single insertions vs. batches).
-    /// All of these chunks get compacted into one contiguous array when the bucket is archived,
-    /// i.e. when the bucket is full and a new one is created.
-    ///
-    /// Note that, as of today, we do not actually support batched insertion nor do we support
-    /// chunks of non-unit length (batches are inserted on a per-row basis internally).
-    /// As a result, chunks always contain one and only one row's worth of data, at least until
-    /// the bucket is archived and compacted.
-    /// See also #589.
-    pub(crate) chunks: Vec<Box<dyn Array>>,
-
-    /// The total number of rows present in this bucket, across all chunks.
-    pub(crate) total_rows: u64,
-
-    /// The size of this bucket in bytes, across all chunks.
-    ///
-    /// Accurately computing the size of arrow arrays is surprisingly costly, which is why we
-    /// cache this.
-    pub(crate) total_size_bytes: u64,
+impl PersistentIndexedTable {
+    pub fn new(cluster_key: ComponentName, ent_path: EntityPath) -> Self {
+        Self {
+            cluster_key,
+            ent_path,
+            col_insert_id: Default::default(),
+            col_row_id: Default::default(),
+            col_num_instances: Default::default(),
+            columns: Default::default(),
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.col_num_instances.is_empty()
+    }
+}
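The new `store_arrow.rs` module below turns whole buckets into a single arrow `Schema` plus `Chunk`. A sketch of how the documented column-order contract could be checked from the outside (assumes a populated `bucket: IndexedBucket` with insert IDs enabled; the constants come from `re_log_types` as imported below):

```rust
use re_log_types::{COLUMN_INSERT_ID, COLUMN_ROW_ID};

let (schema, chunk) = bucket.serialize().unwrap();
let names: Vec<&str> = schema.fields.iter().map(|f| f.name.as_str()).collect();

// With insert IDs enabled, the two leading control columns are fixed:
assert_eq!(&names[..2], &[COLUMN_INSERT_ID, COLUMN_ROW_ID]);
// Every column in the chunk lines up with one field of the schema.
assert_eq!(schema.fields.len(), chunk.columns().len());
```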
diff --git a/crates/re_arrow_store/src/store_arrow.rs b/crates/re_arrow_store/src/store_arrow.rs
new file mode 100644
index 000000000000..d62d8c3b8f3b
--- /dev/null
+++ b/crates/re_arrow_store/src/store_arrow.rs
@@ -0,0 +1,207 @@
+use std::collections::BTreeMap;
+
+use arrow2::{array::Array, chunk::Chunk, datatypes::Schema};
+use nohash_hasher::IntMap;
+use re_log_types::{
+    ComponentName, DataCellColumn, DataTable, DataTableResult, RowId, Timeline, COLUMN_INSERT_ID,
+    COLUMN_NUM_INSTANCES, COLUMN_ROW_ID,
+};
+
+use crate::store::{IndexedBucket, IndexedBucketInner, PersistentIndexedTable};
+
+// ---
+
+impl IndexedBucket {
+    /// Serializes the entire bucket into an arrow payload and schema.
+    ///
+    /// Column order:
+    /// - `insert_id`
+    /// - `row_id`
+    /// - `time`
+    /// - `num_instances`
+    /// - `$cluster_key`
+    /// - rest of component columns in ascending lexical order
+    pub fn serialize(&self) -> DataTableResult<(Schema, Chunk<Box<dyn Array>>)> {
+        crate::profile_function!();
+
+        let Self {
+            timeline,
+            cluster_key,
+            inner,
+        } = self;
+
+        let IndexedBucketInner {
+            is_sorted: _,
+            time_range: _,
+            col_time,
+            col_insert_id,
+            col_row_id,
+            col_num_instances,
+            columns,
+            size_bytes: _,
+        } = &*inner.read();
+
+        serialize(
+            cluster_key,
+            Some((*timeline, col_time)),
+            col_insert_id,
+            col_row_id,
+            col_num_instances,
+            columns,
+        )
+    }
+}
+
+impl PersistentIndexedTable {
+    /// Serializes the entire table into an arrow payload and schema.
+    ///
+    /// Column order:
+    /// - `insert_id`
+    /// - `row_id`
+    /// - `time`
+    /// - `num_instances`
+    /// - `$cluster_key`
+    /// - rest of component columns in ascending lexical order
+    pub fn serialize(&self) -> DataTableResult<(Schema, Chunk<Box<dyn Array>>)> {
+        crate::profile_function!();
+
+        let Self {
+            ent_path: _,
+            cluster_key,
+            col_insert_id,
+            col_row_id,
+            col_num_instances,
+            columns,
+        } = self;
+
+        serialize(
+            cluster_key,
+            None,
+            col_insert_id,
+            col_row_id,
+            col_num_instances,
+            columns,
+        )
+    }
+}
+
+// ---
+
+fn serialize(
+    cluster_key: &ComponentName,
+    col_time: Option<(Timeline, &[i64])>,
+    col_insert_id: &[u64],
+    col_row_id: &[RowId],
+    col_num_instances: &[u32],
+    table: &IntMap<ComponentName, DataCellColumn>,
+) -> DataTableResult<(Schema, Chunk<Box<dyn Array>>)> {
+    crate::profile_function!();
+
+    let mut schema = Schema::default();
+    let mut columns = Vec::new();
+
+    // NOTE: Empty table / bucket.
+    if col_row_id.is_empty() {
+        return Ok((schema, Chunk::new(columns)));
+    }
+
+    {
+        let (control_schema, control_columns) =
+            serialize_control_columns(col_time, col_insert_id, col_row_id, col_num_instances)?;
+        schema.fields.extend(control_schema.fields);
+        schema.metadata.extend(control_schema.metadata);
+        columns.extend(control_columns.into_iter());
+    }
+
+    {
+        let (data_schema, data_columns) = serialize_data_columns(cluster_key, table)?;
+        schema.fields.extend(data_schema.fields);
+        schema.metadata.extend(data_schema.metadata);
+        columns.extend(data_columns.into_iter());
+    }
+
+    Ok((schema, Chunk::new(columns)))
+}
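`serialize_control_columns` (next) leans on `DataTable`'s column helpers: each control column becomes one schema field plus one arrow array. A fragment illustrating a single such call, under the same assumptions as the code below (it would have to run inside a function returning `DataTableResult<()>`; `None` keeps the natural arrow datatype of the values):

```rust
// Sketch: one primitive control column, serialized standalone.
let col_num_instances: &[u32] = &[1, 3, 2];
let (field, column) =
    DataTable::serialize_primitive_column(COLUMN_NUM_INSTANCES, col_num_instances, None)?;

assert_eq!(field.name, COLUMN_NUM_INSTANCES);
assert_eq!(column.len(), 3); // one arrow value per row
```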
+fn serialize_control_columns(
+    col_time: Option<(Timeline, &[i64])>,
+    col_insert_id: &[u64],
+    col_row_id: &[RowId],
+    col_num_instances: &[u32],
+) -> DataTableResult<(Schema, Vec<Box<dyn Array>>)> {
+    crate::profile_function!();
+
+    let mut schema = Schema::default();
+    let mut columns = Vec::new();
+
+    // NOTE: ordering is taken into account!
+    // - insert_id
+    // - row_id
+    // - time
+    // - num_instances
+
+    // NOTE: Optional column, so make sure it's actually there:
+    if !col_insert_id.is_empty() {
+        let (insert_id_field, insert_id_column) =
+            DataTable::serialize_primitive_column(COLUMN_INSERT_ID, col_insert_id, None)?;
+        schema.fields.push(insert_id_field);
+        columns.push(insert_id_column);
+    }
+
+    let (row_id_field, row_id_column) =
+        DataTable::serialize_control_column(COLUMN_ROW_ID, col_row_id)?;
+    schema.fields.push(row_id_field);
+    columns.push(row_id_column);
+
+    if let Some((timeline, col_time)) = col_time {
+        let (time_field, time_column) = DataTable::serialize_primitive_column(
+            timeline.name(),
+            col_time,
+            timeline.datatype().into(),
+        )?;
+        schema.fields.push(time_field);
+        columns.push(time_column);
+    }
+
+    let (num_instances_field, num_instances_column) =
+        DataTable::serialize_primitive_column(COLUMN_NUM_INSTANCES, col_num_instances, None)?;
+    schema.fields.push(num_instances_field);
+    columns.push(num_instances_column);
+
+    Ok((schema, columns))
+}
+
+fn serialize_data_columns(
+    cluster_key: &ComponentName,
+    table: &IntMap<ComponentName, DataCellColumn>,
+) -> DataTableResult<(Schema, Vec<Box<dyn Array>>)> {
+    crate::profile_function!();
+
+    let mut schema = Schema::default();
+    let mut columns = Vec::new();
+
+    // NOTE: ordering is taken into account!
+    let mut table: BTreeMap<_, _> = table.iter().collect();
+
+    // Cluster column first and foremost!
+    //
+    // NOTE: cannot fail, the cluster key _has_ to be there by definition
+    let cluster_column = table.remove(&cluster_key).unwrap();
+    {
+        let (field, column) =
+            DataTable::serialize_data_column(cluster_key.as_str(), cluster_column)?;
+        schema.fields.push(field);
+        columns.push(column);
+    }
+
+    for (component, column) in table {
+        // NOTE: Don't serialize columns with only null values.
+        if column.iter().any(Option::is_some) {
+            let (field, column) = DataTable::serialize_data_column(component.as_str(), column)?;
+            schema.fields.push(field);
+            columns.push(column);
+        }
+    }
+
+    Ok((schema, columns))
+}
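The `store_dump.rs` module that follows is the inverse direction: it re-exports the store's contents as `DataTable`s. A usage sketch of a hypothetical round-trip, using only APIs that appear in this diff (`Timeline::new_sequence` is assumed from `re_log_types`; the `?` calls assume a surrounding fallible function):

```rust
// Dump everything (timeless + temporal), then feed it into a fresh store.
let mut store2 = DataStore::new(store.cluster_key(), store.config().clone());
for table in store.to_data_tables(None /* no time filter */) {
    store2.insert_table(&table)?;
}

// Or dump only a slice of a single timeline:
let timeline = Timeline::new_sequence("frame_nr");
let range = TimeRange::new(42.into(), 100.into());
let _slice: Vec<_> = store.to_data_tables(Some((timeline, range))).collect();
```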
diff --git a/crates/re_arrow_store/src/store_dump.rs b/crates/re_arrow_store/src/store_dump.rs
new file mode 100644
index 000000000000..31845f75467d
--- /dev/null
+++ b/crates/re_arrow_store/src/store_dump.rs
@@ -0,0 +1,195 @@
+use ahash::HashMapExt;
+use arrow2::Either;
+use nohash_hasher::IntMap;
+use re_log_types::{
+    DataCellColumn, DataTable, ErasedTimeVec, RowIdVec, TableId, TimeRange, Timeline,
+};
+
+use crate::{
+    store::{IndexedBucketInner, PersistentIndexedTable},
+    DataStore, IndexedBucket,
+};
+
+// ---
+
+impl DataStore {
+    /// Serializes the entire datastore into an iterator of [`DataTable`]s.
+    // TODO(#1793): This shouldn't dump cluster keys that were autogenerated.
+    // TODO(#1794): Implement simple recompaction.
+    pub fn to_data_tables(
+        &self,
+        time_filter: Option<(Timeline, TimeRange)>,
+    ) -> impl Iterator<Item = DataTable> + '_ {
+        let timeless = self.dump_timeless_tables();
+        let temporal = if let Some(time_filter) = time_filter {
+            Either::Left(self.dump_temporal_tables_filtered(time_filter))
+        } else {
+            Either::Right(self.dump_temporal_tables())
+        };
+
+        timeless.chain(temporal)
+    }
+
+    fn dump_timeless_tables(&self) -> impl Iterator<Item = DataTable> + '_ {
+        self.timeless_tables.values().map(|table| {
+            crate::profile_scope!("timeless_table");
+
+            let PersistentIndexedTable {
+                ent_path,
+                cluster_key: _,
+                col_insert_id: _,
+                col_row_id,
+                col_num_instances,
+                columns,
+            } = table;
+
+            DataTable {
+                table_id: TableId::random(),
+                col_row_id: col_row_id.clone(),
+                col_timelines: Default::default(),
+                col_entity_path: std::iter::repeat_with(|| ent_path.clone())
+                    .take(table.num_rows() as _)
+                    .collect(),
+                col_num_instances: col_num_instances.clone(),
+                columns: columns.clone(), // shallow
+            }
+        })
+    }
+
+    fn dump_temporal_tables(&self) -> impl Iterator<Item = DataTable> + '_ {
+        self.tables.values().flat_map(|table| {
+            crate::profile_scope!("temporal_table");
+
+            table.buckets.values().map(move |bucket| {
+                crate::profile_scope!("temporal_bucket");
+
+                bucket.sort_indices_if_needed();
+
+                let IndexedBucket {
+                    timeline,
+                    cluster_key: _,
+                    inner,
+                } = bucket;
+
+                let IndexedBucketInner {
+                    is_sorted,
+                    time_range: _,
+                    col_time,
+                    col_insert_id: _,
+                    col_row_id,
+                    col_num_instances,
+                    columns,
+                    size_bytes: _,
+                } = &*inner.read();
+                debug_assert!(is_sorted);
+
+                DataTable {
+                    table_id: TableId::random(),
+                    col_row_id: col_row_id.clone(),
+                    col_timelines: [(*timeline, col_time.iter().copied().map(Some).collect())]
+                        .into(),
+                    col_entity_path: std::iter::repeat_with(|| table.ent_path.clone())
+                        .take(table.num_rows() as _)
+                        .collect(),
+                    col_num_instances: col_num_instances.clone(),
+                    columns: columns.clone(), // shallow
+                }
+            })
+        })
+    }
+
+    fn dump_temporal_tables_filtered(
+        &self,
+        (timeline_filter, time_filter): (Timeline, TimeRange),
+    ) -> impl Iterator<Item = DataTable> + '_ {
+        self.tables
+            .values()
+            .filter_map(move |table| {
+                crate::profile_scope!("temporal_table_filtered");
+
+                if table.timeline != timeline_filter {
+                    return None;
+                }
+
+                Some(table.buckets.values().filter_map(move |bucket| {
+                    crate::profile_scope!("temporal_bucket_filtered");
+
+                    bucket.sort_indices_if_needed();
+
+                    let IndexedBucket {
+                        timeline,
+                        cluster_key: _,
+                        inner,
+                    } = bucket;
+
+                    let IndexedBucketInner {
+                        is_sorted,
+                        time_range,
+                        col_time,
+                        col_insert_id: _,
+                        col_row_id,
+                        col_num_instances,
+                        columns,
+                        size_bytes: _,
+                    } = &*inner.read();
+                    debug_assert!(is_sorted);
+
+                    if !time_range.intersects(time_filter) {
+                        return None;
+                    }
+
+                    let col_row_id: RowIdVec =
+                        filter_column(col_time, col_row_id.iter(), time_filter).collect();
+
+                    // NOTE: Shouldn't ever happen due to check above, but better safe than
+                    // sorry...
+ debug_assert!(!col_row_id.is_empty()); + if col_row_id.is_empty() { + return None; + } + + let col_timelines = [( + *timeline, + filter_column(col_time, col_time.iter(), time_filter) + .map(Some) + .collect(), + )] + .into(); + + let col_entity_path = std::iter::repeat_with(|| table.ent_path.clone()) + .take(col_row_id.len()) + .collect(); + + let col_num_instances = + filter_column(col_time, col_num_instances.iter(), time_filter).collect(); + + let mut columns2 = IntMap::with_capacity(columns.len()); + for (component, column) in columns { + let column = filter_column(col_time, column.iter(), time_filter).collect(); + columns2.insert(*component, DataCellColumn(column)); + } + + Some(DataTable { + table_id: TableId::random(), + col_row_id, + col_timelines, + col_entity_path, + col_num_instances, + columns: columns2, + }) + })) + }) + .flatten() + } +} + +fn filter_column<'a, T: 'a + Clone>( + col_time: &'a ErasedTimeVec, + column: impl Iterator + 'a, + time_filter: TimeRange, +) -> impl Iterator + 'a { + col_time + .iter() + .zip(column) + .filter_map(move |(time, v)| time_filter.contains((*time).into()).then(|| v.clone())) +} diff --git a/crates/re_arrow_store/src/store_format.rs b/crates/re_arrow_store/src/store_format.rs index 731def279d29..d21110f87274 100644 --- a/crates/re_arrow_store/src/store_format.rs +++ b/crates/re_arrow_store/src/store_format.rs @@ -1,27 +1,7 @@ -use arrow2::array::UInt64Array; -use re_format::{arrow, format_bytes, format_number}; +use re_format::{format_bytes, format_number}; +use re_log_types::SizeBytes as _; -use crate::{ - ComponentBucket, ComponentTable, DataStore, IndexBucket, IndexRowNr, IndexTable, - PersistentComponentTable, PersistentIndexTable, RowIndex, RowIndexKind, -}; - -// --- Indices & offsets --- - -impl std::fmt::Display for RowIndex { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.kind() { - RowIndexKind::Temporal => f.write_fmt(format_args!("Temporal({})", self.0)), - RowIndexKind::Timeless => f.write_fmt(format_args!("Timeless({})", self.0)), - } - } -} - -impl std::fmt::Display for IndexRowNr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("{}", self.0)) - } -} +use crate::{DataStore, IndexedBucket, IndexedTable, PersistentIndexedTable}; // --- Data store --- @@ -31,12 +11,11 @@ impl std::fmt::Display for DataStore { let Self { cluster_key, config, - cluster_comp_cache: _, - messages: _, - indices, - components, - timeless_indices, - timeless_components, + cluster_cell_cache: _, + metadata_registry: _, + type_registry: _, + tables, + timeless_tables, insert_id: _, query_id: _, gc_id: _, @@ -54,33 +33,15 @@ impl std::fmt::Display for DataStore { f.write_str(&indent::indent_all_by( 4, format!( - "{} timeless index tables, for a total of {} across {} total rows\n", - timeless_indices.len(), - format_bytes(self.total_timeless_index_size_bytes() as _), - format_number(self.total_timeless_index_rows() as _) - ), - ))?; - f.write_str(&indent::indent_all_by(4, "timeless_indices: [\n"))?; - for table in timeless_indices.values() { - f.write_str(&indent::indent_all_by(8, "PersistentIndexTable {\n"))?; - f.write_str(&indent::indent_all_by(12, table.to_string() + "\n"))?; - f.write_str(&indent::indent_all_by(8, "}\n"))?; - } - f.write_str(&indent::indent_all_by(4, "]\n"))?; - } - { - f.write_str(&indent::indent_all_by( - 4, - format!( - "{} persistent component tables, for a total of {} across {} total rows\n", - timeless_components.len(), - 
format_bytes(self.total_timeless_component_size_bytes() as _), - format_number(self.total_timeless_component_rows() as _) + "{} timeless indexed tables, for a total of {} across {} total rows\n", + timeless_tables.len(), + format_bytes(self.timeless_size_bytes() as _), + format_number(self.num_timeless_rows() as _) ), ))?; - f.write_str(&indent::indent_all_by(4, "timeless_components: [\n"))?; - for table in timeless_components.values() { - f.write_str(&indent::indent_all_by(8, "PersistentComponentTable {\n"))?; + f.write_str(&indent::indent_all_by(4, "timeless_tables: [\n"))?; + for table in timeless_tables.values() { + f.write_str(&indent::indent_all_by(8, "PersistentIndexedTable {\n"))?; f.write_str(&indent::indent_all_by(12, table.to_string() + "\n"))?; f.write_str(&indent::indent_all_by(8, "}\n"))?; } @@ -91,33 +52,15 @@ impl std::fmt::Display for DataStore { f.write_str(&indent::indent_all_by( 4, format!( - "{} index tables, for a total of {} across {} total rows\n", - indices.len(), - format_bytes(self.total_temporal_index_size_bytes() as _), - format_number(self.total_temporal_index_rows() as _) + "{} indexed tables, for a total of {} across {} total rows\n", + tables.len(), + format_bytes(self.temporal_size_bytes() as _), + format_number(self.num_temporal_rows() as _) ), ))?; - f.write_str(&indent::indent_all_by(4, "indices: [\n"))?; - for table in indices.values() { - f.write_str(&indent::indent_all_by(8, "IndexTable {\n"))?; - f.write_str(&indent::indent_all_by(12, table.to_string() + "\n"))?; - f.write_str(&indent::indent_all_by(8, "}\n"))?; - } - f.write_str(&indent::indent_all_by(4, "]\n"))?; - } - { - f.write_str(&indent::indent_all_by( - 4, - format!( - "{} component tables, for a total of {} across {} total rows\n", - components.len(), - format_bytes(self.total_temporal_component_size_bytes() as _), - format_number(self.total_temporal_component_rows() as _) - ), - ))?; - f.write_str(&indent::indent_all_by(4, "components: [\n"))?; - for table in components.values() { - f.write_str(&indent::indent_all_by(8, "ComponentTable {\n"))?; + f.write_str(&indent::indent_all_by(4, "tables: [\n"))?; + for table in tables.values() { + f.write_str(&indent::indent_all_by(8, "IndexedTable {\n"))?; f.write_str(&indent::indent_all_by(12, table.to_string() + "\n"))?; f.write_str(&indent::indent_all_by(8, "}\n"))?; } @@ -130,56 +73,9 @@ impl std::fmt::Display for DataStore { } } -// --- Persistent Indices --- - -impl std::fmt::Display for PersistentIndexTable { - #[allow(clippy::string_add)] - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self { - ent_path, - cluster_key: _, - num_rows: _, - indices: _, - all_components: _, - } = self; - - f.write_fmt(format_args!("entity: {ent_path}\n"))?; - - f.write_fmt(format_args!( - "size: {} across {} rows\n", - format_bytes(self.total_size_bytes() as _), - format_number(self.total_rows() as _), - ))?; - - let (col_names, cols): (Vec<_>, Vec<_>) = { - self.indices - .iter() - .map(|(name, index)| { - ( - name.to_string(), - UInt64Array::from( - index - .iter() - .map(|row_idx| row_idx.map(|row_idx| row_idx.as_u64())) - .collect::>(), - ), - ) - }) - .unzip() - }; - - let values = cols.into_iter().map(|c| c.boxed()); - let table = arrow::format_table(values, col_names); - - f.write_fmt(format_args!("data:\n{table}\n"))?; - - Ok(()) - } -} - -// --- Indices --- +// --- Temporal --- -impl std::fmt::Display for IndexTable { +impl std::fmt::Display for IndexedTable { #[allow(clippy::string_add)] fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { @@ -188,6 +84,8 @@ impl std::fmt::Display for IndexTable { buckets, cluster_key: _, all_components: _, + buckets_num_rows: _, + buckets_size_bytes: _, } = self; f.write_fmt(format_args!("timeline: {}\n", timeline.name()))?; @@ -197,11 +95,11 @@ impl std::fmt::Display for IndexTable { "size: {} buckets for a total of {} across {} total rows\n", self.buckets.len(), format_bytes(self.total_size_bytes() as _), - format_number(self.total_rows() as _), + format_number(self.num_rows() as _), ))?; f.write_str("buckets: [\n")?; for (time, bucket) in buckets.iter() { - f.write_str(&indent::indent_all_by(4, "IndexBucket {\n"))?; + f.write_str(&indent::indent_all_by(4, "IndexedBucket {\n"))?; f.write_str(&indent::indent_all_by( 8, format!("index time bound: >= {}\n", timeline.typ().format(*time),), @@ -215,16 +113,16 @@ impl std::fmt::Display for IndexTable { } } -impl std::fmt::Display for IndexBucket { +impl std::fmt::Display for IndexedBucket { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!( "size: {} across {} rows\n", format_bytes(self.total_size_bytes() as _), - format_number(self.total_rows() as _), + format_number(self.num_rows() as _), ))?; let time_range = { - let time_range = &self.indices.read().time_range; + let time_range = &self.inner.read().time_range; if time_range.min.as_i64() != i64::MAX && time_range.max.as_i64() != i64::MIN { self.timeline.format_time_range(time_range) } else { @@ -233,157 +131,52 @@ impl std::fmt::Display for IndexBucket { }; f.write_fmt(format_args!("{time_range}\n"))?; - let (timeline_name, times) = self.times(); - let (col_names, cols): (Vec<_>, Vec<_>) = { - self.indices - .read() - .indices - .iter() - .map(|(name, index)| { - ( - name.to_string(), - UInt64Array::from( - index - .iter() - .map(|row_idx| row_idx.map(|row_idx| row_idx.as_u64())) - .collect::>(), - ), - ) - }) - .unzip() - }; - - let names = std::iter::once(timeline_name).chain(col_names); - let values = std::iter::once(times.boxed()).chain(cols.into_iter().map(|c| c.boxed())); - let table = arrow::format_table(values, names); - - let is_sorted = self.is_sorted(); - f.write_fmt(format_args!("data (sorted={is_sorted}):\n{table}\n"))?; - - Ok(()) - } -} - -// --- Persistent Components --- - -impl std::fmt::Display for PersistentComponentTable { - #[allow(clippy::string_add)] - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self { - name, - datatype, - chunks, - total_rows, - total_size_bytes, - } = self; - - f.write_fmt(format_args!("name: {name}\n"))?; - if matches!( - std::env::var("RERUN_DATA_STORE_DISPLAY_SCHEMAS").as_deref(), - Ok("1") - ) { - f.write_fmt(format_args!("datatype: {datatype:#?}\n"))?; - } - - f.write_fmt(format_args!( - "size: {} across {} total rows\n", - format_bytes(*total_size_bytes as _), - format_number(*total_rows as _), - ))?; - - let data = { - use arrow2::compute::concatenate::concatenate; - let chunks = chunks.iter().map(|chunk| &**chunk).collect::>(); - concatenate(&chunks).unwrap() - }; - - let table = arrow::format_table([data], [self.name.as_str()]); - f.write_fmt(format_args!("{table}\n"))?; - - Ok(()) + let (schema, columns) = self.serialize().map_err(|err| { + re_log::error_once!("couldn't display indexed bucket: {err}"); + std::fmt::Error + })?; + re_format::arrow::format_table( + columns.columns(), + schema.fields.iter().map(|field| field.name.as_str()), + ) + .fmt(f)?; + + writeln!(f) } } -// --- Components --- +// --- 
Timeless --- -impl std::fmt::Display for ComponentTable { +impl std::fmt::Display for PersistentIndexedTable { #[allow(clippy::string_add)] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { - name, - datatype, - buckets, + ent_path, + cluster_key: _, + col_insert_id: _, + col_row_id: _, + col_num_instances: _, + columns: _, } = self; - f.write_fmt(format_args!("name: {name}\n"))?; - if matches!( - std::env::var("RERUN_DATA_STORE_DISPLAY_SCHEMAS").as_deref(), - Ok("1") - ) { - f.write_fmt(format_args!("datatype: {datatype:#?}\n"))?; - } - - f.write_fmt(format_args!( - "size: {} buckets for a total of {} across {} total rows\n", - self.buckets.len(), - format_bytes(self.total_size_bytes() as _), - format_number(self.total_rows() as _), - ))?; - f.write_str("buckets: [\n")?; - for bucket in buckets { - f.write_str(&indent::indent_all_by(4, "ComponentBucket {\n"))?; - f.write_str(&indent::indent_all_by(8, bucket.to_string()))?; - f.write_str(&indent::indent_all_by(4, "}\n"))?; - } - f.write_str("]")?; - - Ok(()) - } -} + f.write_fmt(format_args!("entity: {ent_path}\n"))?; -impl std::fmt::Display for ComponentBucket { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!( "size: {} across {} rows\n", format_bytes(self.total_size_bytes() as _), - format_number(self.total_rows() as _), + format_number(self.num_rows() as _), ))?; - f.write_fmt(format_args!( - "row range: from {} to {} (all inclusive)\n", - self.row_offset, - // Component buckets can never be empty at the moment: - // - the first bucket is always initialized with a single empty row - // - all buckets that follow are lazily instantiated when data get inserted - // - // TODO(#439): is that still true with deletion? - // TODO(#589): support for non-unit-length chunks - self.row_offset - + self - .chunks - .len() - .checked_sub(1) - .expect("buckets are never empty") as u64, - ))?; - - f.write_fmt(format_args!("archived: {}\n", self.archived))?; - f.write_str("time ranges:\n")?; - for (timeline, time_range) in &self.time_ranges { - f.write_fmt(format_args!( - "{}\n", - &timeline.format_time_range(time_range) - ))?; - } - - let data = { - use arrow2::compute::concatenate::concatenate; - let chunks = self.chunks.iter().map(|chunk| &**chunk).collect::>(); - concatenate(&chunks).unwrap() - }; - - let table = arrow::format_table([data], [self.name.as_str()]); - f.write_fmt(format_args!("{table}\n"))?; - - Ok(()) + let (schema, columns) = self.serialize().map_err(|err| { + re_log::error_once!("couldn't display timeless indexed table: {err}"); + std::fmt::Error + })?; + re_format::arrow::format_table( + columns.columns(), + schema.fields.iter().map(|field| field.name.as_str()), + ) + .fmt(f)?; + + writeln!(f) } } diff --git a/crates/re_arrow_store/src/store_gc.rs b/crates/re_arrow_store/src/store_gc.rs index 5eca6a872ce1..e418763eac33 100644 --- a/crates/re_arrow_store/src/store_gc.rs +++ b/crates/re_arrow_store/src/store_gc.rs @@ -1,215 +1,287 @@ -use std::collections::HashMap; +use re_log_types::{RowId, SizeBytes as _, TimeInt, TimeRange}; -use arrow2::array::{Array, ListArray}; - -use re_log::trace; -use re_log_types::{ComponentName, TimeRange, Timeline}; - -use crate::{ComponentBucket, DataStore}; +use crate::{ + store::{IndexedBucketInner, IndexedTable}, + DataStore, DataStoreStats, +}; // --- #[derive(Debug, Clone, Copy)] pub enum GarbageCollectionTarget { - /// Try to drop _at least_ the given percentage. + /// Try to drop _at least_ the given fraction. 
/// - /// The percentage must be a float in the range [0.0 : 1.0]. - DropAtLeastPercentage(f64), + /// The fraction must be a float in the range [0.0 : 1.0]. + DropAtLeastFraction(f64), } impl std::fmt::Display for GarbageCollectionTarget { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - GarbageCollectionTarget::DropAtLeastPercentage(p) => f.write_fmt(format_args!( - "DropAtLeast({}%)", - re_format::format_f64(*p * 100.0) - )), + GarbageCollectionTarget::DropAtLeastFraction(p) => { + write!(f, "DropAtLeast({:.3}%)", re_format::format_f64(*p * 100.0)) + } } } } impl DataStore { - /// Triggers a garbage collection according to the desired `target`, driven by the specified - /// `primary_component` and `primary_timeline`. - /// Returns all the raw data that was removed from the store for the given `primary_component`. + /// Triggers a garbage collection according to the desired `target`. + /// + /// Garbage collection's performance is bounded by the number of buckets in each table (for + /// each `RowId`, we have to find the corresponding bucket, which is roughly `O(log(n))`) as + /// well as the number of rows in each of those buckets (for each `RowId`, we have to sort the + /// corresponding bucket (roughly `O(n*log(n))`) and then find the corresponding row (roughly + /// `O(log(n))`. + /// The size of the data itself has no impact on performance. + /// + /// Returns the list of `RowId`s that were purged from the store. + /// + /// ## Semantics + /// + /// Garbage collection works on a row-level basis and is driven by [`RowId`] order, + /// i.e. the order defined by the clients' wall-clocks, allowing it to drop data across + /// the different timelines + /// in a fair, deterministic manner. + /// Similarly, out-of-order data is supported out of the box. /// - /// This only affects component tables, indices are left as-is, effectively behaving as - /// tombstones. + /// The garbage collector doesn't deallocate data in and of itself: all it does is drop the + /// store's internal references to that data (the `DataCell`s), which will be deallocated once + /// their reference count reaches 0. /// - /// The garbage collection is based on _insertion order_, which makes it both very efficient - /// and very simple from an implementation standpoint. - /// The tradeoff is that the given `primary_timeline` is expected to roughly follow insertion - /// order, otherwise the behaviour is essentially undefined. - pub fn gc( - &mut self, - target: GarbageCollectionTarget, - primary_timeline: Timeline, - primary_component: ComponentName, - ) -> Vec> { + /// ## Limitations + /// + /// The garbage collector is currently unaware of our latest-at semantics, i.e. it will drop + /// old data even if doing so would impact the results of recent queries. + /// See . + // + // TODO(#1804): There shouldn't be any need to return the purged `RowId`s, all secondary + // datastructures should be able to purge themselves based solely off of + // [`DataStore::oldest_time_per_timeline`]. + // + // TODO(#1803): The GC should be aware of latest-at semantics and make sure they are upheld + // when purging data. + // + // TODO(#1823): Workload specific optimizations. + pub fn gc(&mut self, target: GarbageCollectionTarget) -> (Vec, DataStoreStats) { crate::profile_function!(); self.gc_id += 1; - let initial_nb_rows = self.total_temporal_component_rows(); - let initial_size_bytes = self.total_temporal_component_size_bytes() as f64; + // NOTE: only temporal data and row metadata get purged! 
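(Aside: a minimal sketch of how a caller might drive this new entry point. It assumes `DataStore`, `DataStoreStats` and `GarbageCollectionTarget` are reachable from outside the crate and that the stats fields used in this hunk are public; the helper name `collect_garbage` is made up for illustration.)

```rust
use re_arrow_store::{DataStore, GarbageCollectionTarget};

/// Hypothetical helper: ask the store to shed at least 30% of its temporal
/// data and row metadata, then report what was actually reclaimed.
fn collect_garbage(store: &mut DataStore) {
    let (purged_row_ids, stats_diff) =
        store.gc(GarbageCollectionTarget::DropAtLeastFraction(0.3));

    // The purged `RowId`s can be forwarded to secondary datastructures so
    // they can purge themselves too (cf. TODO(#1804) above).
    eprintln!(
        "GC purged {} rows, reclaiming {} bytes",
        purged_row_ids.len(),
        stats_diff.temporal.num_bytes + stats_diff.metadata_registry.num_bytes,
    );
}
```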
+ let stats_before = DataStoreStats::from_store(self); + let initial_num_rows = + stats_before.temporal.num_rows + stats_before.metadata_registry.num_rows; + let initial_num_bytes = + (stats_before.temporal.num_bytes + stats_before.metadata_registry.num_bytes) as f64; - let res = match target { - GarbageCollectionTarget::DropAtLeastPercentage(p) => { + let row_ids = match target { + GarbageCollectionTarget::DropAtLeastFraction(p) => { assert!((0.0..=1.0).contains(&p)); - let drop_at_least_size_bytes = initial_size_bytes * p; - let target_size_bytes = initial_size_bytes - drop_at_least_size_bytes; + let num_bytes_to_drop = initial_num_bytes * p; + let target_num_bytes = initial_num_bytes - num_bytes_to_drop; re_log::debug!( kind = "gc", id = self.gc_id, %target, - timeline = %primary_timeline.name(), - %primary_component, - initial_nb_rows = re_format::format_large_number(initial_nb_rows as _), - initial_size_bytes = re_format::format_bytes(initial_size_bytes), - target_size_bytes = re_format::format_bytes(target_size_bytes), - drop_at_least_size_bytes = re_format::format_bytes(drop_at_least_size_bytes), + initial_num_rows = re_format::format_large_number(initial_num_rows as _), + initial_num_bytes = re_format::format_bytes(initial_num_bytes), + target_num_bytes = re_format::format_bytes(target_num_bytes), + drop_at_least_num_bytes = re_format::format_bytes(num_bytes_to_drop), "starting GC" ); - self.gc_drop_at_least_size_bytes( - primary_timeline, - primary_component, - drop_at_least_size_bytes, - ) + self.gc_drop_at_least_num_bytes(num_bytes_to_drop) } }; #[cfg(debug_assertions)] self.sanity_check().unwrap(); - let new_nb_rows = self.total_temporal_component_rows(); - let new_size_bytes = self.total_temporal_component_size_bytes() as f64; + // NOTE: only temporal data and row metadata get purged! + let stats_after = DataStoreStats::from_store(self); + let new_num_rows = stats_after.temporal.num_rows + stats_after.metadata_registry.num_rows; + let new_num_bytes = + (stats_after.temporal.num_bytes + stats_after.metadata_registry.num_bytes) as f64; re_log::debug!( kind = "gc", id = self.gc_id, %target, - timeline = %primary_timeline.name(), - %primary_component, - initial_nb_rows = re_format::format_large_number(initial_nb_rows as _), - initial_size_bytes = re_format::format_bytes(initial_size_bytes), - new_nb_rows = re_format::format_large_number(new_nb_rows as _), - new_size_bytes = re_format::format_bytes(new_size_bytes), + initial_num_rows = re_format::format_large_number(initial_num_rows as _), + initial_num_bytes = re_format::format_bytes(initial_num_bytes), + new_num_rows = re_format::format_large_number(new_num_rows as _), + new_num_bytes = re_format::format_bytes(new_num_bytes), "GC done" ); - res + let stats_diff = stats_before - stats_after; + + (row_ids, stats_diff) } - fn gc_drop_at_least_size_bytes( - &mut self, - primary_timeline: Timeline, - primary_component: ComponentName, - mut drop_at_least_size_bytes: f64, - ) -> Vec> { - let mut dropped = Vec::>::new(); - - let mut i = 0usize; - while drop_at_least_size_bytes > 0.0 { - // Find and drop the earliest (in terms of _insertion order_) primary component bucket - // that we can find. 
- let Some(primary_bucket) = self - .components - .get_mut(&primary_component) - .and_then(|table| (table.buckets.len() > 1).then(|| table.buckets.pop_front())) - .flatten() - else { - trace!( - kind = "gc", - id = self.gc_id, - timeline = %primary_timeline.name(), - %primary_component, - iter = i, - remaining = re_format::format_bytes(drop_at_least_size_bytes), - "no more primary buckets, giving up" - ); + /// Tries to drop _at least_ `num_bytes_to_drop` bytes of data from the store. + /// + /// Returns the list of `RowId`s that were purged from the store. + fn gc_drop_at_least_num_bytes(&mut self, mut num_bytes_to_drop: f64) -> Vec { + crate::profile_function!(); + + let mut row_ids = Vec::new(); + + // The algorithm is straightforward: + // 1. Pop the oldest `RowId` available + // 2. Find all tables that potentially hold data associated with that `RowId` + // 3. Drop the associated row and account for the space we got back + while num_bytes_to_drop > 0.0 { + // pop next row id + let Some((row_id, timepoint)) = self.metadata_registry.pop_first() else { break; }; + let metadata_dropped_size_bytes = + row_id.total_size_bytes() + timepoint.total_size_bytes(); + self.metadata_registry.heap_size_bytes -= metadata_dropped_size_bytes; + num_bytes_to_drop -= metadata_dropped_size_bytes as f64; + row_ids.push(row_id); - drop_at_least_size_bytes -= primary_bucket.total_size_bytes() as f64; - - trace!( - kind = "gc", - id = self.gc_id, - timeline = %primary_timeline.name(), - %primary_component, - iter = i, - reclaimed = re_format::format_bytes(primary_bucket.total_size_bytes() as f64), - remaining = re_format::format_bytes(drop_at_least_size_bytes), - "found & dropped primary component bucket" - ); + // find all tables that could possibly contain this `RowId` + let tables = self.tables.iter_mut().filter_map(|((timeline, _), table)| { + timepoint.get(timeline).map(|time| (*time, table)) + }); - // From there, find and drop all component buckets (in _insertion order_) that do not - // contain any data more recent than the time range covered by the primary - // component bucket (for the primary timeline!). - for table in self - .components - .iter_mut() - .filter_map(|(component, table)| (*component != primary_component).then_some(table)) - { - while table.buckets.len() > 1 { - let bucket = table.buckets.front().unwrap(); - if primary_bucket.encompasses(primary_timeline, &bucket.time_ranges) { - let bucket = table.buckets.pop_front().unwrap(); - drop_at_least_size_bytes -= bucket.total_size_bytes() as f64; - trace!( - kind = "gc", - id = self.gc_id, - timeline = %primary_timeline.name(), - %primary_component, - iter = i, - reclaimed = re_format::format_bytes(bucket.total_size_bytes() as f64), - remaining = re_format::format_bytes(drop_at_least_size_bytes), - "found & dropped secondary component bucket" - ); - } else { - break; - } - } - - i += 1; + for (time, table) in tables { + num_bytes_to_drop -= table.try_drop_row(row_id, time.as_i64()) as f64; } + } + + row_ids + } +} - // We don't collect indices: they behave as tombstones. +impl IndexedTable { + /// Tries to drop the given `row_id` from the table, which is expected to be found at the + /// specified `time`. + /// + /// Returns how many bytes were actually dropped, or zero if the row wasn't found. 
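(The GC loop above hinges on a single property: `RowId`s sort by the clients' wall-clock time, so popping the smallest key of the metadata registry always yields the globally oldest row. A toy model of that driving order, with plain `u64`s standing in for `RowId`s:)

```rust
use std::collections::BTreeMap;

fn main() {
    // Toy stand-in for `metadata_registry`: keys play the role of RowIds,
    // which are assumed to be monotonically increasing, wall-clock-derived IDs.
    let mut registry: BTreeMap<u64, &str> = BTreeMap::new();
    registry.insert(3, "logged last");
    registry.insert(1, "logged first");
    registry.insert(2, "logged in between");

    // `pop_first` hands back the globally oldest row, regardless of which
    // timeline(s) it ended up on: this is what makes the GC fair and
    // deterministic across timelines.
    while let Some((row_id, data)) = registry.pop_first() {
        println!("purging row {row_id}: {data}");
    }
    assert!(registry.is_empty());
}
```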
+ fn try_drop_row(&mut self, row_id: RowId, time: i64) -> u64 { + crate::profile_function!(); - dropped.extend(primary_bucket.chunks.into_iter().map(|chunk| { - chunk - .as_any() - .downcast_ref::>() - .unwrap() - .values() - .clone() - })); + let table_has_more_than_one_bucket = self.buckets.len() > 1; + + let (bucket_key, bucket) = self.find_bucket_mut(time.into()); + let bucket_num_bytes = bucket.total_size_bytes(); + + let mut dropped_num_bytes = { + let inner = &mut *bucket.inner.write(); + inner.try_drop_row(row_id, time) + }; + + // NOTE: We always need to keep at least one bucket alive, otherwise we have + // nowhere to write to. + if table_has_more_than_one_bucket && bucket.num_rows() == 0 { + // NOTE: We're dropping the bucket itself in this case, rather than just its + // contents. + debug_assert!( + dropped_num_bytes <= bucket_num_bytes, + "Bucket contained more bytes than it thought" + ); + dropped_num_bytes = bucket_num_bytes; + self.buckets.remove(&bucket_key); + + // NOTE: If this is the first bucket of the table that we've just removed, we need the + // next one to become responsible for `-∞`. + if bucket_key == TimeInt::MIN { + if let Some((_, bucket)) = self.buckets.pop_first() { + self.buckets.insert(TimeInt::MIN, bucket); + } + } } - dropped + self.buckets_size_bytes -= dropped_num_bytes; + self.buckets_num_rows -= (dropped_num_bytes > 0) as u64; + + dropped_num_bytes } } -impl ComponentBucket { - /// Does `self` fully encompass `time_ranges` for the given `primary_timeline`? - fn encompasses( - &self, - primary_timeline: Timeline, - time_ranges: &HashMap, - ) -> bool { - if let (Some(time_range1), Some(time_range2)) = ( - self.time_ranges.get(&primary_timeline), - time_ranges.get(&primary_timeline), - ) { - return time_range1.max >= time_range2.max; +impl IndexedBucketInner { + /// Tries to drop the given `row_id` from the bucket, which is expected to be found at the + /// specified `time`. + /// + /// Returns how many bytes were actually dropped, or zero if the row wasn't found.
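(Before moving on to the bucket-level removal below, the `TimeInt::MIN` re-keying above deserves a standalone illustration; a toy model with `i64` keys and string labels standing in for real buckets:)

```rust
use std::collections::BTreeMap;

// Buckets are keyed by their *lower* time bound; exactly one bucket must
// always be responsible for -inf so that every lookup finds a bucket.
fn remove_bucket(buckets: &mut BTreeMap<i64, &'static str>, key: i64) {
    buckets.remove(&key);
    if key == i64::MIN {
        // The dropped bucket owned [-inf; ...): promote the next one.
        if let Some((_, bucket)) = buckets.pop_first() {
            buckets.insert(i64::MIN, bucket);
        }
    }
}

fn main() {
    let mut buckets = BTreeMap::from([(i64::MIN, "bucket #0"), (100, "bucket #1")]);
    remove_bucket(&mut buckets, i64::MIN);
    // "bucket #1" is now responsible for everything from -inf onwards.
    assert_eq!(buckets.get(&i64::MIN), Some(&"bucket #1"));
    assert_eq!(buckets.len(), 1);
}
```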
+ fn try_drop_row(&mut self, row_id: RowId, time: i64) -> u64 { + crate::profile_function!(); + + self.sort(); + + let IndexedBucketInner { + is_sorted, + time_range, + col_time, + col_insert_id, + col_row_id, + col_num_instances, + columns, + size_bytes, + } = self; + + let mut dropped_num_bytes = 0u64; + + let mut row_index = col_time.partition_point(|&time2| time2 < time); + while col_time.get(row_index) == Some(&time) { + if col_row_id[row_index] != row_id { + row_index += 1; + continue; + } + + // Update the time_range min/max: + if col_time.len() == 1 { + // We removed the last row + *time_range = TimeRange::EMPTY; + } else { + *is_sorted = false; + + // We have at least two rows, so we can safely [index] here: + if row_index == 0 { + // We removed the first row, so the second row holds the new min + time_range.min = col_time[1].into(); + } + if row_index + 1 == col_time.len() { + // We removed the last row, so the penultimate row holds the new max + time_range.max = col_time[row_index - 1].into(); + } + } + + // col_row_id + let removed_row_id = col_row_id.swap_remove(row_index); + debug_assert_eq!(row_id, removed_row_id); + dropped_num_bytes += removed_row_id.total_size_bytes(); + + // col_time + let row_time = col_time.swap_remove(row_index); + dropped_num_bytes += row_time.total_size_bytes(); + + // col_insert_id (if present) + if !col_insert_id.is_empty() { + dropped_num_bytes += col_insert_id.swap_remove(row_index).total_size_bytes(); + } + + // col_num_instances + dropped_num_bytes += col_num_instances.swap_remove(row_index).total_size_bytes(); + + // each data column + for column in columns.values_mut() { + dropped_num_bytes += column.0.swap_remove(row_index).total_size_bytes(); + } + + // NOTE: A single `RowId` cannot possibly have more than one datapoint for + // a single timeline. + break; } - // There's only one way this can happen: this is a bucket that only holds the fake row at - // offset #0. - // Ignore it. - true + *size_bytes -= dropped_num_bytes; + + dropped_num_bytes } } diff --git a/crates/re_arrow_store/src/store_polars.rs b/crates/re_arrow_store/src/store_polars.rs index fb095fcfb62f..00a233c70d6f 100644 --- a/crates/re_arrow_store/src/store_polars.rs +++ b/crates/re_arrow_store/src/store_polars.rs @@ -1,21 +1,23 @@ +#![allow(clippy::all, unused_variables, dead_code)] + use std::collections::BTreeSet; use arrow2::{ - array::{new_empty_array, Array, BooleanArray, ListArray, UInt64Array, Utf8Array}, + array::{new_empty_array, Array, BooleanArray, ListArray, Utf8Array}, bitmap::Bitmap, compute::concatenate::concatenate, offset::Offsets, }; -use nohash_hasher::IntMap; use polars_core::{functions::diag_concat_df, prelude::*}; -use re_log_types::ComponentName; +use re_log_types::{ComponentName, DataCell, DataTable}; use crate::{ - store::SecondaryIndex, ArrayExt, DataStore, DataStoreConfig, IndexBucket, IndexBucketIndices, - PersistentIndexTable, RowIndex, + store::InsertIdVec, ArrayExt, DataStore, DataStoreConfig, IndexedBucket, IndexedBucketInner, + PersistentIndexedTable, }; // TODO(#1692): all of this stuff should be defined by Data{Cell,Row,Table}, not the store. 
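(The row removal above leans on two `std` slice primitives whose semantics are easy to mix up; a self-contained sketch, with plain `Vec`s standing in for the bucket's columns:)

```rust
fn main() {
    // `partition_point` binary-searches a sorted slice and returns the index
    // of the first element for which the predicate flips to false, i.e. the
    // first row whose time is >= the target time.
    let col_time = vec![10i64, 20, 20, 30];
    let first = col_time.partition_point(|&t| t < 20);
    assert_eq!(first, 1); // rows 1 and 2 both live at t=20

    // `swap_remove` is O(1) but plugs the hole with the *last* element, which
    // is precisely why the bucket's `is_sorted` flag has to be cleared after
    // a removal (see the `*is_sorted = false` above).
    let mut col_row_id = vec!["r0", "r1", "r2", "r3"];
    let removed = col_row_id.swap_remove(1);
    assert_eq!(removed, "r1");
    assert_eq!(col_row_id, ["r0", "r3", "r2"]);
}
```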
+// TODO(#1619): remove this and reimplement it on top of store serialization // --- @@ -29,7 +31,7 @@ impl DataStore { const TIMELESS_COL: &str = "_is_timeless"; - let timeless_dfs = self.timeless_indices.values().map(|index| { + let timeless_dfs = self.timeless_tables.values().map(|index| { let ent_path = index.ent_path.clone(); let mut df = index.to_dataframe(self, &self.config); @@ -45,7 +47,7 @@ impl DataStore { (ent_path, df.clone()) }); - let temporal_dfs = self.indices.values().map(|index| { + let temporal_dfs = self.tables.values().map(|index| { let dfs: Vec<_> = index .buckets .values() @@ -109,7 +111,7 @@ impl DataStore { // Arrange the columns in the order that makes the most sense as a user. let timelines: BTreeSet<&str> = self - .indices + .tables .keys() .map(|(timeline, _)| timeline.name().as_str()) .collect(); @@ -159,7 +161,7 @@ impl DataStore { } } -impl PersistentIndexTable { +impl PersistentIndexedTable { /// Dumps the entire table as a flat, denormalized dataframe. /// /// This cannot fail: it always tries to yield as much valuable information as it can, even in @@ -170,24 +172,25 @@ impl PersistentIndexTable { let Self { ent_path: _, cluster_key: _, - num_rows, - indices, - all_components: _, + col_insert_id, + col_row_id, + col_num_instances, + columns, } = self; + let num_rows = self.num_rows() as usize; + let insert_ids = config .store_insert_ids - .then(|| insert_ids_as_series(*num_rows as usize, indices)) - .flatten(); + .then(|| insert_ids_as_series(&col_insert_id)); let comp_series = // One column for insert IDs, if they are available. std::iter::once(insert_ids) .flatten() // filter options - // One column for each component index. - .chain(indices.iter().filter_map(|(component, comp_row_nrs)| { - let datatype = find_component_datatype(store, component)?; - component_as_series(store, *num_rows as usize, datatype, *component, comp_row_nrs).into() + .chain(columns.iter().filter_map(|(component, cells)| { + let datatype = store.lookup_datatype(component)?.clone(); + column_as_series(store, num_rows, datatype, *component, cells).into() })); DataFrame::new(comp_series.collect::>()) @@ -197,7 +200,7 @@ impl PersistentIndexTable { } } -impl IndexBucket { +impl IndexedBucket { /// Dumps the entire bucket as a flat, denormalized dataframe. /// /// This cannot fail: it always tries to yield as much valuable information as it can, even in @@ -205,20 +208,28 @@ impl IndexBucket { pub fn to_dataframe(&self, store: &DataStore, config: &DataStoreConfig) -> DataFrame { crate::profile_function!(); - let (_, times) = self.times(); - let num_rows = times.len(); - - let IndexBucketIndices { + let IndexedBucketInner { is_sorted: _, time_range: _, - times: _, - indices, - } = &*self.indices.read(); + col_time, + col_insert_id, + col_row_id, + col_num_instances, + columns, + size_bytes: _, + } = &*self.inner.read(); + + let (_, times) = DataTable::serialize_primitive_column( + self.timeline.name(), + col_time, + self.timeline.datatype().into(), + ) + .unwrap(); + let num_rows = times.len(); let insert_ids = config .store_insert_ids - .then(|| insert_ids_as_series(num_rows, indices)) - .flatten(); + .then(|| insert_ids_as_series(&col_insert_id)); // Need to create one `Series` for the time index and one for each component index. let comp_series = [ @@ -227,16 +238,16 @@ impl IndexBucket { // One column for the time index. 
Some(new_infallible_series( self.timeline.name().as_str(), - ×, + &*times, num_rows, )), ] .into_iter() .flatten() // filter options // One column for each component index. - .chain(indices.iter().filter_map(|(component, comp_row_nrs)| { - let datatype = find_component_datatype(store, component)?; - component_as_series(store, num_rows, datatype, *component, comp_row_nrs).into() + .chain(columns.iter().filter_map(|(component, cells)| { + let datatype = store.lookup_datatype(component)?.clone(); + column_as_series(store, num_rows, datatype, *component, cells).into() })); DataFrame::new(comp_series.collect::>()) @@ -248,67 +259,41 @@ impl IndexBucket { // --- -fn insert_ids_as_series( - num_rows: usize, - indices: &IntMap, -) -> Option { +fn insert_ids_as_series(col_insert_id: &InsertIdVec) -> Series { crate::profile_function!(); - indices.get(&DataStore::insert_id_key()).map(|insert_ids| { - let insert_ids = insert_ids - .iter() - .map(|id| id.map(|id| id.0.get())) - .collect::>(); - let insert_ids = UInt64Array::from(insert_ids); - new_infallible_series(DataStore::insert_id_key().as_str(), &insert_ids, num_rows) - }) + let insert_ids = arrow2::array::UInt64Array::from_slice(col_insert_id.as_slice()); + new_infallible_series( + DataStore::insert_id_key().as_str(), + &insert_ids, + insert_ids.len(), + ) } -fn find_component_datatype( - store: &DataStore, - component: &ComponentName, -) -> Option { - crate::profile_function!(); - - let timeless = store - .timeless_components - .get(component) - .map(|table| table.datatype.clone()); - let temporal = store - .components - .get(component) - .map(|table| table.datatype.clone()); - timeless.or(temporal) -} - -fn component_as_series( +fn column_as_series( store: &DataStore, num_rows: usize, datatype: arrow2::datatypes::DataType, component: ComponentName, - comp_row_nrs: &[Option], + cells: &[Option], ) -> Series { crate::profile_function!(); - let components = &[component]; - - // For each row in the index, grab the associated data from the component tables. - let comp_rows: Vec> = comp_row_nrs - .iter() - .cloned() - .map(|comp_row_nr| store.get(components, &[comp_row_nr])[0].clone()) - .collect(); - // Computing the validity bitmap is just a matter of checking whether the data was // available in the component tables. - let comp_validity: Vec<_> = comp_rows.iter().map(|row| row.is_some()).collect(); + let comp_validity: Vec<_> = cells.iter().map(|cell| cell.is_some()).collect(); // Each cell is actually a list, so we need to compute offsets one cell at a time. - let comp_lengths = comp_rows - .iter() - .map(|row| row.as_ref().map_or(0, |row| row.len())); + let comp_lengths = cells.iter().map(|cell| { + cell.as_ref() + .map_or(0, |cell| cell.num_instances() as usize) + }); - let comp_values: Vec<_> = comp_rows.iter().flatten().map(|row| row.as_ref()).collect(); + let comp_values: Vec<_> = cells + .iter() + .flatten() + .map(|cell| cell.as_arrow_ref()) + .collect(); // Bring everything together into one big list. 
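(For readers less familiar with arrow2's list layout, here is a self-contained sketch of the same assembly as the construction that follows: validity from cell presence, offsets from per-cell lengths, one flat values buffer. It assumes only the `arrow2` crate this file already depends on:)

```rust
use arrow2::{
    array::{Array, ListArray, PrimitiveArray},
    bitmap::Bitmap,
    datatypes::DataType,
    offset::Offsets,
};

fn main() {
    // Three "cells" for one component column; the middle row has no data.
    let cells: Vec<Option<Vec<i32>>> = vec![Some(vec![1, 2]), None, Some(vec![3])];

    // Validity: one bit per row, set iff the cell is present.
    let validity: Bitmap = cells.iter().map(Option::is_some).collect();

    // Offsets: cumulative cell lengths (missing cells contribute length 0).
    let offsets =
        Offsets::try_from_lengths(cells.iter().map(|c| c.as_ref().map_or(0, Vec::len)))
            .unwrap();

    // Values: all instances flattened into a single buffer.
    let values: Vec<i32> = cells.iter().flatten().flatten().copied().collect();
    let values = PrimitiveArray::from_vec(values).boxed();

    let list = ListArray::<i32>::new(
        ListArray::<i32>::default_datatype(DataType::Int32),
        offsets.into(),
        values,
        Some(validity),
    );
    assert_eq!(list.len(), 3);
    assert!(!list.is_valid(1)); // the missing cell shows up as a null row
}
```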
let comp_values = ListArray::::new( @@ -358,13 +343,6 @@ fn sort_df_columns( all.remove(all.binary_search(&"entity").expect("has to exist")); - if store_insert_ids { - all.remove( - all.binary_search(&DataStore::insert_id_key().as_str()) - .expect("has to exist"), - ); - } - let timelines = timelines.iter().copied().map(Some).collect::>(); let native_components = all @@ -382,7 +360,7 @@ fn sort_df_columns( .collect::>(); [ - vec![store_insert_ids.then(|| DataStore::insert_id_key().as_str())], + // vec![store_insert_ids.then(|| DataStore::insert_id_key().as_str())], timelines, vec![Some("entity")], native_components, diff --git a/crates/re_arrow_store/src/store_read.rs b/crates/re_arrow_store/src/store_read.rs index 00dce413215f..02c753ce4be3 100644 --- a/crates/re_arrow_store/src/store_read.rs +++ b/crates/re_arrow_store/src/store_read.rs @@ -1,20 +1,18 @@ use std::{ops::RangeBounds, sync::atomic::Ordering}; -use arrow2::array::{Array, ListArray}; - use itertools::Itertools; +use nohash_hasher::IntSet; use re_log::trace; -use re_log_types::{ComponentName, EntityPath, MsgId, TimeInt, TimePoint, TimeRange, Timeline}; - -use crate::{ - ComponentBucket, ComponentTable, DataStore, IndexBucket, IndexBucketIndices, IndexRowNr, - IndexTable, PersistentComponentTable, PersistentIndexTable, RowIndex, RowIndexKind, - SecondaryIndex, +use re_log_types::{ + ComponentName, DataCell, EntityPath, RowId, TimeInt, TimePoint, TimeRange, Timeline, }; +use smallvec::SmallVec; + +use crate::{DataStore, IndexedBucket, IndexedBucketInner, IndexedTable, PersistentIndexedTable}; // --- Queries --- -/// A query a given time, for a given timeline. +/// A query at a given time, for a given timeline. /// /// Get the latest version of the data available at this time. #[derive(Clone)] @@ -76,13 +74,13 @@ impl RangeQuery { // --- Data store --- impl DataStore { - /// Retrieve all the `ComponentName`s that have been written to for a given `EntityPath` on - /// a specific `Timeline`. + /// Retrieve all the [`ComponentName`]s that have been written to for a given [`EntityPath`] on + /// a specific [`Timeline`]. /// /// # Temporal semantics /// - /// In addition to the temporal results, this also includes all `ComponentName`s present in - /// the timeless indices for this entity. + /// In addition to the temporal results, this also includes all [`ComponentName`]s present in + /// the timeless tables for this entity. pub fn all_components( &self, timeline: &Timeline, @@ -103,15 +101,15 @@ impl DataStore { "query started..." ); - let timeless = self - .timeless_indices + let timeless: Option> = self + .timeless_tables .get(&ent_path_hash) - .map(|index| &index.all_components); + .map(|table| table.columns.keys().copied().collect()); let temporal = self - .indices + .tables .get(&(*timeline, ent_path_hash)) - .map(|index| &index.all_components); + .map(|table| &table.all_components); let components = match (timeless, temporal) { (None, Some(temporal)) => temporal.iter().cloned().collect_vec(), @@ -132,35 +130,30 @@ impl DataStore { Some(components) } - /// Queries the datastore for the internal row indices of the specified `components`, as seen - /// from the point of view of the so-called `primary` component. + /// Queries the datastore for the cells of the specified `components`, as seen from the point + /// of view of the so-called `primary` component. /// - /// Returns an array of row indices on success, or `None` otherwise. 
- /// Success is defined by one thing and thing only: whether a row index could be found for the + /// Returns an array of [`DataCell`]s on success, or `None` otherwise. + /// Success is defined by one thing and thing only: whether a cell could be found for the /// `primary` component. /// The presence or absence of secondary components has no effect on the success criteria. /// - /// * On success, the returned array is filled with the internal row index of each and every - /// component in `components`, or `None` if said component is not available in that row. - /// - /// To actually retrieve the data associated with these indices, see [`Self::get`]. - /// /// # Temporal semantics /// - /// Temporal indices take precedence, then timeless indices are queried to fill the holes left + /// Temporal indices take precedence, then timeless tables are queried to fill the holes left /// by missing temporal data. /// /// ## Example /// - /// The following example demonstrate how to fetch the latest row indices for a given - /// component and the associated cluster key, then get the corresponding data using these row - /// indices, and finally turn everything into a nice-to-work-with polars's dataframe. + /// The following example demonstrate how to fetch the latest cells for a given component + /// and its associated cluster key, and wrap the result into a nice-to-work-with polars's + /// dataframe. /// /// ```rust /// # use polars_core::{prelude::*, series::Series}; - /// # use re_log_types::{ComponentName, EntityPath as EntityPath, TimeInt}; + /// # use re_log_types::{ComponentName, EntityPath, RowId, TimeInt}; /// # use re_arrow_store::{DataStore, LatestAtQuery, RangeQuery}; - /// + /// # /// pub fn latest_component( /// store: &DataStore, /// query: &LatestAtQuery, @@ -170,16 +163,19 @@ impl DataStore { /// let cluster_key = store.cluster_key(); /// /// let components = &[cluster_key, primary]; - /// let row_indices = store - /// .latest_at(query, ent_path, primary, components) - /// .unwrap_or([None; 2]); - /// let results = store.get(components, &row_indices); + /// let (_, cells) = store + /// .latest_at(&query, ent_path, primary, components) + /// .unwrap_or((RowId::ZERO, [(); 2].map(|_| None))); /// - /// let series: Result, _> = components + /// let series: Result, _> = cells /// .iter() - /// .zip(results) - /// .filter_map(|(component, col)| col.map(|col| (component, col))) - /// .map(|(&component, col)| Series::try_from((component.as_str(), col))) + /// .flatten() + /// .map(|cell| { + /// Series::try_from(( + /// cell.component_name().as_str(), + /// cell.to_arrow(), + /// )) + /// }) /// .collect(); /// /// DataFrame::new(series?).map_err(Into::into) @@ -197,7 +193,7 @@ impl DataStore { ent_path: &EntityPath, primary: ComponentName, components: &[ComponentName; N], - ) -> Option<[Option; N]> { + ) -> Option<(RowId, [Option; N])> { crate::profile_function!(); // TODO(cmc): kind & query_id need to somehow propagate through the span system. @@ -215,62 +211,64 @@ impl DataStore { "query started..." 
); - let row_indices = self - .indices + let cells = self + .tables .get(&(query.timeline, ent_path_hash)) - .and_then(|index| { - let row_indices = index.latest_at(query.at, primary, components); + .and_then(|table| { + let cells = table.latest_at(query.at, primary, components); trace!( kind = "latest_at", query = ?query, entity = %ent_path, %primary, ?components, - ?row_indices, timeless = false, - "row indices fetched" + "row cells fetched" ); - row_indices + cells }); - // If we've found everything we were looking for in the temporal index, then we can + // If we've found everything we were looking for in the temporal table, then we can // return the results immediately. - if row_indices.map_or(false, |row_indices| row_indices.iter().all(Option::is_some)) { - return row_indices; + if cells + .as_ref() + .map_or(false, |(_, cells)| cells.iter().all(Option::is_some)) + { + return cells; } - let row_indices_timeless = self.timeless_indices.get(&ent_path_hash).and_then(|index| { - let row_indices = index.latest_at(primary, components); + let cells_timeless = self.timeless_tables.get(&ent_path_hash).and_then(|table| { + let cells = table.latest_at(primary, components); trace!( kind = "latest_at", query = ?query, entity = %ent_path, %primary, ?components, - ?row_indices, + ?cells, timeless = true, - "row indices fetched" + "cells fetched" ); - row_indices + cells }); - // Otherwise, let's see what's in the timeless index, and then..: - match (row_indices, row_indices_timeless) { - // nothing in the timeless index: return those partial row indices we got. - (Some(row_indices), None) => return Some(row_indices), - // no temporal row indices, but some timeless ones: return those as-is. - (None, Some(row_indices_timeless)) => return Some(row_indices_timeless), - // we have both temporal & timeless indices: let's merge the two when it makes sense + // Otherwise, let's see what's in the timeless table, and then..: + match (cells, cells_timeless) { + // nothing in the timeless table: return those partial cells we got. + (Some(cells), None) => return Some(cells), + // no temporal cells, but some timeless ones: return those as-is. + (None, Some(cells_timeless)) => return Some(cells_timeless), + // we have both temporal & timeless cells: let's merge the two when it makes sense // and return the end result. - (Some(mut row_indices), Some(row_indices_timeless)) => { - for (i, row_idx) in row_indices_timeless.into_iter().enumerate() { - if row_indices[i].is_none() { - row_indices[i] = row_idx; + (Some((row_id, mut cells)), Some((_, cells_timeless))) => { + for (i, row_idx) in cells_timeless.into_iter().enumerate() { + if cells[i].is_none() { + cells[i] = row_idx; } } - return Some(row_indices); + return Some((row_id, cells)); } - // no row indices at all. + // no cells at all. (None, None) => {} } @@ -286,20 +284,16 @@ impl DataStore { None } - /// Iterates the datastore in order to return the internal row indices of the the specified - /// `components`, as seen from the point of view of the so-called `primary` component, for the - /// given time range. + /// Iterates the datastore in order to return the cells of the the specified `components`, + /// as seen from the point of view of the so-called `primary` component, for the given time + /// range. /// /// For each and every relevant row that is found, the returned iterator will yield an array - /// that is filled with the internal row index of each and every component in `components`, - /// or `None` if said component is not available in that row. 
+ /// that is filled with the cells of each and every component in `components`, or `None` if + /// said component is not available in that row. /// A row is considered iff it contains data for the `primary` component. /// - /// This method cannot fail! If there's no data to return (whether that's due to a missing - /// primary index, missing secondary components, an empty point-of-view...), then an empty - /// iterator is returned. - /// - /// To actually retrieve the data associated with these indices, see [`Self::get`]. + /// This method cannot fail! If there's no data to return, an empty iterator is returned. /// /// ⚠ Contrary to latest-at queries, range queries can and will yield multiple rows for a /// single timestamp if that timestamp happens to hold multiple entries for the `primary` @@ -311,39 +305,41 @@ impl DataStore { /// /// Yields the contents of the temporal indices. /// Iff the query's time range starts at `TimeInt::MIN`, this will yield the contents of the - /// timeless indices before anything else. + /// timeless tables before anything else. /// /// When yielding timeless entries, the associated time will be `None`. /// /// ## Example /// - /// The following example demonstrate how to range over the row indices of a given - /// component and its associated cluster key, then get the corresponding data using these - /// row indices, and finally turn everything into a nice-to-work-with iterator of - /// polars's dataframe. - /// Additionally, it yields the latest-at state of the component a the start of the time range, + /// The following example demonstrates how to range over the cells of a given + /// component and its associated cluster key, and turn the results into a nice-to-work-with + /// iterator of polars's dataframe. + /// Additionally, it yields the latest-at state of the component at the start of the time range, /// if available.
/// /// ```rust /// # use arrow2::array::Array; /// # use polars_core::{prelude::*, series::Series}; - /// # use re_log_types::{ComponentName, EntityPath as EntityPath, TimeInt}; + /// # use re_log_types::{ComponentName, DataCell, EntityPath, RowId, TimeInt}; /// # use re_arrow_store::{DataStore, LatestAtQuery, RangeQuery}; - /// - /// # pub fn dataframe_from_results( - /// # components: &[ComponentName; N], - /// # results: [Option>; N], + /// # + /// # pub fn dataframe_from_cells( + /// # cells: [Option; N], /// # ) -> anyhow::Result { - /// # let series: Result, _> = components + /// # let series: Result, _> = cells /// # .iter() - /// # .zip(results) - /// # .filter_map(|(component, col)| col.map(|col| (component, col))) - /// # .map(|(&component, col)| Series::try_from((component.as_str(), col))) + /// # .flatten() + /// # .map(|cell| { + /// # Series::try_from(( + /// # cell.component_name().as_str(), + /// # cell.to_arrow(), + /// # )) + /// # }) /// # .collect(); /// # /// # DataFrame::new(series?).map_err(Into::into) /// # } - /// + /// # /// pub fn range_component<'a>( /// store: &'a DataStore, /// query: &'a RangeQuery, @@ -358,11 +354,10 @@ impl DataStore { /// let latest_time = query.range.min.as_i64().saturating_sub(1).into(); /// let df_latest = { /// let query = LatestAtQuery::new(query.timeline, latest_time); - /// let row_indices = store + /// let (_, cells) = store /// .latest_at(&query, ent_path, primary, &components) - /// .unwrap_or([None; 2]); - /// let results = store.get(&components, &row_indices); - /// dataframe_from_results(&components, results) + /// .unwrap_or((RowId::ZERO, [(); 2].map(|_| None))); + /// dataframe_from_cells(cells) /// }; /// /// // Send the latest-at state before anything else.. @@ -370,10 +365,7 @@ impl DataStore { /// // ..but only if it's not an empty dataframe. /// .filter(|df| df.as_ref().map_or(true, |(_, df)| !df.is_empty())) /// .chain(store.range(query, ent_path, components).map( - /// move |(time, _, row_indices)| { - /// let results = store.get(&components, &row_indices); - /// dataframe_from_results(&components, results).map(|df| (time, df)) - /// }, + /// move |(time, _, cells)| dataframe_from_cells(cells).map(|df| (time, df)) /// )) /// } /// ``` @@ -389,7 +381,7 @@ impl DataStore { query: &RangeQuery, ent_path: &EntityPath, components: [ComponentName; N], - ) -> impl Iterator, IndexRowNr, [Option; N])> + 'a { + ) -> impl Iterator, RowId, [Option; N])> + 'a { // Beware! This merely measures the time it takes to gather all the necessary metadata // for building the returned iterator. crate::profile_function!(); @@ -409,21 +401,21 @@ impl DataStore { ); let temporal = self - .indices + .tables .get(&(query.timeline, ent_path_hash)) .map(|index| index.range(query.range, components)) .into_iter() .flatten() - .map(|(time, idx_row_nr, row_indices)| (Some(time), idx_row_nr, row_indices)); + .map(|(time, row_id, cells)| (Some(time), row_id, cells)); if query.range.min == TimeInt::MIN { let timeless = self - .timeless_indices + .timeless_tables .get(&ent_path_hash) .map(|index| { index .range(components) - .map(|(idx_row_nr, row_indices)| (None, idx_row_nr, row_indices)) + .map(|(row_id, cells)| (None, row_id, cells)) }) .into_iter() .flatten(); @@ -433,226 +425,34 @@ impl DataStore { } } - /// Retrieves the data associated with a list of `components` at the specified `indices`. - /// - /// If the associated data is found, it will be written into the returned array at the - /// appropriate index, or `None` otherwise. 
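(The `TimeInt::MIN` chaining inside `range` above can be modelled in isolation; a toy sketch where timeless rows carry `None` as their time:)

```rust
fn main() {
    // Timeless rows are only prepended when the query starts at the
    // beginning of time; they carry `None` instead of a timestamp.
    let timeless = vec![(None, "timeless row")];
    let temporal = vec![(Some(10i64), "temporal row"), (Some(20), "another")];

    let starts_at_time_min = true; // i.e. query.range.min == TimeInt::MIN
    let rows: Vec<_> = starts_at_time_min
        .then_some(timeless)
        .into_iter()
        .flatten()
        .chain(temporal)
        .collect();

    assert_eq!(rows[0], (None, "timeless row"));
    assert_eq!(rows.len(), 3);
}
```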
- /// - /// `row_indices` takes a list of options so that one can easily re-use the results obtained - /// from [`Self::latest_at`] & [`Self::range`]. - pub fn get( - &self, - components: &[ComponentName; N], - row_indices: &[Option; N], - ) -> [Option>; N] { - crate::profile_function!(); - - let mut results = [(); N].map(|_| None); // work around non-Copy const initialization limitations - - for (i, &component, row_idx) in components - .iter() - .zip(row_indices) - .enumerate() - .filter_map(|(i, (comp, row_idx))| row_idx.map(|row_idx| (i, comp, row_idx))) - { - match row_idx.kind() { - RowIndexKind::Timeless => { - let row = self - .timeless_components - .get(&component) - .map(|table| table.get(row_idx)); - results[i] = row; - } - RowIndexKind::Temporal => { - let row = self - .components - .get(&component) - .and_then(|table| table.get(row_idx)); - results[i] = row; - } - } - } - - results - } - - pub fn get_msg_metadata(&self, msg_id: &MsgId) -> Option<&TimePoint> { + pub fn get_msg_metadata(&self, row_id: &RowId) -> Option<&TimePoint> { crate::profile_function!(); - self.messages.get(msg_id) + self.metadata_registry.get(row_id) } /// Sort all unsorted indices in the store. pub fn sort_indices_if_needed(&mut self) { - for index in self.indices.values_mut() { + for index in self.tables.values_mut() { index.sort_indices_if_needed(); } } - - /// Returns a read-only iterator over the raw index tables. - /// - /// Do _not_ use this to try and test the internal state of the datastore. - pub fn iter_indices( - &self, - ) -> impl ExactSizeIterator { - self.indices.iter().map(|((timeline, _), table)| { - ((*timeline, table.ent_path.clone() /* shallow */), table) - }) - } -} - -// --- Persistent Indices --- - -impl PersistentIndexTable { - /// Returns `None` iff no row index could be found for the `primary` component. - pub fn latest_at( - &self, - primary: ComponentName, - components: &[ComponentName; N], - ) -> Option<[Option; N]> { - if self.num_rows == 0 { - return None; - } - - // Early-exit if this bucket is unaware of this component. - let index = self.indices.get(&primary)?; - - crate::profile_function!(); - - trace!( - kind = "latest_at", - %primary, - ?components, - timeless = true, - "searching for primary & secondary row indices..." - ); - - // find the primary index's row. - let primary_idx = self.num_rows - 1; - - trace!( - kind = "latest_at", - %primary, - ?components, - %primary_idx, - timeless = true, - "found primary index", - ); - - // find the secondary indices' rows, and the associated row indices. - let mut secondary_idx = primary_idx as i64; - while index[secondary_idx as usize].is_none() { - secondary_idx -= 1; - if secondary_idx < 0 { - trace!( - kind = "latest_at", - %primary, - ?components, - timeless = true, - %primary_idx, - "no secondary index found", - ); - return None; - } - } - - trace!( - kind = "latest_at", - %primary, - ?components, - timeless = true, - %primary_idx, %secondary_idx, - "found secondary index", - ); - debug_assert!(index[secondary_idx as usize].is_some()); - - let mut row_indices = [None; N]; - for (i, component) in components.iter().enumerate() { - if let Some(index) = self.indices.get(component) { - if let Some(row_idx) = index[secondary_idx as usize] { - trace!( - kind = "latest_at", - %primary, - %component, - timeless = true, - %primary_idx, %secondary_idx, %row_idx, - "found row index", - ); - row_indices[i] = Some(row_idx); - } - } - } - - Some(row_indices) - } - - /// Returns an empty iterator if no data could be found for any reason. 
- pub fn range( - &self, - components: [ComponentName; N], - ) -> impl Iterator; N])> + '_ { - // Early-exit if the table is unaware of any of our components of interest. - if components - .iter() - .all(|component| self.indices.get(component).is_none()) - { - return itertools::Either::Right(std::iter::empty()); - } - - // Beware! This merely measures the time it takes to gather all the necessary metadata - // for building the returned iterator. - crate::profile_function!(); - - // TODO(cmc): Cloning these is obviously not great and will need to be addressed at - // some point. - // But, really, it's not _that_ bad either: these are integers and e.g. with the default - // configuration there are only 1024 of them (times the number of components). - let comp_indices = self.indices.clone(); - - let row_indices = (0..self.num_rows).filter_map(move |comp_idx_row_nr| { - let comp_idx_row_nr = IndexRowNr(comp_idx_row_nr); - - let mut row_indices = [None; N]; - for (i, component) in components.iter().enumerate() { - if let Some(index) = comp_indices.get(component) { - if let Some(row_idx) = index[comp_idx_row_nr.0 as usize] { - row_indices[i] = Some(row_idx); - } - } - } - - // We only yield rows that contain data for at least one of the components of - // interest. - if row_indices.iter().all(Option::is_none) { - return None; - } - - trace!( - kind = "range", - ?components, - timeless = true, - %comp_idx_row_nr, - ?row_indices, - "yielding row indices", - ); - - Some((comp_idx_row_nr, row_indices)) - }); - - itertools::Either::Left(row_indices) - } } -// --- Indices --- +// --- Temporal --- -impl IndexTable { - /// Returns `None` iff no row index could be found for the `primary` component. +impl IndexedTable { + /// Queries the table for the cells of the specified `components`, as seen from the point + /// of view of the so-called `primary` component. + /// + /// Returns an array of [`DataCell`]s on success, or `None` iff no cell could be found for + /// the `primary` component. pub fn latest_at( &self, time: TimeInt, primary: ComponentName, components: &[ComponentName; N], - ) -> Option<[Option; N]> { + ) -> Option<(RowId, [Option; N])> { crate::profile_function!(); // Early-exit if this entire table is unaware of this component. @@ -665,9 +465,9 @@ impl IndexTable { // The time we're looking for gives us an upper bound: all components must be indexed // in either this bucket _or any of those that come before_! // - // That is because secondary indices allow for null values, which forces us to not only - // walk backwards within an index bucket, but sometimes even walk backwards across - // multiple index buckets within the same table! + // That is because secondary columns allow for null values, which forces us to not only + // walk backwards within an indexed bucket, but sometimes even walk backwards across + // multiple indexed buckets within the same table! let buckets = self .range_buckets_rev(..=time) @@ -681,23 +481,32 @@ impl IndexTable { %primary, ?components, attempt, - bucket_time_range = timeline.typ().format_range(bucket.indices.read().time_range), + bucket_time_range = timeline.typ().format_range(bucket.inner.read().time_range), "found candidate bucket" ); - if let row_indices @ Some(_) = bucket.latest_at(time, primary, components) { - return row_indices; // found at least the primary component! + if let cells @ Some(_) = bucket.latest_at(time, primary, components) { + return cells; // found at least the primary component! 
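(The candidate-bucket walk above, like the `find_bucket` helpers further down, boils down to a reverse range scan over a `BTreeMap`; a toy model, with made-up names and string buckets:)

```rust
use std::collections::BTreeMap;

// The bucket covering `time` is the last entry keyed at or below it; this is
// guaranteed to exist because every table spawns with a bucket at -inf.
fn find_bucket(buckets: &BTreeMap<i64, &'static str>, time: i64) -> (i64, &'static str) {
    buckets
        .range(..=time)
        .next_back()
        .map(|(time, bucket)| (*time, *bucket))
        .expect("tables always have a bucket covering [-inf; +inf)")
}

fn main() {
    let buckets = BTreeMap::from([(i64::MIN, "bucket #0"), (100, "bucket #1")]);
    assert_eq!(find_bucket(&buckets, 42), (i64::MIN, "bucket #0"));
    assert_eq!(find_bucket(&buckets, 100), (100, "bucket #1"));
    assert_eq!(find_bucket(&buckets, 9000), (100, "bucket #1"));
}
```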
} } None // primary component not found } - /// Returns an empty iterator if no data could be found for any reason. + /// Iterates the table in order to return the cells of the the specified `components`, + /// as seen from the point of view of the so-called `primary` component, for the given time + /// range. + /// + /// For each and every relevant row that is found, the returned iterator will yield an array + /// that is filled with the cells of each and every component in `components`, or `None` if + /// said component is not available in that row. + /// A row is considered iff it contains data for the `primary` component. + /// + /// This method cannot fail! If there's no data to return, an empty iterator is returned. pub fn range( &self, time_range: TimeRange, components: [ComponentName; N], - ) -> impl Iterator; N])> + '_ { + ) -> impl Iterator; N])> + '_ { // Beware! This merely measures the time it takes to gather all the necessary metadata // for building the returned iterator. crate::profile_function!(); @@ -715,7 +524,7 @@ impl IndexTable { kind = "range", bucket_nr, bucket_time_range = - timeline.typ().format_range(bucket.indices.read().time_range), + timeline.typ().format_range(bucket.inner.read().time_range), timeline = %timeline.name(), ?time_range, ?components, @@ -726,29 +535,31 @@ impl IndexTable { }) } - /// Returns the index bucket whose time range covers the given `time`. + /// Returns the indexed bucket whose time range covers the given `time`. /// - /// In addition to returning a reference to the `IndexBucket` itself, this also returns its + /// In addition to returning a reference to the `IndexedBucket` itself, this also returns its /// _indexing time_, which is different from its minimum time range bound! - /// See `IndexTable::buckets` for more information. - pub fn find_bucket(&self, time: TimeInt) -> (TimeInt, &IndexBucket) { + /// + /// See [`IndexedTable::buckets`] for more information. + pub fn find_bucket(&self, time: TimeInt) -> (TimeInt, &IndexedBucket) { crate::profile_function!(); // This cannot fail, `iter_bucket` is guaranteed to always yield at least one bucket, - // since index tables always spawn with a default bucket that covers [-∞;+∞]. + // since indexed tables always spawn with a default bucket that covers [-∞;+∞]. self.range_buckets_rev(..=time).next().unwrap() } - /// Returns the index bucket whose time range covers the given `time`. + /// Returns the indexed bucket whose time range covers the given `time`. /// - /// In addition to returning a reference to the `IndexBucket` itself, this also returns its + /// In addition to returning a reference to the `IndexedBucket` itself, this also returns its /// _indexing time_, which is different from its minimum time range bound! - /// See `IndexTable::buckets` for more information. - pub fn find_bucket_mut(&mut self, time: TimeInt) -> (TimeInt, &mut IndexBucket) { + /// + /// See [`IndexedTable::buckets`] for more information. + pub fn find_bucket_mut(&mut self, time: TimeInt) -> (TimeInt, &mut IndexedBucket) { crate::profile_function!(); // This cannot fail, `iter_bucket_mut` is guaranteed to always yield at least one bucket, - // since index tables always spawn with a default bucket that covers [-∞;+∞]. + // since indexed tables always spawn with a default bucket that covers [-∞;+∞]. self.range_bucket_rev_mut(..=time).next().unwrap() } @@ -757,13 +568,14 @@ impl IndexTable { /// /// It then continues yielding buckets until it runs out, in increasing time range order. 
/// - /// In addition to yielding references to the `IndexBucket`s themselves, this also returns + /// In addition to yielding references to the `IndexedBucket`s themselves, this also returns /// their _indexing times_, which are different from their minimum time range bounds! - /// See `IndexTable::buckets` for more information. + /// + /// See [`IndexedTable::buckets`] for more information. pub fn range_buckets( &self, time_range: impl RangeBounds, - ) -> impl Iterator { + ) -> impl Iterator { // Beware! This merely measures the time it takes to gather all the necessary metadata // for building the returned iterator. crate::profile_function!(); @@ -778,13 +590,14 @@ impl IndexTable { /// /// It then continues yielding buckets until it runs out, in decreasing time range order. /// - /// In addition to yielding references to the `IndexBucket`s themselves, this also returns + /// In addition to yielding references to the `IndexedBucket`s themselves, this also returns /// their _indexing times_, which are different from their minimum time range bounds! - /// See `IndexTable::buckets` for more information. + /// + /// See [`IndexedTable::buckets`] for more information. pub fn range_buckets_rev( &self, time_range: impl RangeBounds, - ) -> impl Iterator { + ) -> impl Iterator { // Beware! This merely measures the time it takes to gather all the necessary metadata // for building the returned iterator. crate::profile_function!(); @@ -800,13 +613,14 @@ impl IndexTable { /// /// It then continues yielding buckets until it runs out, in decreasing time range order. /// - /// In addition to yielding references to the `IndexBucket`s themselves, this also returns + /// In addition to yielding references to the `IndexedBucket`s themselves, this also returns /// their _indexing times_, which are different from their minimum time range bounds! - /// See `IndexTable::buckets` for more information. + /// + /// See [`IndexedTable::buckets`] for more information. pub fn range_bucket_rev_mut( &mut self, time_range: impl RangeBounds, - ) -> impl Iterator { + ) -> impl Iterator { // Beware! This merely measures the time it takes to gather all the necessary metadata // for building the returned iterator. crate::profile_function!(); @@ -817,54 +631,54 @@ impl IndexTable { .map(|(time, bucket)| (*time, bucket)) } - /// Sort all unsorted index buckets in this table. + /// Sort all unsorted indexed buckets in this table. pub fn sort_indices_if_needed(&self) { for bucket in self.buckets.values() { bucket.sort_indices_if_needed(); } } - - /// Returns a read-only iterator over the raw buckets. - /// - /// Do _not_ use this to try and test the internal state of the datastore. - pub fn iter_buckets(&self) -> impl ExactSizeIterator { - self.buckets.values() - } } -impl IndexBucket { +impl IndexedBucket { /// Sort all component indices by time, provided that's not already the case. pub fn sort_indices_if_needed(&self) { - if self.indices.read().is_sorted { + if self.inner.read().is_sorted { return; // early read-only exit } crate::profile_scope!("sort"); - self.indices.write().sort(); + self.inner.write().sort(); } - /// Returns `None` iff no row index could be found for the `primary` component. + /// Queries the bucket for the cells of the specified `components`, as seen from the point + /// of view of the so-called `primary` component. + /// + /// Returns an array of [`DataCell`]s on success, or `None` iff no cell could be found for + /// the `primary` component. 
pub fn latest_at( &self, time: TimeInt, primary: ComponentName, components: &[ComponentName; N], - ) -> Option<[Option; N]> { + ) -> Option<(RowId, [Option; N])> { crate::profile_function!(); + self.sort_indices_if_needed(); - let IndexBucketIndices { + let IndexedBucketInner { is_sorted, time_range: _, - times, - indices, - } = &*self.indices.read(); + col_time, + col_insert_id: _, + col_row_id, + col_num_instances: _, + columns, + size_bytes: _, + } = &*self.inner.read(); debug_assert!(is_sorted); // Early-exit if this bucket is unaware of this component. - let index = indices.get(&primary)?; - - crate::profile_function!(); + let column = columns.get(&primary)?; trace!( kind = "latest_at", @@ -872,48 +686,47 @@ impl IndexBucket { ?components, timeline = %self.timeline.name(), time = self.timeline.typ().format(time), - "searching for primary & secondary row indices..." + "searching for primary & secondary cells..." ); - // find the primary index's row. - let primary_idx = times.partition_point(|t| *t <= time.as_i64()) as i64; + let time_row_nr = col_time.partition_point(|t| *t <= time.as_i64()) as i64; // The partition point is always _beyond_ the index that we're looking for. // A partition point of 0 thus means that we're trying to query for data that lives // _before_ the beginning of time... there's nothing to be found there. - if primary_idx == 0 { + if time_row_nr == 0 { return None; } // The partition point is always _beyond_ the index that we're looking for; we need // to step back to find what we came for. - let primary_idx = primary_idx - 1; + let primary_row_nr = time_row_nr - 1; trace!( kind = "latest_at", %primary, ?components, timeline = %self.timeline.name(), time = self.timeline.typ().format(time), - %primary_idx, - "found primary index", + %primary_row_nr, + "found primary row number", ); - // find the secondary indices' rows, and the associated row indices. - let mut secondary_idx = primary_idx; - while index[secondary_idx as usize].is_none() { - secondary_idx -= 1; - if secondary_idx < 0 { + // find the secondary row number, and the associated cells. 
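(The loop that follows is the classic latest-at backwards walk; here it is in isolation over a plain slice of optional cells, with the helper name `walk_back` made up for illustration:)

```rust
// Starting from the primary row, step back until the secondary column
// actually holds a value; give up once the bucket's beginning is reached
// (the caller then falls back to earlier buckets, or to timeless data).
fn walk_back<T: Clone>(column: &[Option<T>], primary_row_nr: usize) -> Option<T> {
    let mut row_nr = primary_row_nr;
    loop {
        if let Some(cell) = &column[row_nr] {
            return Some(cell.clone());
        }
        if row_nr == 0 {
            return None;
        }
        row_nr -= 1;
    }
}

fn main() {
    let column = [Some("v0"), None, None];
    assert_eq!(walk_back(&column, 2), Some("v0")); // latest-at row 2 sees row 0's value
    assert_eq!(walk_back(&column[1..], 1), None); // nothing at or before row 1 here
}
```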
+ let mut secondary_row_nr = primary_row_nr; + while column[secondary_row_nr as usize].is_none() { + if secondary_row_nr == 0 { trace!( kind = "latest_at", %primary, ?components, timeline = %self.timeline.name(), time = self.timeline.typ().format(time), - %primary_idx, - "no secondary index found", + %primary_row_nr, + "no secondary row number found", ); return None; } + secondary_row_nr -= 1; } trace!( @@ -922,46 +735,59 @@ impl IndexBucket { ?components, timeline = %self.timeline.name(), time = self.timeline.typ().format(time), - %primary_idx, %secondary_idx, - "found secondary index", + %primary_row_nr, %secondary_row_nr, + "found secondary row number", ); - debug_assert!(index[secondary_idx as usize].is_some()); + debug_assert!(column[secondary_row_nr as usize].is_some()); - let mut row_indices = [None; N]; + let mut cells = [(); N].map(|_| None); for (i, component) in components.iter().enumerate() { - if let Some(index) = indices.get(component) { - if let Some(row_idx) = index[secondary_idx as usize] { + if let Some(column) = columns.get(component) { + if let Some(cell) = &column[secondary_row_nr as usize] { trace!( kind = "latest_at", %primary, %component, timeline = %self.timeline.name(), time = self.timeline.typ().format(time), - %primary_idx, %secondary_idx, %row_idx, - "found row index", + %primary_row_nr, %secondary_row_nr, + "found cell", ); - row_indices[i] = Some(row_idx); + cells[i] = Some(cell.clone() /* shallow */); } } } - Some(row_indices) + Some((col_row_id[secondary_row_nr as usize], cells)) } - /// Returns an empty iterator if no data could be found for any reason. + /// Iterates the bucket in order to return the cells of the the specified `components`, + /// as seen from the point of view of the so-called `primary` component, for the given time + /// range. + /// + /// For each and every relevant row that is found, the returned iterator will yield an array + /// that is filled with the cells of each and every component in `components`, or `None` if + /// said component is not available in that row. + /// A row is considered iff it contains data for the `primary` component. + /// + /// This method cannot fail! If there's no data to return, an empty iterator is returned. pub fn range( &self, time_range: TimeRange, components: [ComponentName; N], - ) -> impl Iterator; N])> + '_ { + ) -> impl Iterator; N])> + '_ { self.sort_indices_if_needed(); - let IndexBucketIndices { + let IndexedBucketInner { is_sorted, time_range: bucket_time_range, - times, - indices, - } = &*self.indices.read(); + col_time, + col_insert_id: _, + col_row_id, + col_num_instances: _, + columns, + size_bytes: _, + } = &*self.inner.read(); debug_assert!(is_sorted); let bucket_time_range = *bucket_time_range; @@ -969,7 +795,7 @@ impl IndexBucket { // Early-exit if this bucket is unaware of any of our components of interest. if components .iter() - .all(|component| indices.get(component).is_none()) + .all(|component| columns.get(component).is_none()) { return itertools::Either::Right(std::iter::empty()); } @@ -984,12 +810,10 @@ impl IndexBucket { ?components, timeline = %self.timeline.name(), time_range = self.timeline.typ().format_range(time_range), - "searching for time & component row index numbers..." + "searching for time & component cell numbers..." 
); - // find the time index's row number - let time_idx_row_nr: IndexRowNr = - IndexRowNr(times.partition_point(|t| *t < time_range.min.as_i64()) as u64); + let time_row_nr = col_time.partition_point(|t| *t < time_range.min.as_i64()) as u64; trace!( kind = "range", @@ -997,46 +821,48 @@ impl IndexBucket { ?components, timeline = %self.timeline.name(), time_range = self.timeline.typ().format_range(time_range), - %time_idx_row_nr, - "found time index row number", + %time_row_nr, + "found time row number", ); // TODO(cmc): Cloning these is obviously not great and will need to be addressed at // some point. - // But, really, it's not _that_ bad either: these are integers and e.g. with the default - // configuration there are only 1024 of them (times the number of components). - let time_idx = times.clone(); - let comp_indices = indices.clone(); + // But, really, it's not _that_ bad either: these are either integers or erased pointers, + // and e.g. with the default configuration there are only 1024 of them (times the number + // of components). + let col_time = col_time.clone(); + let col_row_id = col_row_id.clone(); + let mut columns = columns.clone(); // shallow // We have found the index of the first row that possibly contains data for any single one // of the components we're interested in. // // Now we need to iterate through every remaining rows in the bucket and yield any that // contains data for these components and is still within the time range. - let row_indices = time_idx + let cells = col_time .into_iter() - .skip(time_idx_row_nr.0 as usize) + .skip(time_row_nr as usize) // don't go beyond the time range we're interested in! .filter(move |time| time_range.contains((*time).into())) .enumerate() - .filter_map(move |(time_idx_offset, time)| { - let comp_idx_row_nr = IndexRowNr(time_idx_row_nr.0 + time_idx_offset as u64); + .filter_map(move |(time_row_offset, time)| { + let row_nr = time_row_nr + time_row_offset as u64; - let mut row_indices = [None; N]; + let mut cells = [(); N].map(|_| None); for (i, component) in components.iter().enumerate() { - if let Some(index) = comp_indices.get(component) { - if let Some(row_idx) = index[comp_idx_row_nr.0 as usize] { - row_indices[i] = Some(row_idx); - } + if let Some(column) = columns.get_mut(component) { + cells[i] = column[row_nr as usize].take(); } } // We only yield rows that contain data for at least one of the components of // interest. 
- if row_indices.iter().all(Option::is_none) { + if cells.iter().all(Option::is_none) { return None; } + let row_id = col_row_id[row_nr as usize]; + trace!( kind = "range", bucket_time_range = @@ -1044,30 +870,35 @@ impl IndexBucket { ?components, timeline = %self.timeline.name(), time_range = self.timeline.typ().format_range(time_range), - %comp_idx_row_nr, - ?row_indices, - "yielding row indices", + %row_nr, + %row_id, + ?cells, + "yielding cells", ); - Some((time.into(), comp_idx_row_nr, row_indices)) + Some((time.into(), row_id, cells)) }); - itertools::Either::Left(row_indices) + itertools::Either::Left(cells) } - /// Whether the indices in this `IndexBucket` are sorted + /// Whether the indices in this `IndexedBucket` are sorted pub fn is_sorted(&self) -> bool { - self.indices.read().is_sorted + self.inner.read().is_sorted } } -impl IndexBucketIndices { +impl IndexedBucketInner { pub fn sort(&mut self) { let Self { is_sorted, time_range: _, - times, - indices, + col_time, + col_insert_id, + col_row_id, + col_num_instances, + columns, + size_bytes: _, } = self; if *is_sorted { @@ -1077,8 +908,9 @@ impl IndexBucketIndices { crate::profile_function!(); let swaps = { - let mut swaps = (0..times.len()).collect::>(); - swaps.sort_by_key(|&i| ×[i]); + crate::profile_scope!("swaps"); + let mut swaps = (0..col_time.len()).collect::>(); + swaps.sort_by_key(|&i| &col_time[i]); swaps .iter() .copied() @@ -1090,135 +922,197 @@ impl IndexBucketIndices { // Yep, the reshuffle implementation is very dumb and very slow :) // TODO(#442): re_datastore: implement efficient shuffling on the read path. - // shuffle time index back into a sorted state { - let source = times.clone(); - for (from, to) in swaps.iter().copied() { - times[to] = source[from]; + crate::profile_scope!("control"); + + fn reshuffle_control_column( + column: &mut SmallVec<[T; N]>, + swaps: &[(usize, usize)], + ) { + let source = { + crate::profile_scope!("clone"); + column.clone() + }; + { + crate::profile_scope!("rotate"); + for (from, to) in swaps.iter().copied() { + column[to] = source[from]; + } + } } - } - fn reshuffle_index(index: &mut SecondaryIndex, swaps: &[(usize, usize)]) { - // shuffle data - { - let source = index.clone(); - for (from, to) in swaps.iter().copied() { - index[to] = source[from]; - } + reshuffle_control_column(col_time, &swaps); + if !col_insert_id.is_empty() { + reshuffle_control_column(col_insert_id, &swaps); } + reshuffle_control_column(col_row_id, &swaps); + reshuffle_control_column(col_num_instances, &swaps); } - // shuffle component indices back into a sorted state - for index in indices.values_mut() { - reshuffle_index(index, &swaps); + { + crate::profile_scope!("data"); + // shuffle component columns back into a sorted state + for column in columns.values_mut() { + let mut source = { + crate::profile_scope!("clone"); + column.clone() + }; + { + crate::profile_scope!("rotate"); + for (from, to) in swaps.iter().copied() { + column[to] = source[from].take(); + } + } + } } *is_sorted = true; } } -// --- Persistent Components --- +// --- Timeless --- -impl PersistentComponentTable { - /// Returns a shallow clone of the row data present at the given `row_idx`. +impl PersistentIndexedTable { + /// Queries the table for the cells of the specified `components`, as seen from the point + /// of view of the so-called `primary` component. /// - /// Panics if `row_idx` is out of bounds. 
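The reshuffle above deliberately trades speed for simplicity, as the TODO notes: it argsorts the time column, then applies the resulting permutation to every control and data column by copying out of a clone. A minimal sketch of that pattern on plain `Vec`s, with illustrative names only (the store's actual columns are `SmallVec`s of cells):

```rust
/// Argsort-and-permute, mirroring the shape of `IndexedBucketInner::sort` (sketch only).
fn sort_by_time(col_time: &mut Vec<i64>, column: &mut Vec<Option<&'static str>>) {
    // Compute the permutation that sorts the time column.
    let mut order: Vec<usize> = (0..col_time.len()).collect();
    order.sort_by_key(|&i| col_time[i]);

    // Apply it to every column by copying out of a clone; dumb but correct,
    // just like the `(from, to)` swap loop in the diff.
    let src_time = col_time.clone();
    let src_col = column.clone();
    for (to, &from) in order.iter().enumerate() {
        col_time[to] = src_time[from];
        column[to] = src_col[from];
    }
}

fn main() {
    let mut times = vec![30, 10, 20];
    let mut col = vec![Some("c"), Some("a"), None];
    sort_by_time(&mut times, &mut col);
    assert_eq!(times, [10, 20, 30]);
    assert_eq!(col, [Some("a"), None, Some("c")]);
}
```

The `swaps` list in the real code merely filters out no-op moves before the copy loop; the resulting permutation is the same.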
- pub fn get(&self, row_idx: RowIndex) -> Box { - crate::profile_function!(); + /// Returns an array of [`DataCell`]s on success, or `None` iff no cell could be found for + /// the `primary` component. + fn latest_at<const N: usize>( + &self, + primary: ComponentName, + components: &[ComponentName; N], + ) -> Option<(RowId, [Option<DataCell>; N])> { + if self.is_empty() { + return None; + } - self.chunks[row_idx.as_u64() as usize] - .as_any() - .downcast_ref::>() - .unwrap() - .value(0) - } -} + // Early-exit if this bucket is unaware of this component. + let column = self.columns.get(&primary)?; + + crate::profile_function!(); -// --- Components --- + trace!( + kind = "latest_at", + %primary, + ?components, + timeless = true, + "searching for primary & secondary cells..." + ); -impl ComponentTable { - pub fn get(&self, row_idx: RowIndex) -> Option> { - let bucket_nr = self - .buckets - .partition_point(|bucket| row_idx.as_u64() >= bucket.row_offset); + // find the primary row number. + let primary_row_nr = self.num_rows() - 1; - // The partition point will give us the index of the first bucket that has a row offset - // strictly greater than the row index we're looking for, therefore we need to take a - // step back to find what we're looking for. - // - // Component tables always spawn with a default bucket at offset 0, so the smallest - // partition point that can ever be returned is one, making this operation always - // overflow-safe... unless the garbage collector has ever run, in which case all bets are - // off! - let Some(bucket_nr) = bucket_nr.checked_sub(1) else { return None }; + trace!( + kind = "latest_at", + %primary, + ?components, + %primary_row_nr, + timeless = true, + "found primary row number", + ); - if let Some(bucket) = self.buckets.get(bucket_nr) { - trace!( - kind = "get", - component = self.name.as_str(), - %row_idx, - bucket_nr, - %bucket.row_offset, - "fetching component data" - ); - bucket.get(row_idx) - } else { - trace!( - kind = "get", - component = self.name.as_str(), - %row_idx, - bucket_nr, - "row index is out of bounds" - ); - None + // find the secondary row number, and the associated cells. + let mut secondary_row_nr = primary_row_nr; + while column[secondary_row_nr as usize].is_none() { + if secondary_row_nr == 0 { + trace!( + kind = "latest_at", + %primary, + ?components, + timeless = true, + %primary_row_nr, + "no secondary row number found", + ); + return None; + } + secondary_row_nr -= 1; } - } - /// Returns an iterator over the `ComponentBucket` in this table - #[allow(dead_code)] - pub fn iter_buckets(&self) -> impl ExactSizeIterator { - self.buckets.iter() - } -} + trace!( + kind = "latest_at", + %primary, + ?components, + timeless = true, + %primary_row_nr, %secondary_row_nr, + "found secondary row number", + ); + debug_assert!(column[secondary_row_nr as usize].is_some()); + + let mut cells = [(); N].map(|_| None); + for (i, component) in components.iter().enumerate() { + if let Some(column) = self.columns.get(component) { + if let Some(cell) = &column[secondary_row_nr as usize] { + trace!( + kind = "latest_at", + %primary, + %component, + timeless = true, + %primary_row_nr, %secondary_row_nr, + "found cell", + ); + cells[i] = Some(cell.clone() /* shallow */); + } + } + } -impl ComponentBucket { - /// Returns the name of the component stored in this bucket. - #[allow(dead_code)] - pub fn name(&self) -> &str { - &self.name + Some((self.col_row_id[secondary_row_nr as usize], cells)) } - /// Returns a shallow clone of the row data present at the given `row_idx`.
- pub fn get(&self, row_idx: RowIndex) -> Option> { - let row_idx = row_idx.as_u64() - self.row_offset; - // This has to be safe to unwrap, otherwise it would never have made it past insertion. - if self.archived { - debug_assert_eq!(self.chunks.len(), 1); - let list = self.chunks[0] - .as_any() - .downcast_ref::>() - .unwrap(); - (row_idx < list.len() as u64).then(|| list.value(row_idx as _)) - } else { - self.chunks.get(row_idx as usize).map(|chunk| { - chunk - .as_any() - .downcast_ref::>() - .unwrap() - .value(0) - }) + /// Iterates the table in order to return the cells of the specified `components`, + /// as seen from the point of view of the so-called `primary` component, for the given time + /// range. + /// + /// For each and every relevant row that is found, the returned iterator will yield an array + /// that is filled with the cells of each and every component in `components`, or `None` if + /// said component is not available in that row. + /// A row is considered relevant iff it contains data for the `primary` component. + /// + /// This method cannot fail! If there's no data to return, an empty iterator is returned. + pub fn range<const N: usize>( + &self, + components: [ComponentName; N], + ) -> impl Iterator<Item = (RowId, [Option<DataCell>; N])> + '_ { + // Early-exit if the table is unaware of any of our components of interest. + if components + .iter() + .all(|component| self.columns.get(component).is_none()) + { + return itertools::Either::Right(std::iter::empty()); } - } - /// Returns a shallow clone of all the chunks in this bucket. - #[allow(dead_code)] - pub fn data(&self) -> Vec> { - self.chunks.clone() // shallow - } + // Beware! This merely measures the time it takes to gather all the necessary metadata + // for building the returned iterator. + crate::profile_function!(); + + let cells = (0..self.num_rows()).filter_map(move |row_nr| { + let mut cells = [(); N].map(|_| None); + for (i, component) in components.iter().enumerate() { + if let Some(column) = self.columns.get(component) { + cells[i] = column[row_nr as usize].clone(); + } + } + + // We only yield rows that contain data for at least one of the components of + // interest. + if cells.iter().all(Option::is_none) { + return None; + } + + let row_id = self.col_row_id[row_nr as usize]; + + trace!( + kind = "range", + ?components, + timeless = true, + %row_nr, + ?cells, + "yielding cells", + ); + + Some((row_id, cells)) + }); - /// Return an iterator over the time ranges in this bucket. - #[allow(dead_code)] - pub fn iter_time_ranges(&self) -> impl Iterator { - self.time_ranges.iter() + itertools::Either::Left(cells) } } diff --git a/crates/re_arrow_store/src/store_sanity.rs b/crates/re_arrow_store/src/store_sanity.rs index f002f1c13d8c..eba65eb70011 100644 --- a/crates/re_arrow_store/src/store_sanity.rs +++ b/crates/re_arrow_store/src/store_sanity.rs @@ -1,15 +1,59 @@ -use std::collections::BTreeMap; - -use anyhow::{anyhow, ensure}; -use nohash_hasher::IntMap; -use re_log_types::{TimeInt, Timeline}; - -use crate::{ - ComponentBucket, ComponentTable, DataStore, IndexBucket, IndexBucketIndices, IndexTable, - PersistentComponentTable, PersistentIndexTable, +use re_log_types::{ + ComponentName, DataCellColumn, SizeBytes as _, TimeRange, COLUMN_NUM_INSTANCES, COLUMN_ROW_ID, + COLUMN_TIMEPOINT, }; -// TODO(#527): Typed errors.
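Both `latest_at` implementations above share the same shape: a binary search for the last row at or before the query time, followed by a backwards walk to the closest row where the primary component is actually present. A toy sketch of that logic, assuming a plain slice for the time column and `&str` standing in for `DataCell` (illustrative names, not the store's API):

```rust
/// Toy latest-at lookup over a time-sorted, possibly sparse column.
fn latest_at<'a>(col_time: &[i64], column: &[Option<&'a str>], time: i64) -> Option<&'a str> {
    // The partition point is always one step *beyond* the last row with t <= time...
    let row_nr = col_time.partition_point(|&t| t <= time);
    if row_nr == 0 {
        return None; // the query predates all data in this column
    }
    // ...so step back once to land on the primary row,
    let mut row_nr = row_nr - 1;
    // then keep walking back until the component actually has data there.
    loop {
        if let Some(cell) = column[row_nr] {
            return Some(cell);
        }
        if row_nr == 0 {
            return None;
        }
        row_nr -= 1;
    }
}

fn main() {
    let times = [10, 20, 30];
    let col = [Some("a"), None, Some("c")];
    assert_eq!(latest_at(&times, &col, 25), Some("a")); // t=20 is a hole: fall back to t=10
    assert_eq!(latest_at(&times, &col, 30), Some("c"));
    assert_eq!(latest_at(&times, &col, 5), None);
}
```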
+use crate::{DataStore, IndexedBucket, IndexedBucketInner, IndexedTable, PersistentIndexedTable}; + +// --- + +/// Returned by the `sanity_check` family of functions when an invariant violation has been detected +/// in the `DataStore`'s internal data structures. +/// These violations can only stem from a bug in the store's implementation itself. +#[derive(thiserror::Error, Debug)] +pub enum SanityError { + #[error( + "Reported time range for indexed bucket is out of sync: got {got:?}, expected {expected:?}" + )] + TimeRangeOutOfSync { expected: TimeRange, got: TimeRange }, + + #[error("Reported size for {origin} is out of sync: got {got}, expected {expected}")] + SizeOutOfSync { + origin: &'static str, + expected: String, + got: String, + }, + + #[error("Reported number of rows for {origin} is out of sync: got {got}, expected {expected}")] + RowsOutOfSync { + origin: &'static str, + expected: String, + got: String, + }, + + #[error("Column '{component}' has too few/many rows: got {got} instead of {expected}")] + ColumnLengthMismatch { + component: ComponentName, + expected: u64, + got: u64, + }, + + #[error("Couldn't find any column for the configured cluster key ('{cluster_key}')")] + ClusterColumnMissing { cluster_key: ComponentName }, + + #[error("The cluster column must be dense, found holes: {cluster_column:?}")] + ClusterColumnSparse { cluster_column: Box<DataCellColumn> }, + + #[error("Found overlapping indexed buckets: {t1_max_formatted} ({t1_max}) <-> {t2_max_formatted} ({t2_max})")] + OverlappingBuckets { + t1_max: i64, + t1_max_formatted: String, + t2_max: i64, + t2_max_formatted: String, + }, +} + +pub type SanityResult<T> = ::std::result::Result<T, SanityError>; // --- Data store --- @@ -17,163 +61,28 @@ impl DataStore { /// Runs the sanity check suite for the entire datastore. /// /// Returns an error if anything looks wrong. - pub fn sanity_check(&self) -> anyhow::Result<()> { + pub fn sanity_check(&self) -> SanityResult<()> { crate::profile_function!(); - // Row indices should be continuous across all index tables. - if self.gc_id == 0 { - let mut row_indices: IntMap<_, Vec> = IntMap::default(); - for table in self.indices.values() { - for bucket in table.buckets.values() { - for (comp, index) in &bucket.indices.read().indices { - let row_indices = row_indices.entry(*comp).or_default(); - row_indices.extend(index.iter().flatten().map(|row_idx| row_idx.as_u64())); - } - } - } - - for (comp, mut row_indices) in row_indices { - // Not an actual row index! - if comp == DataStore::insert_id_key() { - continue; - } - - row_indices.sort(); - row_indices.dedup(); - for pair in row_indices.windows(2) { - let &[i1, i2] = pair else { unreachable!() }; - ensure!( - i1 + 1 == i2, - "found hole in index coverage for {comp:?}: \ - in {row_indices:?}, {i1} -> {i2}" - ); - } - } - } - - // Row indices should be continuous across all timeless index tables. - { - let mut row_indices: IntMap<_, Vec> = IntMap::default(); - for table in self.timeless_indices.values() { - for (comp, index) in &table.indices { - let row_indices = row_indices.entry(*comp).or_default(); - row_indices.extend(index.iter().flatten().map(|row_idx| row_idx.as_u64())); - } - } - - for (comp, mut row_indices) in row_indices { - // Not an actual row index!
- if comp == DataStore::insert_id_key() { - continue; - } - - row_indices.sort(); - row_indices.dedup(); - for pair in row_indices.windows(2) { - let &[i1, i2] = pair else { unreachable!() }; - ensure!( - i1 + 1 == i2, - "found hole in timeless index coverage for {comp:?}: \ - in {row_indices:?}, {i1} -> {i2}" - ); - } - } - } - - for table in self.timeless_indices.values() { - table.sanity_check()?; - } - for table in self.timeless_components.values() { + for table in self.timeless_tables.values() { table.sanity_check()?; } - for table in self.indices.values() { - table.sanity_check()?; - } - for table in self.components.values() { + for table in self.tables.values() { table.sanity_check()?; } Ok(()) } - - /// The oldest time for which we have any data. - /// - /// Ignores timeless data. - /// - /// Useful to call after a gc. - pub fn oldest_time_per_timeline(&self) -> BTreeMap { - crate::profile_function!(); - - let mut oldest_time_per_timeline = BTreeMap::default(); - - for component_table in self.components.values() { - for bucket in &component_table.buckets { - for (timeline, time_range) in &bucket.time_ranges { - let entry = oldest_time_per_timeline - .entry(*timeline) - .or_insert(TimeInt::MAX); - *entry = time_range.min.min(*entry); - } - } - } - - oldest_time_per_timeline - } } -// --- Persistent Indices --- +// --- Temporal --- -impl PersistentIndexTable { +impl IndexedTable { /// Runs the sanity check suite for the entire table. /// /// Returns an error if anything looks wrong. - pub fn sanity_check(&self) -> anyhow::Result<()> { - crate::profile_function!(); - - let Self { - ent_path: _, - cluster_key, - num_rows, - indices, - all_components: _, - } = self; - - // All indices should be `Self::num_rows` long. - { - for (comp, index) in indices { - let secondary_len = index.len() as u64; - ensure!( - *num_rows == secondary_len, - "found rogue secondary index for {comp:?}: \ - expected {num_rows} rows, got {secondary_len} instead", - ); - } - } - - // The cluster index must be fully dense. - { - let cluster_idx = indices - .get(cluster_key) - .ok_or_else(|| anyhow!("no index found for cluster key: {cluster_key:?}"))?; - ensure!( - cluster_idx.iter().all(|row| row.is_some()), - "the cluster index ({cluster_key:?}) must be fully dense: \ - got {cluster_idx:?}", - ); - } - - Ok(()) - } -} - -// --- Indices --- - -impl IndexTable { - /// Runs the sanity check suite for the entire table. - /// - /// Returns an error if anything looks wrong. - pub fn sanity_check(&self) -> anyhow::Result<()> { + pub fn sanity_check(&self) -> SanityResult<()> { crate::profile_function!(); // No two buckets should ever overlap time-range-wise. 
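The `SanityError`/`SanityResult` pair above replaces the previous `anyhow`-based checks with typed errors (closing TODO #527), so callers can match on the exact invariant that was violated instead of parsing an error string. A minimal sketch of that `thiserror` pattern, using a hypothetical error type:

```rust
// Hypothetical error type mirroring the SanityError pattern; `thiserror`
// derives the Display impl from the #[error] attributes.
use thiserror::Error;

#[derive(Error, Debug)]
pub enum CheckError {
    #[error("Column '{component}' has too few/many rows: got {got} instead of {expected}")]
    ColumnLengthMismatch {
        component: String,
        expected: u64,
        got: u64,
    },
}

pub type CheckResult<T> = ::std::result::Result<T, CheckError>;

fn check_column_len(component: &str, got: u64, expected: u64) -> CheckResult<()> {
    if got != expected {
        return Err(CheckError::ColumnLengthMismatch {
            component: component.to_owned(),
            expected,
            got,
        });
    }
    Ok(())
}

fn main() {
    // Callers can now match on the specific violated invariant.
    if let Err(CheckError::ColumnLengthMismatch { got, expected, .. }) =
        check_column_len("rerun.point2d", 12, 10)
    {
        eprintln!("caught a typed violation: {got} != {expected}");
    }
}
```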
@@ -181,18 +90,31 @@ impl IndexTable { let time_ranges = self .buckets .values() - .map(|bucket| bucket.indices.read().time_range) + .map(|bucket| bucket.inner.read().time_range) .collect::>(); for time_ranges in time_ranges.windows(2) { let &[t1, t2] = time_ranges else { unreachable!() }; - ensure!( - t1.max.as_i64() < t2.min.as_i64(), - "found overlapping index buckets: {} ({}) <-> {} ({})", - self.timeline.typ().format(t1.max), - t1.max.as_i64(), - self.timeline.typ().format(t2.min), - t2.min.as_i64(), - ); + if t1.max.as_i64() >= t2.min.as_i64() { + return Err(SanityError::OverlappingBuckets { + t1_max: t1.max.as_i64(), + t1_max_formatted: self.timeline.typ().format(t1.max), + t2_max: t2.max.as_i64(), + t2_max_formatted: self.timeline.typ().format(t2.max), + }); + } + } + } + + // Make sure row numbers aren't out of sync + { + let num_rows = self.num_rows(); + let num_rows_uncached = self.num_rows_uncached(); + if num_rows != num_rows_uncached { + return Err(SanityError::RowsOutOfSync { + origin: std::any::type_name::(), + expected: re_format::format_number(num_rows_uncached as _), + got: re_format::format_number(num_rows as _), + }); } } @@ -201,70 +123,122 @@ impl IndexTable { bucket.sanity_check()?; } + // Make sure size values aren't out of sync + { + let total_size_bytes = self.total_size_bytes(); + let total_size_bytes_uncached = self.size_bytes_uncached(); + if total_size_bytes != total_size_bytes_uncached { + return Err(SanityError::SizeOutOfSync { + origin: std::any::type_name::(), + expected: re_format::format_bytes(total_size_bytes_uncached as _), + got: re_format::format_bytes(total_size_bytes as _), + }); + } + } + Ok(()) } } -impl IndexBucket { +impl IndexedBucket { /// Runs the sanity check suite for the entire bucket. /// /// Returns an error if anything looks wrong. - pub fn sanity_check(&self) -> anyhow::Result<()> { + pub fn sanity_check(&self) -> SanityResult<()> { crate::profile_function!(); - let IndexBucketIndices { - is_sorted: _, - time_range: _, - times, - indices, - } = &*self.indices.read(); + let Self { + timeline: _, + cluster_key, + inner, + } = self; - // All indices should contain the exact same number of rows as the time index. { - let primary_len = times.len(); - for (comp, index) in indices { - let secondary_len = index.len(); - ensure!( - primary_len == secondary_len, - "found rogue secondary index for {comp:?}: \ - expected {primary_len} rows, got {secondary_len} instead", - ); + let IndexedBucketInner { + is_sorted: _, + time_range, + col_time, + col_insert_id, + col_row_id, + col_num_instances, + columns, + size_bytes: _, + } = &*inner.read(); + + // Time ranges are eagerly maintained. + { + let mut times = col_time.clone(); + times.sort(); + + let expected_min = times.first().copied().unwrap_or(i64::MAX).into(); + let expected_max = times.last().copied().unwrap_or(i64::MIN).into(); + let expected_time_range = TimeRange::new(expected_min, expected_max); + + if expected_time_range != *time_range { + return Err(SanityError::TimeRangeOutOfSync { + expected: expected_time_range, + got: *time_range, + }); + } } - } - - // The cluster index must be fully dense. 
- { - let cluster_key = self.cluster_key; - let cluster_idx = indices - .get(&cluster_key) - .ok_or_else(|| anyhow!("no index found for cluster key: {cluster_key:?}"))?; - ensure!( - cluster_idx.iter().all(|row| row.is_some()), - "the cluster index ({cluster_key:?}) must be fully dense: \ - got {cluster_idx:?}", - ); - } - Ok(()) - } -} - -// --- Persistent Components --- + // All columns should be `Self::num_rows` long. + { + let num_rows = self.num_rows(); + + let column_lengths = [ + (!col_insert_id.is_empty()) + .then(|| (DataStore::insert_id_key(), col_insert_id.len())), // + Some((COLUMN_TIMEPOINT.into(), col_time.len())), + Some((COLUMN_ROW_ID.into(), col_row_id.len())), + Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())), + ] + .into_iter() + .flatten() + .chain( + columns + .iter() + .map(|(component, column)| (*component, column.len())), + ) + .map(|(component, len)| (component, len as u64)); + + for (component, len) in column_lengths { + if len != num_rows { + return Err(SanityError::ColumnLengthMismatch { + component, + expected: num_rows, + got: len, + }); + } + } + } -impl PersistentComponentTable { - /// Runs the sanity check suite for the entire table. - /// - /// Returns an error if anything looks wrong. - pub fn sanity_check(&self) -> anyhow::Result<()> { - crate::profile_function!(); + // The cluster column must be fully dense. + if self.num_rows() > 0 { + let cluster_column = + columns + .get(cluster_key) + .ok_or(SanityError::ClusterColumnMissing { + cluster_key: *cluster_key, + })?; + if !cluster_column.iter().all(|cell| cell.is_some()) { + return Err(SanityError::ClusterColumnSparse { + cluster_column: cluster_column.clone().into(), + }); + } + } + } - // All chunks should always be dense + // Make sure size values aren't out of sync { - for chunk in &self.chunks { - ensure!( - chunk.validity().is_none(), - "persistent component chunks should always be dense", - ); + let size_bytes = inner.read().size_bytes; + let size_bytes_uncached = inner.write().compute_size_bytes(); + if size_bytes != size_bytes_uncached { + return Err(SanityError::SizeOutOfSync { + origin: std::any::type_name::(), + expected: re_format::format_bytes(size_bytes_uncached as _), + got: re_format::format_bytes(size_bytes as _), + }); } } @@ -272,53 +246,66 @@ impl PersistentComponentTable { } } -// --- Components --- +// --- Timeless --- -impl ComponentTable { +impl PersistentIndexedTable { /// Runs the sanity check suite for the entire table. /// /// Returns an error if anything looks wrong. - pub fn sanity_check(&self) -> anyhow::Result<()> { + pub fn sanity_check(&self) -> SanityResult<()> { crate::profile_function!(); - // No two buckets should ever overlap row-range-wise. + let Self { + ent_path: _, + cluster_key, + col_insert_id, + col_row_id, + col_num_instances, + columns, + } = self; + + // All columns should be `Self::num_rows` long. 
{ - let row_ranges = self - .buckets - .iter() - .map(|bucket| bucket.row_offset..bucket.row_offset + bucket.total_rows()) - .collect::>(); - for row_ranges in row_ranges.windows(2) { - let &[r1, r2] = &row_ranges else { unreachable!() }; - ensure!( - !r1.contains(&r2.start), - "found overlapping component buckets: {r1:?} <-> {r2:?}" - ); + let num_rows = self.num_rows(); + + let column_lengths = [ + (!col_insert_id.is_empty()) + .then(|| (DataStore::insert_id_key(), col_insert_id.len())), // + Some((COLUMN_ROW_ID.into(), col_row_id.len())), + Some((COLUMN_NUM_INSTANCES.into(), col_num_instances.len())), + ] + .into_iter() + .flatten() + .chain( + columns + .iter() + .map(|(component, column)| (*component, column.len())), + ) + .map(|(component, len)| (component, len as u64)); + + for (component, len) in column_lengths { + if len != num_rows { + return Err(SanityError::ColumnLengthMismatch { + component, + expected: num_rows, + got: len, + }); + } } } - for bucket in &self.buckets { - bucket.sanity_check()?; - } - - Ok(()) - } -} - -impl ComponentBucket { - /// Runs the sanity check suite for the entire table. - /// - /// Returns an error if anything looks wrong. - pub fn sanity_check(&self) -> anyhow::Result<()> { - crate::profile_function!(); - - // All chunks should always be dense - { - for chunk in &self.chunks { - ensure!( - chunk.validity().is_none(), - "component bucket chunks should always be dense", - ); + // The cluster column must be fully dense. + if self.num_rows() > 0 { + let cluster_column = + columns + .get(cluster_key) + .ok_or(SanityError::ClusterColumnMissing { + cluster_key: *cluster_key, + })?; + if !cluster_column.iter().all(|cell| cell.is_some()) { + return Err(SanityError::ClusterColumnSparse { + cluster_column: cluster_column.clone().into(), + }); } } diff --git a/crates/re_arrow_store/src/store_stats.rs b/crates/re_arrow_store/src/store_stats.rs index 10111073064a..a18f6c7b3485 100644 --- a/crates/re_arrow_store/src/store_stats.rs +++ b/crates/re_arrow_store/src/store_stats.rs @@ -1,452 +1,359 @@ +use nohash_hasher::IntMap; +use re_log_types::{ComponentName, SizeBytes, TimePoint}; + use crate::{ - ComponentBucket, ComponentTable, DataStore, DataStoreConfig, IndexBucket, IndexBucketIndices, - IndexTable, PersistentComponentTable, PersistentIndexTable, + store::IndexedBucketInner, ClusterCellCache, DataStore, DataTypeRegistry, IndexedBucket, + IndexedTable, MetadataRegistry, PersistentIndexedTable, }; // --- -// TODO(cmc): compute incrementally once/if this becomes too expensive. 
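A recurring pattern in the sanity checks above is comparing an eagerly maintained counter against a value recomputed from scratch (`num_rows` vs `num_rows_uncached`, `total_size_bytes` vs `size_bytes_uncached`): any divergence can only be a store bug. A minimal sketch of that cached-versus-recomputed invariant, with illustrative names:

```rust
// Illustrative names only; the real checks live in the sanity suite above.
struct Table {
    num_rows: u64,          // eagerly maintained on every insert
    buckets: Vec<Vec<i64>>, // source of truth, recomputed on demand
}

impl Table {
    /// Recomputed from scratch, for sanity checking only.
    fn num_rows_uncached(&self) -> u64 {
        self.buckets.iter().map(|b| b.len() as u64).sum()
    }

    fn sanity_check(&self) -> Result<(), String> {
        let uncached = self.num_rows_uncached();
        if self.num_rows != uncached {
            // The cached counter drifted out of sync: that is a bug, not bad input.
            return Err(format!(
                "rows out of sync: got {}, expected {uncached}",
                self.num_rows
            ));
        }
        Ok(())
    }
}

fn main() {
    let table = Table { num_rows: 3, buckets: vec![vec![1, 2], vec![3]] };
    assert!(table.sanity_check().is_ok());
}
```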
-#[derive(Default, Debug)] +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd)] +pub struct DataStoreRowStats { + pub num_rows: u64, + pub num_bytes: u64, +} + +impl std::ops::Sub for DataStoreRowStats { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Self { + num_rows: self.num_rows - rhs.num_rows, + num_bytes: self.num_bytes - rhs.num_bytes, + } + } +} + +impl std::ops::Add for DataStoreRowStats { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self { + num_rows: self.num_rows + rhs.num_rows, + num_bytes: self.num_bytes + rhs.num_bytes, + } + } +} + +#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd)] pub struct DataStoreStats { - pub total_timeless_index_rows: u64, - pub total_timeless_index_size_bytes: u64, - pub total_timeless_component_rows: u64, - pub total_timeless_component_size_bytes: u64, - - pub total_temporal_index_rows: u64, - pub total_temporal_index_size_bytes: u64, - pub total_temporal_index_buckets: u64, - pub total_temporal_component_rows: u64, - pub total_temporal_component_size_bytes: u64, - pub total_temporal_component_buckets: u64, - - pub total_index_rows: u64, - pub total_index_size_bytes: u64, - pub total_component_rows: u64, - pub total_component_size_bytes: u64, - - pub config: DataStoreConfig, + pub type_registry: DataStoreRowStats, + pub metadata_registry: DataStoreRowStats, + pub autogenerated: DataStoreRowStats, + pub timeless: DataStoreRowStats, + pub temporal: DataStoreRowStats, + pub temporal_buckets: u64, + pub total: DataStoreRowStats, +} + +impl std::ops::Sub for DataStoreStats { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Self { + type_registry: self.type_registry - rhs.type_registry, + metadata_registry: self.metadata_registry - rhs.metadata_registry, + autogenerated: self.autogenerated - rhs.autogenerated, + timeless: self.timeless - rhs.timeless, + temporal: self.temporal - rhs.temporal, + temporal_buckets: self.temporal_buckets - rhs.temporal_buckets, + total: self.total - rhs.total, + } + } +} + +impl std::ops::Add for DataStoreStats { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self { + type_registry: self.type_registry + rhs.type_registry, + metadata_registry: self.metadata_registry + rhs.metadata_registry, + autogenerated: self.autogenerated + rhs.autogenerated, + timeless: self.timeless + rhs.timeless, + temporal: self.temporal + rhs.temporal, + temporal_buckets: self.temporal_buckets + rhs.temporal_buckets, + total: self.total + rhs.total, + } + } } impl DataStoreStats { pub fn from_store(store: &DataStore) -> Self { crate::profile_function!(); - let total_timeless_index_rows = store.total_timeless_index_rows(); - let total_timeless_index_size_bytes = store.total_timeless_index_size_bytes(); - let total_timeless_component_rows = store.total_timeless_component_rows(); - let total_timeless_component_size_bytes = store.total_timeless_component_size_bytes(); - - let total_temporal_index_rows = store.total_temporal_index_rows(); - let total_temporal_index_size_bytes = store.total_temporal_index_size_bytes(); - let total_temporal_index_buckets = store.total_temporal_index_buckets(); - let total_temporal_component_rows = store.total_temporal_component_rows(); - let total_temporal_component_size_bytes = store.total_temporal_component_size_bytes(); - let total_temporal_component_buckets = store.total_temporal_component_buckets(); - - let total_index_rows = total_timeless_index_rows + total_temporal_index_rows; - let 
total_index_size_bytes = - total_timeless_index_size_bytes + total_temporal_index_size_bytes; - let total_component_rows = total_timeless_component_rows + total_temporal_component_rows; - let total_component_size_bytes = - total_timeless_component_size_bytes + total_temporal_component_size_bytes; + let type_registry = { + crate::profile_scope!("type_registry"); + DataStoreRowStats { + num_rows: store.type_registry.len() as _, + num_bytes: store.type_registry.total_size_bytes(), + } + }; + + let metadata_registry = { + crate::profile_scope!("metadata_registry"); + DataStoreRowStats { + num_rows: store.metadata_registry.len() as _, + num_bytes: store.metadata_registry.total_size_bytes(), + } + }; + + let autogenerated = { + crate::profile_scope!("autogenerated"); + DataStoreRowStats { + num_rows: store.cluster_cell_cache.len() as _, + num_bytes: store.cluster_cell_cache.total_size_bytes(), + } + }; + + let timeless = { + crate::profile_scope!("timeless"); + DataStoreRowStats { + num_rows: store.num_timeless_rows(), + num_bytes: store.timeless_size_bytes(), + } + }; + + let (temporal, temporal_buckets) = { + crate::profile_scope!("temporal"); + ( + DataStoreRowStats { + num_rows: store.num_temporal_rows(), + num_bytes: store.temporal_size_bytes(), + }, + store.num_temporal_buckets(), + ) + }; + + let total = DataStoreRowStats { + num_rows: timeless.num_rows + temporal.num_rows, + num_bytes: type_registry.num_bytes + + metadata_registry.num_bytes + + autogenerated.num_bytes + + timeless.num_bytes + + temporal.num_bytes, + }; Self { - total_timeless_index_rows, - total_timeless_index_size_bytes, - total_timeless_component_rows, - total_timeless_component_size_bytes, - - total_temporal_index_rows, - total_temporal_index_size_bytes, - total_temporal_index_buckets, - total_temporal_component_rows, - total_temporal_component_size_bytes, - total_temporal_component_buckets, - - total_index_rows, - total_index_size_bytes, - total_component_rows, - total_component_size_bytes, - - config: store.config.clone(), + type_registry, + metadata_registry, + autogenerated, + timeless, + temporal, + temporal_buckets, + total, } } } // --- Data store --- -impl DataStore { - /// Returns the number of timeless index rows stored across this entire store, i.e. the sum of - /// the number of rows across all of its timeless index tables. - pub fn total_timeless_index_rows(&self) -> u64 { - crate::profile_function!(); - self.timeless_indices - .values() - .map(|table| table.total_rows()) - .sum() - } +impl SizeBytes for DataTypeRegistry { + #[inline] + fn heap_size_bytes(&self) -> u64 { + type K = ComponentName; - /// Returns the size of the timeless index data stored across this entire store, i.e. the sum - /// of the size of the data stored across all of its timeless index tables, in bytes. - pub fn total_timeless_index_size_bytes(&self) -> u64 { - crate::profile_function!(); - self.timeless_indices - .values() - .map(|table| table.total_size_bytes()) - .sum() - } + // NOTE: This is only here to make sure this method fails to compile if the inner type + // changes, as the following size computation assumes POD types. + let inner: &IntMap = &self.0; - /// Returns the number of timeless component rows stored across this entire store, i.e. the - /// sum of the number of rows across all of its timeless component tables. 
- pub fn total_timeless_component_rows(&self) -> u64 { - crate::profile_function!(); - self.timeless_components - .values() - .map(|table| table.total_rows()) - .sum() + let keys_size_bytes = std::mem::size_of::() * inner.len(); + // NOTE: It's all on the heap at this point. + let values_size_bytes = self.values().map(SizeBytes::total_size_bytes).sum::(); + + keys_size_bytes as u64 + values_size_bytes } +} - /// Returns the size of the timeless component data stored across this entire store, i.e. the - /// sum of the size of the data stored across all of its timeless component tables, in bytes. - pub fn total_timeless_component_size_bytes(&self) -> u64 { - crate::profile_function!(); - self.timeless_components - .values() - .map(|table| table.total_size_bytes()) - .sum() +impl SizeBytes for MetadataRegistry { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.heap_size_bytes } +} - /// Returns the number of temporal index rows stored across this entire store, i.e. the sum of - /// the number of rows across all of its temporal index tables. - pub fn total_temporal_index_rows(&self) -> u64 { - crate::profile_function!(); - self.indices.values().map(|table| table.total_rows()).sum() +impl SizeBytes for ClusterCellCache { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.0.heap_size_bytes() } +} - /// Returns the size of the temporal index data stored across this entire store, i.e. the sum - /// of the size of the data stored across all of its temporal index tables, in bytes. - pub fn total_temporal_index_size_bytes(&self) -> u64 { +impl DataStore { + /// Returns the number of timeless index rows stored across this entire store, i.e. the sum of + /// the number of rows across all of its timeless indexed tables. + #[inline] + pub fn num_timeless_rows(&self) -> u64 { crate::profile_function!(); - self.indices + self.timeless_tables .values() - .map(|table| table.total_size_bytes()) + .map(|table| table.num_rows()) .sum() } - /// Returns the number of temporal index buckets stored across this entire store. - pub fn total_temporal_index_buckets(&self) -> u64 { + /// Returns the size of the timeless index data stored across this entire store, i.e. the sum + /// of the size of the data stored across all of its timeless indexed tables, in bytes. + #[inline] + pub fn timeless_size_bytes(&self) -> u64 { crate::profile_function!(); - self.indices + self.timeless_tables .values() - .map(|table| table.total_buckets()) + .map(|table| table.total_size_bytes()) .sum() } - /// Returns the number of temporal component rows stored across this entire store, i.e. the - /// sum of the number of rows across all of its temporal component tables. - pub fn total_temporal_component_rows(&self) -> u64 { + /// Returns the number of temporal index rows stored across this entire store, i.e. the sum of + /// the number of rows across all of its temporal indexed tables. + #[inline] + pub fn num_temporal_rows(&self) -> u64 { crate::profile_function!(); - self.components - .values() - .map(|table| table.total_rows()) - .sum() + self.tables.values().map(|table| table.num_rows()).sum() } - /// Returns the size of the temporal component data stored across this entire store, i.e. the - /// sum of the size of the data stored across all of its temporal component tables, in bytes. - pub fn total_temporal_component_size_bytes(&self) -> u64 { + /// Returns the size of the temporal index data stored across this entire store, i.e. the sum + /// of the size of the data stored across all of its temporal indexed tables, in bytes. 
+ #[inline] + pub fn temporal_size_bytes(&self) -> u64 { crate::profile_function!(); - self.components + self.tables .values() .map(|table| table.total_size_bytes()) .sum() } - /// Returns the number of temporal component buckets stored across this entire store. - pub fn total_temporal_component_buckets(&self) -> u64 { + /// Returns the number of temporal indexed buckets stored across this entire store. + #[inline] + pub fn num_temporal_buckets(&self) -> u64 { crate::profile_function!(); - self.components - .values() - .map(|table| table.total_buckets()) - .sum() + self.tables.values().map(|table| table.num_buckets()).sum() } } -// --- Persistent Indices --- +// --- Temporal --- -impl PersistentIndexTable { - /// Returns the number of rows stored across this table. - pub fn total_rows(&self) -> u64 { - self.num_rows - } - - /// Returns the size of the data stored across this table, in bytes. - pub fn total_size_bytes(&self) -> u64 { - self.indices - .values() - .map(|index| std::mem::size_of_val(index.as_slice()) as u64) - .sum::() +impl IndexedTable { + /// Returns the number of rows stored across this entire table, i.e. the sum of the number + /// of rows stored across all of its buckets. + #[inline] + pub fn num_rows(&self) -> u64 { + self.buckets_num_rows } -} - -// --- Indices --- -impl IndexTable { /// Returns the number of rows stored across this entire table, i.e. the sum of the number /// of rows stored across all of its buckets. - pub fn total_rows(&self) -> u64 { - self.buckets - .values() - .map(|bucket| bucket.total_rows()) - .sum() + /// + /// Recomputed from scratch, for sanity checking. + #[inline] + pub(crate) fn num_rows_uncached(&self) -> u64 { + crate::profile_function!(); + self.buckets.values().map(|bucket| bucket.num_rows()).sum() } - /// Returns the size of data stored across this entire table, i.e. the sum of the size of - /// the data stored across all of its buckets, in bytes. - pub fn total_size_bytes(&self) -> u64 { - self.buckets - .values() - .map(|bucket| bucket.total_size_bytes()) - .sum() + #[inline] + pub(crate) fn size_bytes_uncached(&self) -> u64 { + crate::profile_function!(); + self.stack_size_bytes() + + self + .buckets + .values() + .map(|bucket| bucket.total_size_bytes()) + .sum::() } /// Returns the number of buckets stored across this entire table. - pub fn total_buckets(&self) -> u64 { + #[inline] + pub fn num_buckets(&self) -> u64 { self.buckets.len() as _ } } -impl IndexBucket { - /// Returns the number of rows stored across this bucket. - pub fn total_rows(&self) -> u64 { - self.indices.read().times.len() as u64 - } - - /// Returns the size of the data stored across this bucket, in bytes. - pub fn total_size_bytes(&self) -> u64 { - let IndexBucketIndices { - is_sorted: _, - time_range: _, - times, - indices, - } = &*self.indices.read(); - - std::mem::size_of_val(times.as_slice()) as u64 - + indices - .values() - .map(|index| std::mem::size_of_val(index.as_slice()) as u64) - .sum::() +impl SizeBytes for IndexedTable { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.buckets_size_bytes } } -// --- Persistent Components --- - -impl PersistentComponentTable { - /// Returns the number of rows stored across this table. - pub fn total_rows(&self) -> u64 { - self.total_rows - } - - /// Returns the size of the data stored across this table, in bytes. - pub fn total_size_bytes(&self) -> u64 { - self.total_size_bytes +impl IndexedBucket { + /// Returns the number of rows stored across this bucket. 
+ #[inline] + pub fn num_rows(&self) -> u64 { + crate::profile_function!(); + self.inner.read().col_time.len() as u64 } } -// --- Components --- - -impl ComponentTable { - /// Returns the number of rows stored across this entire table, i.e. the sum of the number - /// of rows stored across all of its buckets. - pub fn total_rows(&self) -> u64 { - self.buckets.iter().map(|bucket| bucket.total_rows()).sum() - } - - /// Returns the size of data stored across this entire table, i.e. the sum of the size of - /// the data stored across all of its buckets, in bytes. - pub fn total_size_bytes(&self) -> u64 { - self.buckets - .iter() - .map(|bucket| bucket.total_size_bytes()) - .sum() - } - - /// Returns the number of buckets stored across this entire table. - pub fn total_buckets(&self) -> u64 { - self.buckets.len() as _ +impl SizeBytes for IndexedBucket { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.inner.read().size_bytes } } -impl ComponentBucket { - /// Returns the number of rows stored across this bucket. - pub fn total_rows(&self) -> u64 { - self.total_rows - } +impl IndexedBucketInner { + /// Computes and caches the size of both the control & component data stored in this bucket, + /// stack and heap included, in bytes. + /// + /// This is a best-effort approximation, adequate for most purposes (stats, + /// triggering GCs, ...). + #[inline] + pub fn compute_size_bytes(&mut self) -> u64 { + crate::profile_function!(); - /// Returns the size of the data stored across this bucket, in bytes. - pub fn total_size_bytes(&self) -> u64 { - self.total_size_bytes + let Self { + is_sorted, + time_range, + col_time, + col_insert_id, + col_row_id, + col_num_instances, + columns, + size_bytes, + } = self; + + *size_bytes = is_sorted.total_size_bytes() + + time_range.total_size_bytes() + + col_time.total_size_bytes() + + col_insert_id.total_size_bytes() + + col_row_id.total_size_bytes() + + col_num_instances.total_size_bytes() + + columns.total_size_bytes() + + size_bytes.total_size_bytes(); + + *size_bytes } } -// This test exists because the documentation and online discussions revolving around -// arrow2's `estimated_bytes_size()` function indicate that there's a lot of limitations and -// edge cases to be aware of. -// -// Also, it's just plain hard to be sure that the answer you get is the answer you're looking -// for with these kinds of tools. When in doubt.. test everything we're going to need from it. -// -// In many ways, this is a specification of what we mean when we ask "what's the size of this -// Arrow array?". -#[test] -#[allow(clippy::from_iter_instead_of_collect)] -fn test_arrow_estimated_size_bytes() { - use arrow2::{ - array::{Array, Float64Array, ListArray, StructArray, UInt64Array, Utf8Array}, - compute::aggregate::estimated_bytes_size, - datatypes::{DataType, Field}, - offset::Offsets, - }; - - // simple primitive array - { - let data = vec![42u64; 100]; - let array = UInt64Array::from_vec(data.clone()).boxed(); - assert_eq!( - std::mem::size_of_val(data.as_slice()), - estimated_bytes_size(&*array) - ); - } - - // utf8 strings array - { - let data = vec![Some("some very, very, very long string indeed"); 100]; - let array = Utf8Array::::from(data.clone()).to_boxed(); - - let raw_size_bytes = data - .iter() - // headers + bodies! 
- .map(|s| std::mem::size_of_val(s) + std::mem::size_of_val(s.unwrap().as_bytes())) - .sum::(); - let arrow_size_bytes = estimated_bytes_size(&*array); +// --- Timeless --- - assert_eq!(5600, raw_size_bytes); - assert_eq!(4404, arrow_size_bytes); // smaller because validity bitmaps instead of opts - } - - // simple primitive list array - { - let data = std::iter::repeat(vec![42u64; 100]) - .take(50) - .collect::>(); - let array = { - let array_flattened = - UInt64Array::from_vec(data.clone().into_iter().flatten().collect()).boxed(); - - ListArray::::new( - ListArray::::default_datatype(DataType::UInt64), - Offsets::try_from_lengths(std::iter::repeat(50).take(50)) - .unwrap() - .into(), - array_flattened, - None, - ) - .boxed() - }; - - let raw_size_bytes = data - .iter() - // headers + bodies! - .map(|s| std::mem::size_of_val(s) + std::mem::size_of_val(s.as_slice())) - .sum::(); - let arrow_size_bytes = estimated_bytes_size(&*array); - - assert_eq!(41200, raw_size_bytes); - assert_eq!(40200, arrow_size_bytes); // smaller because smaller inner headers - } - - // compound type array - { - #[derive(Clone, Copy)] - struct Point { - x: f64, - y: f64, - } - - impl Default for Point { - fn default() -> Self { - Self { x: 42.0, y: 666.0 } - } - } - - let data = vec![Point::default(); 100]; - let array = { - let x = Float64Array::from_vec(data.iter().map(|p| p.x).collect()).boxed(); - let y = Float64Array::from_vec(data.iter().map(|p| p.y).collect()).boxed(); - let fields = vec![ - Field::new("x", DataType::Float64, false), - Field::new("y", DataType::Float64, false), - ]; - StructArray::new(DataType::Struct(fields), vec![x, y], None).boxed() - }; - - let raw_size_bytes = std::mem::size_of_val(data.as_slice()); - let arrow_size_bytes = estimated_bytes_size(&*array); - - assert_eq!(1600, raw_size_bytes); - assert_eq!(1600, arrow_size_bytes); +impl PersistentIndexedTable { + /// Returns the number of rows stored across this table. + #[inline] + pub fn num_rows(&self) -> u64 { + self.col_num_instances.len() as _ } +} - // compound type list array - { - #[derive(Clone, Copy)] - struct Point { - x: f64, - y: f64, - } - - impl Default for Point { - fn default() -> Self { - Self { x: 42.0, y: 666.0 } - } - } - - let data = std::iter::repeat(vec![Point::default(); 100]) - .take(50) - .collect::>(); - let array: Box = { - let array = { - let x = - Float64Array::from_vec(data.iter().flatten().map(|p| p.x).collect()).boxed(); - let y = - Float64Array::from_vec(data.iter().flatten().map(|p| p.y).collect()).boxed(); - let fields = vec![ - Field::new("x", DataType::Float64, false), - Field::new("y", DataType::Float64, false), - ]; - StructArray::new(DataType::Struct(fields), vec![x, y], None) - }; - - ListArray::::new( - ListArray::::default_datatype(array.data_type().clone()), - Offsets::try_from_lengths(std::iter::repeat(50).take(50)) - .unwrap() - .into(), - array.boxed(), - None, - ) - .boxed() - }; - - let raw_size_bytes = data - .iter() - // headers + bodies! 
- .map(|s| std::mem::size_of_val(s) + std::mem::size_of_val(s.as_slice())) - .sum::(); - let arrow_size_bytes = estimated_bytes_size(&*array); +impl SizeBytes for PersistentIndexedTable { + #[inline] + fn heap_size_bytes(&self) -> u64 { + crate::profile_function!(); - assert_eq!(81200, raw_size_bytes); - assert_eq!(80200, arrow_size_bytes); // smaller because smaller inner headers + let Self { + ent_path, + cluster_key, + col_insert_id, + col_row_id, + col_num_instances, + columns, + } = self; + + ent_path.total_size_bytes() + + cluster_key.total_size_bytes() + + col_insert_id.total_size_bytes() + + col_row_id.total_size_bytes() + + col_num_instances.total_size_bytes() + + columns.total_size_bytes() } } diff --git a/crates/re_arrow_store/src/store_write.rs b/crates/re_arrow_store/src/store_write.rs index 1585ac537021..677310e5043d 100644 --- a/crates/re_arrow_store/src/store_write.rs +++ b/crates/re_arrow_store/src/store_write.rs @@ -1,23 +1,23 @@ use arrow2::datatypes::DataType; use itertools::Itertools as _; -use nohash_hasher::IntMap; +use nohash_hasher::{IntMap, IntSet}; use parking_lot::RwLock; +use smallvec::SmallVec; use re_log::{debug, trace}; use re_log_types::{ - component_types::InstanceKey, ComponentName, DataCell, DataCellError, DataRow, DataTable, - EntityPath, MsgId, TimeInt, TimePoint, TimeRange, Timeline, + component_types::InstanceKey, ComponentName, DataCell, DataCellColumn, DataCellError, DataRow, + DataTable, RowId, SizeBytes as _, TimeInt, TimePoint, TimeRange, }; use crate::{ - ComponentBucket, ComponentTable, DataStore, DataStoreConfig, IndexBucket, IndexBucketIndices, - IndexTable, PersistentComponentTable, PersistentIndexTable, RowIndex, RowIndexKind, TimeIndex, + store::MetadataRegistry, DataStore, DataStoreConfig, IndexedBucket, IndexedBucketInner, + IndexedTable, PersistentIndexedTable, }; // TODO(#1619): // - The store should insert column-per-column rather than row-per-row (purely a performance // matter) -// - The store shouldn't ever deal with raw arrow arrays, use cells/rows/tables instead // --- Data store --- @@ -26,7 +26,6 @@ pub enum WriteError { #[error("Error with one or more the underlying data cells")] DataCell(#[from] DataCellError), - // Clustering key #[error("The cluster component must be dense, got {0:?}")] SparseClusteringComponent(DataCell), @@ -36,9 +35,14 @@ pub enum WriteError { )] InvalidClusteringComponent(DataCell), - // Misc - #[error("Other error")] - Other(#[from] anyhow::Error), + #[error( + "Component '{component}' failed to typecheck: expected {expected:#?} but got {got:#?}" + )] + TypeCheck { + component: ComponentName, + expected: DataType, + got: DataType, + }, } pub type WriteResult = ::std::result::Result; @@ -54,7 +58,7 @@ impl DataStore { /// /// See [`Self::insert_row`]. pub fn insert_table(&mut self, table: &DataTable) -> WriteResult<()> { - for row in table.as_rows() { + for row in table.to_rows() { self.insert_row(&row)?; } Ok(()) @@ -64,7 +68,7 @@ impl DataStore { /// /// If the bundle doesn't carry a payload for the cluster key, one will be auto-generated /// based on the length of the components in the payload, in the form of an array of - /// monotonically increasing u64s going from `0` to `N-1`. + /// monotonically increasing `u64`s going from `0` to `N-1`. pub fn insert_row(&mut self, row: &DataRow) -> WriteResult<()> { // TODO(cmc): kind & insert_id need to somehow propagate through the span system. 
self.insert_id += 1; @@ -75,15 +79,42 @@ impl DataStore { crate::profile_function!(); + // Update type registry and do typechecking if enabled + if self.config.enable_typecheck { + for cell in row.cells().iter() { + use std::collections::hash_map::Entry; + match self.type_registry.entry(cell.component_name()) { + Entry::Occupied(entry) => { + if entry.get() != cell.datatype() { + return Err(WriteError::TypeCheck { + component: cell.component_name(), + expected: entry.get().clone(), + got: cell.datatype().clone(), + }); + } + } + Entry::Vacant(entry) => { + entry.insert(cell.datatype().clone()); + } + } + } + } else { + for cell in row.cells().iter() { + self.type_registry + .insert(cell.component_name(), cell.datatype().clone()); + } + } + let DataRow { row_id, timepoint, entity_path: ent_path, - num_instances: _, + num_instances, cells, } = row; let ent_path_hash = ent_path.hash(); + let num_instances = *num_instances; trace!( kind = "insert", @@ -102,150 +133,8 @@ impl DataStore { .find_position(|cell| cell.component_name() == self.cluster_key) .map(|(pos, _)| pos); - if timepoint.is_timeless() { - let mut row_indices = IntMap::default(); - - self.insert_timeless_row_helper(cluster_cell_pos, cells, &mut row_indices)?; - - let index = self - .timeless_indices - .entry(ent_path_hash) - .or_insert_with(|| PersistentIndexTable::new(self.cluster_key, ent_path.clone())); - index.insert(&row_indices)?; - } else { - let mut row_indices = IntMap::default(); - - self.insert_row_helper(timepoint, cluster_cell_pos, cells, &mut row_indices)?; - - for (timeline, time) in timepoint.iter() { - let ent_path = ent_path.clone(); // shallow - let index = self - .indices - .entry((*timeline, ent_path_hash)) - .or_insert_with(|| IndexTable::new(self.cluster_key, *timeline, ent_path)); - index.insert(&self.config, *time, &row_indices)?; - } - } - - // This is valuable information, even for a timeless timepoint! - self.messages.insert(*row_id, timepoint.clone()); - - Ok(()) - } - - fn insert_timeless_row_helper( - &mut self, - cluster_cell_pos: Option, - cells: &[DataCell], - row_indices: &mut IntMap, - ) -> WriteResult<()> { - crate::profile_function!(); - - let cluster_row_idx = - self.get_or_create_cluster_component(cluster_cell_pos, cells, &TimePoint::default())?; - - // Always insert the cluster component. - row_indices.insert(self.cluster_key, cluster_row_idx); - - if self.config.store_insert_ids { - // Store the ID of the write request alongside the data. - // - // This is _not_ an actual `RowIndex`, there isn't even a component table associated - // with insert IDs! - // We're just abusing the fact that any value we push here as a `RowIndex` will end up - // as-is in the index. 
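The typecheck branch near the top of this hunk records the first datatype seen for each component and rejects later inserts that disagree. A small sketch of that registry logic, using a hypothetical two-variant `DataType` in place of the real arrow2 datatypes:

```rust
use std::collections::HashMap;

// Hypothetical stand-in for arrow2's DataType.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DataType {
    UInt64,
    Float64,
}

#[derive(Default)]
struct TypeRegistry(HashMap<String, DataType>);

impl TypeRegistry {
    fn check_or_register(&mut self, component: &str, datatype: DataType) -> Result<(), String> {
        use std::collections::hash_map::Entry;
        match self.0.entry(component.to_owned()) {
            // Seen before: the datatype must never change across inserts.
            Entry::Occupied(entry) if *entry.get() != datatype => Err(format!(
                "'{component}' failed to typecheck: expected {:?}, got {datatype:?}",
                entry.get()
            )),
            Entry::Occupied(_) => Ok(()),
            // First time we see this component: register its datatype.
            Entry::Vacant(entry) => {
                entry.insert(datatype);
                Ok(())
            }
        }
    }
}

fn main() {
    let mut registry = TypeRegistry::default();
    assert!(registry.check_or_register("rerun.point2d", DataType::Float64).is_ok());
    // Same component, different datatype: rejected on the second insert.
    assert!(registry.check_or_register("rerun.point2d", DataType::UInt64).is_err());
}
```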
- row_indices.insert( - Self::insert_id_key(), - RowIndex::from_u63(RowIndexKind::Temporal, self.insert_id), - ); - } - - for cell in cells - .iter() - .filter(|cell| cell.component_name() != self.cluster_key) - { - let component = cell.component_name(); - - let table = self - .timeless_components - .entry(cell.component_name()) - .or_insert_with(|| PersistentComponentTable::new(component, cell.datatype())); - - let row_idx = table.push_cell(cell); - row_indices.insert(component, row_idx); - } - - Ok(()) - } - - fn insert_row_helper( - &mut self, - time_point: &TimePoint, - cluster_cell_pos: Option, - cells: &[DataCell], - row_indices: &mut IntMap, - ) -> WriteResult<()> { - crate::profile_function!(); - - let cluster_row_idx = - self.get_or_create_cluster_component(cluster_cell_pos, cells, time_point)?; - - // Always insert the cluster component. - row_indices.insert(self.cluster_key, cluster_row_idx); - - if self.config.store_insert_ids { - // Store the ID of the write request alongside the data. - // - // This is _not_ an actual `RowIndex`, there isn't even a component table associated - // with insert IDs! - // We're just abusing the fact that any value we push here as a `RowIndex` will end up - // as-is in the index. - row_indices.insert( - Self::insert_id_key(), - RowIndex::from_u63(RowIndexKind::Temporal, self.insert_id), - ); - } - - for cell in cells - .iter() - .filter(|cell| cell.component_name() != self.cluster_key) - { - let component = cell.component_name(); - - let table = self - .components - .entry(component) - .or_insert_with(|| ComponentTable::new(component, cell.datatype())); - - let row_idx = table.push_cell(&self.config, time_point, cell); - row_indices.insert(component, row_idx); - } - - Ok(()) - } - - /// Tries to find the cluster component for the current row, or creates it if the caller hasn't - /// specified any. - /// - /// When creating an auto-generated cluster component of a specific length for the first time, - /// this will keep track of its assigned row index and re-use it later on as a mean of - /// deduplication. - fn get_or_create_cluster_component( - &mut self, - cluster_cell_pos: Option, - cells: &[DataCell], - time_point: &TimePoint, - ) -> WriteResult { - crate::profile_function!(); - - enum ClusterData<'a> { - Cached(RowIndex), - GenData(DataCell), - UserData(&'a DataCell), - } - - let (cluster_len, cluster_data) = if let Some(cluster_cell_pos) = cluster_cell_pos { - // We found a component with a name matching the cluster key's, let's make sure it's + let generated_cluster_cell = if let Some(cluster_cell_pos) = cluster_cell_pos { + // We found a column with a name matching the cluster key's, let's make sure it's // valid (dense, sorted, no duplicates) and use that if so. let cluster_cell = &cells[cluster_cell_pos]; @@ -259,191 +148,169 @@ impl DataStore { return Err(WriteError::InvalidClusteringComponent(cluster_cell.clone())); } - ( - cluster_cell.num_instances(), - ClusterData::UserData(cluster_cell), - ) + None } else { // The caller has not specified any cluster component, and so we'll have to generate // one... unless we've already generated one of this exact length in the past, - // in which case we can simply re-use that row index. - - // Use the length of any other component in the batch, they are guaranteed to all - // share the same length at this point anyway. - let len = cells.first().map_or(0, |comp| comp.num_instances()); + // in which case we can simply re-use that cell. 
- if let Some(row_idx) = self.cluster_comp_cache.get(&len) { - // Cache hit! Re-use that row index. - (len, ClusterData::Cached(*row_idx)) - } else { - // Cache miss! Craft a new instance keys from the ground up. - - // TODO(#1712): That's exactly how one should create a cell of instance keys... but - // it turns out that running `TryIntoArrow` on a primitive type is orders of - // magnitude slower than manually creating the equivalent primitive array for some - // reason... - // let cell = DataCell::from_component::(0..len as u64); - - // ...so we create it manually instead. - use re_log_types::Component as _; - let values = - arrow2::array::UInt64Array::from_vec((0..len as u64).collect_vec()).boxed(); - let cell = DataCell::from_arrow(InstanceKey::name(), values); - - (len, ClusterData::GenData(cell)) - } + Some(self.generate_cluster_cell(num_instances)) }; - match cluster_data { - ClusterData::Cached(row_idx) => Ok(row_idx), - ClusterData::GenData(cell) => { - // We had to generate a cluster component of the given length for the first time, - // let's store it forever. - - let table = self - .timeless_components - .entry(self.cluster_key) - .or_insert_with(|| { - PersistentComponentTable::new(self.cluster_key, cell.datatype()) - }); - let row_idx = table.push_cell(&cell); - - self.cluster_comp_cache.insert(cluster_len, row_idx); + let insert_id = self.config.store_insert_ids.then_some(self.insert_id); - Ok(row_idx) - } - ClusterData::UserData(cell) => { - // If we didn't hit the cache, then we have to insert this cluster component in - // the right tables, just like any other component. - - let row_idx = if time_point.is_timeless() { - let table = self - .timeless_components - .entry(self.cluster_key) - .or_insert_with(|| { - PersistentComponentTable::new(self.cluster_key, cell.datatype()) - }); - table.push_cell(cell) - } else { - let table = self - .components - .entry(self.cluster_key) - .or_insert_with(|| ComponentTable::new(self.cluster_key, cell.datatype())); - table.push_cell(&self.config, time_point, cell) - }; + if timepoint.is_timeless() { + let index = self + .timeless_tables + .entry(ent_path_hash) + .or_insert_with(|| PersistentIndexedTable::new(self.cluster_key, ent_path.clone())); - Ok(row_idx) + index.insert_row(insert_id, generated_cluster_cell, row); + } else { + for (timeline, time) in timepoint.iter() { + let ent_path = ent_path.clone(); // shallow + let index = self + .tables + .entry((*timeline, ent_path_hash)) + .or_insert_with(|| IndexedTable::new(self.cluster_key, *timeline, ent_path)); + + index.insert_row( + &self.config, + insert_id, + *time, + generated_cluster_cell.clone(), /* shallow */ + row, + ); } } - } - pub fn clear_msg_metadata(&mut self, drop_msg_ids: &ahash::HashSet) { - crate::profile_function!(); + self.metadata_registry.upsert(*row_id, timepoint.clone()); - self.messages - .retain(|msg_id, _| !drop_msg_ids.contains(msg_id)); + Ok(()) } -} - -// --- Persistent Indices --- -impl PersistentIndexTable { - pub fn new(cluster_key: ComponentName, ent_path: EntityPath) -> Self { - Self { - cluster_key, - ent_path, - indices: Default::default(), - num_rows: 0, - all_components: Default::default(), - } + /// Wipes all timeless data. + /// + /// Mostly useful for testing/debugging purposes. 
+ pub fn wipe_timeless_data(&mut self) {
+ self.timeless_tables = Default::default();
}

- #[allow(clippy::unnecessary_wraps)]
- pub fn insert(&mut self, row_indices: &IntMap) -> anyhow::Result<()> {
+ /// Auto-generates an appropriate cluster cell for the specified number of instances and
+ /// transparently handles caching.
+ // TODO(#1777): shared slices for auto generated keys
+ fn generate_cluster_cell(&mut self, num_instances: u32) -> DataCell {
crate::profile_function!();

- // 2-way merge, step1: left-to-right
- //
- // push new row indices to their associated secondary index
- for (name, row_idx) in row_indices {
- let index = self
- .indices
- .entry(*name)
- .or_insert_with(|| vec![None; self.num_rows as usize]);
- index.push(Some(*row_idx));
+ if let Some(cell) = self.cluster_cell_cache.get(&num_instances) {
+ // Cache hit!
+
+ cell.clone() // shallow
+ } else {
+ // Cache miss! Craft new instance keys from the ground up.
+
+ // TODO(#1712): That's exactly how one should create a cell of instance keys...
+ // but it turns out that running `TryIntoArrow` on a primitive type is orders of
+ // magnitude slower than manually creating the equivalent primitive array for some
+ // reason...
+ // let cell = DataCell::from_component::(0..len as u64);
+
+ // ...so we create it manually instead.
+ use re_log_types::Component as _;
+ let values =
+ arrow2::array::UInt64Array::from_vec((0..num_instances as u64).collect_vec())
+ .boxed();
+ let mut cell = DataCell::from_arrow(InstanceKey::name(), values);
+ cell.compute_size_bytes();
+
+ self.cluster_cell_cache
+ .insert(num_instances, cell.clone() /* shallow */);
+
+ cell
 }
+ }
+}

- // 2-way merge, step2: right-to-left
- //
- // fill unimpacted secondary indices with null values
- for (name, index) in &mut self.indices {
- if !row_indices.contains_key(name) {
- index.push(None);
+impl MetadataRegistry {
+ fn upsert(&mut self, row_id: RowId, timepoint: TimePoint) {
+ let mut added_size_bytes = 0;
+
+ // This is valuable information even for a timeless timepoint!
+ match self.entry(row_id) {
+ std::collections::btree_map::Entry::Vacant(entry) => {
+ // NOTE: In a map, thus on the heap!
+ added_size_bytes += row_id.total_size_bytes();
+ added_size_bytes += timepoint.total_size_bytes();
+ entry.insert(timepoint);
+ }
+ // NOTE: When saving and loading data from disk, it's very possible that we try to
+ // insert data for a single `RowId` in multiple calls (buckets are per-timeline, so a
+ // single `RowId` can get spread across multiple buckets)!
+ std::collections::btree_map::Entry::Occupied(mut entry) => {
+ let entry = entry.get_mut();
+ for (timeline, time) in timepoint {
+ if let Some(old_time) = entry.insert(timeline, time) {
+ if old_time != time {
+ re_log::error!(%row_id, ?timeline, old_time = ?old_time, new_time = ?time, "detected re-used `RowId/Timeline` pair, this is illegal and will lead to undefined behavior in the datastore");
+ debug_assert!(false, "detected re-used `RowId/Timeline`");
+ }
+ } else {
+ // NOTE: In a map, thus on the heap!
+ added_size_bytes += timeline.total_size_bytes();
+ added_size_bytes += time.as_i64().total_size_bytes();
+ }
+ }
+ }
 }

- self.num_rows += 1;
-
- #[cfg(debug_assertions)]
- self.sanity_check().unwrap();
-
- // Insert components last, only if bucket-insert succeeded.
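// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): `generate_cluster_cell` above
// memoizes auto-generated instance keys by length, so every row with N
// instances shares the same `[0, 1, .., N-1]` cell. A std-only model of that
// cache, with `Arc` standing in for the shallow (reference-counted) clone of
// a `DataCell`:

use std::collections::HashMap;
use std::sync::Arc;

#[derive(Default)]
struct ClusterCellCache(HashMap<u32, Arc<Vec<u64>>>);

impl ClusterCellCache {
    fn get_or_generate(&mut self, num_instances: u32) -> Arc<Vec<u64>> {
        if let Some(cell) = self.0.get(&num_instances) {
            // Cache hit: cheap pointer clone, no new allocation.
            return Arc::clone(cell);
        }
        // Cache miss: build the keys once, then keep them forever.
        let cell = Arc::new((0..num_instances as u64).collect::<Vec<_>>());
        self.0.insert(num_instances, Arc::clone(&cell));
        cell
    }
}

fn main() {
    let mut cache = ClusterCellCache::default();
    let a = cache.get_or_generate(3);
    let b = cache.get_or_generate(3);
    assert_eq!(*a, vec![0, 1, 2]);
    // Both handles point at the same allocation, as with shallow `DataCell`s.
    assert!(Arc::ptr_eq(&a, &b));
}
// ---------------------------------------------------------------------------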
- self.all_components.extend(row_indices.keys()); - - Ok(()) + self.heap_size_bytes += added_size_bytes; } } -// --- Indices --- +// --- Temporal --- -impl IndexTable { - pub fn new(cluster_key: ComponentName, timeline: Timeline, ent_path: EntityPath) -> Self { - Self { - timeline, - ent_path, - buckets: [(i64::MIN.into(), IndexBucket::new(cluster_key, timeline))].into(), - cluster_key, - all_components: Default::default(), - } - } - - pub fn insert( +impl IndexedTable { + pub fn insert_row( &mut self, config: &DataStoreConfig, + insert_id: Option, time: TimeInt, - indices: &IntMap, - ) -> anyhow::Result<()> { + generated_cluster_cell: Option, + row: &DataRow, + ) { crate::profile_function!(); + let components: IntSet<_> = row.component_names().collect(); + // borrowck workaround let timeline = self.timeline; let ent_path = self.ent_path.clone(); // shallow let (_, bucket) = self.find_bucket_mut(time); - let size = bucket.total_size_bytes(); - let size_overflow = bucket.total_size_bytes() > config.index_bucket_size_bytes; - - let len = bucket.total_rows(); - let len_overflow = len > config.index_bucket_nb_rows; + let len = bucket.num_rows(); + let len_overflow = len > config.indexed_bucket_num_rows; - if size_overflow || len_overflow { + if len_overflow { + let bucket_size_before = bucket.total_size_bytes(); if let Some((min, second_half)) = bucket.split() { trace!( kind = "insert", timeline = %timeline.name(), time = timeline.typ().format(time), entity = %ent_path, - size_limit = config.component_bucket_size_bytes, - len_limit = config.component_bucket_nb_rows, - size, size_overflow, + len_limit = config.indexed_bucket_num_rows, len, len_overflow, new_time_bound = timeline.typ().format(min), - "splitting off index bucket following overflow" + "splitting off indexed bucket following overflow" ); + self.buckets_size_bytes += + bucket.total_size_bytes() + second_half.total_size_bytes(); + self.buckets_size_bytes -= bucket_size_before; self.buckets.insert(min, second_half); - return self.insert(config, time, indices); + + return self.insert_row(config, insert_id, time, generated_cluster_cell, row); } // We couldn't split the bucket, either because it's already too small, or because it @@ -462,16 +329,16 @@ impl IndexTable { // covers a time range which includes this timepoint (if such a bucket existed, then // we would have stumbled upon it before ever finding the current one!). // This gives us an opportunity to create a new bucket that starts at the upper - // bound of the current one _excluded_ and that ranges all the way up to the timepoint - // that we're inserting. + // bound of the current one _excluded_ and that ranges all the way up to the + // timepoint that we're inserting. // Not only is this a great opportunity to naturally split things up, it's actually // mandatory to avoid a nasty edge case where one keeps inserting into a full, // unsplittable bucket and indefinitely creates new single-entry buckets, leading // to the worst-possible case of fragmentation. 
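// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the overflow path above in
// `IndexedTable::insert_row`. When a bucket exceeds `indexed_bucket_num_rows`
// it is split, the second half is keyed under its new lower time bound, and
// the insert is retried. A std-only model; `MAX_ROWS` stands in for the
// config limit, and picking the split point is simplified here (the real
// `find_split_index` appears later in this diff):

use std::collections::BTreeMap;

const MAX_ROWS: usize = 2;

fn insert_time(buckets: &mut BTreeMap<i64, Vec<i64>>, time: i64) {
    // `find_bucket_mut`: the target bucket is the one with the greatest
    // lower bound <= `time`; the `i64::MIN` bucket guarantees a match.
    let lower = *buckets.range(..=time).next_back().expect("i64::MIN bucket").0;

    if buckets[&lower].len() > MAX_ROWS {
        let bucket = buckets.get_mut(&lower).unwrap();
        bucket.sort_unstable(); // buckets sort lazily before splitting
        let mid = bucket.len() / 2;
        // All rows of a given timepoint must stay in one bucket: only split
        // where two adjacent times differ.
        if bucket[mid - 1] != bucket[mid] {
            let second_half = bucket.split_off(mid);
            buckets.insert(second_half[0], second_half);
            return insert_time(buckets, time); // retry, as the real code does
        }
        // Unsplittable: fall through and ignore the limit (worst case), or
        // open a fresh bucket past the upper bound; see the next hunk.
    }
    buckets.get_mut(&lower).unwrap().push(time);
}

fn main() {
    let mut buckets = BTreeMap::new();
    buckets.insert(i64::MIN, Vec::new());
    for t in [41, 41, 42, 42, 43, 44] {
        insert_time(&mut buckets, t);
    }
    assert!(buckets.len() > 1); // the single initial bucket got split
}
// ---------------------------------------------------------------------------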
let (bucket_upper_bound, bucket_len) = { - let guard = bucket.indices.read(); - (guard.times.last().copied(), guard.times.len()) + let guard = bucket.inner.read(); + (guard.col_time.last().copied(), guard.col_time.len()) }; if let Some(upper_bound) = bucket_upper_bound { @@ -482,27 +349,31 @@ impl IndexTable { timeline = %timeline.name(), time = timeline.typ().format(time), entity = %ent_path, - size_limit = config.component_bucket_size_bytes, - len_limit = config.component_bucket_nb_rows, - size, size_overflow, + len_limit = config.indexed_bucket_num_rows, len, len_overflow, new_time_bound = timeline.typ().format(new_time_bound.into()), - "creating brand new index bucket following overflow" + "creating brand new indexed bucket following overflow" ); + + let (inner, inner_size_bytes) = { + let mut inner = IndexedBucketInner { + time_range: TimeRange::new(time, time), + ..Default::default() + }; + let size_bytes = inner.compute_size_bytes(); + (inner, size_bytes) + }; self.buckets.insert( (new_time_bound).into(), - IndexBucket { + IndexedBucket { timeline, - indices: RwLock::new(IndexBucketIndices { - is_sorted: true, - time_range: TimeRange::new(time, time), - times: Default::default(), - indices: Default::default(), - }), cluster_key: self.cluster_key, + inner: RwLock::new(inner), }, ); - return self.insert(config, time, indices); + + self.buckets_size_bytes += inner_size_bytes; + return self.insert_row(config, insert_id, time, generated_cluster_cell, row); } } @@ -511,11 +382,9 @@ impl IndexTable { timeline = %timeline.name(), time = timeline.typ().format(time), entity = %ent_path, - size_limit = config.component_bucket_size_bytes, - len_limit = config.component_bucket_nb_rows, - size, size_overflow, + len_limit = config.indexed_bucket_num_rows, len, len_overflow, - "couldn't split index bucket, proceeding to ignore limits" + "couldn't split indexed bucket, proceeding to ignore limits" ); } @@ -524,79 +393,117 @@ impl IndexTable { timeline = %timeline.name(), time = timeline.typ().format(time), entity = %ent_path, - components = ?indices.iter().collect::>(), - "inserted into index table" + ?components, + "inserted into indexed tables" ); - bucket.insert(time, indices)?; + self.buckets_size_bytes += + bucket.insert_row(insert_id, time, generated_cluster_cell, row, &components); + self.buckets_num_rows += 1; // Insert components last, only if bucket-insert succeeded. - self.all_components.extend(indices.keys()); - - Ok(()) + self.all_components.extend(components); } } -impl IndexBucket { - pub fn new(cluster_key: ComponentName, timeline: Timeline) -> Self { - Self { - timeline, - indices: RwLock::new(IndexBucketIndices::default()), - cluster_key, - } - } - - #[allow(clippy::unnecessary_wraps)] - pub fn insert( +impl IndexedBucket { + /// Returns the size in bytes of the inserted arrow data. 
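// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the branch above handles an
// unsplittable, overflowing bucket. If the new timepoint lies strictly after
// the bucket's last time, a fresh bucket is opened at `last_time + 1` instead
// of growing the full one forever, which avoids the fragmentation edge case
// described in the comment. A std-only model:

use std::collections::BTreeMap;

fn insert_unsplittable_overflow(
    buckets: &mut BTreeMap<i64, Vec<i64>>,
    current_lower: i64,
    time: i64,
) {
    let upper_bound = buckets[&current_lower].last().copied();
    match upper_bound {
        // New timepoint is past everything in the full bucket: open a new
        // bucket covering `[upper + 1, ..)` and insert there instead.
        Some(upper) if time > upper => {
            buckets.entry(upper + 1).or_default().push(time);
        }
        // Otherwise there is no choice: ignore the limit and keep growing.
        _ => buckets.get_mut(&current_lower).unwrap().push(time),
    }
}

fn main() {
    let mut buckets = BTreeMap::new();
    buckets.insert(i64::MIN, vec![41, 41, 41]); // full and unsplittable
    insert_unsplittable_overflow(&mut buckets, i64::MIN, 42);
    assert_eq!(buckets[&42], vec![42]); // fresh single-entry bucket at bound 42
}
// ---------------------------------------------------------------------------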
+ fn insert_row( &mut self, + insert_id: Option, time: TimeInt, - row_indices: &IntMap, - ) -> anyhow::Result<()> { + generated_cluster_cell: Option, + row: &DataRow, + components: &IntSet, + ) -> u64 { crate::profile_function!(); - let mut guard = self.indices.write(); - let IndexBucketIndices { + let mut size_bytes_added = 0u64; + let num_rows = self.num_rows() as usize; + + let mut inner = self.inner.write(); + let IndexedBucketInner { is_sorted, time_range, - times, - indices, - } = &mut *guard; - - // append time to primary index and update time range appropriately - times.push(time.as_i64()); + col_time, + col_insert_id, + col_row_id, + col_num_instances, + columns, + size_bytes, + } = &mut *inner; + + // append time to primary column and update time range appropriately + col_time.push(time.as_i64()); *time_range = TimeRange::new(time_range.min.min(time), time_range.max.max(time)); + size_bytes_added += time.as_i64().total_size_bytes(); - // append components to secondary indices (2-way merge) + // update all control columns + if let Some(insert_id) = insert_id { + col_insert_id.push(insert_id); + size_bytes_added += insert_id.total_size_bytes(); + } + col_row_id.push(row.row_id()); + size_bytes_added += row.row_id().total_size_bytes(); + col_num_instances.push(row.num_instances()); + size_bytes_added += row.num_instances().total_size_bytes(); + + // insert auto-generated cluster cell if present + if let Some(cluster_cell) = generated_cluster_cell { + let component = cluster_cell.component_name(); + let column = columns.entry(component).or_insert_with(|| { + let column = DataCellColumn::empty(num_rows); + size_bytes_added += component.total_size_bytes(); + size_bytes_added += column.total_size_bytes(); + column + }); + size_bytes_added += cluster_cell.total_size_bytes(); + column.0.push(Some(cluster_cell)); + } - // 2-way merge, step1: left-to-right - // - // push new row indices to their associated secondary index - for (name, row_idx) in row_indices { - let index = indices - .entry(*name) - .or_insert_with(|| vec![None; times.len().saturating_sub(1)]); - index.push(Some(*row_idx)); + // append components to their respective columns (2-way merge) + + // 2-way merge, step 1: left-to-right + for cell in row.cells().iter() { + let component = cell.component_name(); + let column = columns.entry(component).or_insert_with(|| { + let column = DataCellColumn::empty(col_time.len().saturating_sub(1)); + size_bytes_added += component.total_size_bytes(); + size_bytes_added += column.total_size_bytes(); + column + }); + size_bytes_added += cell.total_size_bytes(); + column.0.push(Some(cell.clone() /* shallow */)); } - // 2-way merge, step2: right-to-left + // 2-way merge, step 2: right-to-left // - // fill unimpacted secondary indices with null values - for (name, index) in &mut *indices { - if !row_indices.contains_key(name) { - index.push(None); + // fill unimpacted columns with null values + for (component, column) in &mut *columns { + // The cluster key always gets added one way or another, don't try to force fill it! + if *component == self.cluster_key { + continue; + } + + if !components.contains(component) { + let none_cell: Option = None; + size_bytes_added += none_cell.total_size_bytes(); + column.0.push(none_cell); } } // TODO(#433): re_datastore: properly handle already sorted data during insertion *is_sorted = false; + *size_bytes += size_bytes_added; + #[cfg(debug_assertions)] { - drop(guard); // sanity checking will grab the lock! 
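// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the 2-way merge above keeps
// every data column the same length as the time column. Cells present in the
// row are pushed as `Some`, and every column the row did not touch gets a
// `None` back-filled. A std-only model with string component names and `u64`
// payloads standing in for `DataCell`s:

use std::collections::HashMap;

fn merge_row(
    columns: &mut HashMap<String, Vec<Option<u64>>>,
    num_rows: usize,
    row: &[(&str, u64)],
) {
    // step 1, left-to-right: push incoming cells onto their columns,
    // materializing brand-new columns padded with `None` for past rows.
    for (component, value) in row {
        columns
            .entry((*component).to_owned())
            .or_insert_with(|| vec![None; num_rows])
            .push(Some(*value));
    }
    // step 2, right-to-left: pad the columns this row didn't touch.
    for (component, column) in columns.iter_mut() {
        if !row.iter().any(|(name, _)| name == component) {
            column.push(None);
        }
    }
}

fn main() {
    let mut columns = HashMap::new();
    merge_row(&mut columns, 0, &[("rect2d", 1)]);
    merge_row(&mut columns, 1, &[("point2d", 7)]);
    // Both columns now have 2 entries; the holes are explicit `None`s.
    assert_eq!(columns["rect2d"], vec![Some(1), None]);
    assert_eq!(columns["point2d"], vec![None, Some(7)]);
}
// ---------------------------------------------------------------------------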
+ drop(inner); self.sanity_check().unwrap(); } - Ok(()) + size_bytes_added } /// Splits the bucket into two, potentially uneven parts. @@ -612,84 +519,44 @@ impl IndexBucket { /// /// # Unsplittable buckets /// - /// The datastore and query path operate under the general assumption that _all of the - /// index data_ for a given timepoint will reside in _one and only one_ bucket. + /// The datastore and query path operate under the general assumption that _all of the data_ + /// for a given timepoint will reside in _one and only one_ bucket. /// This function makes sure to uphold that restriction, which sometimes means splitting the /// bucket into two uneven parts, or even not splitting it at all. /// - /// Here's an example of an index table configured to have a maximum of 2 rows per bucket: one - /// can see that the 1st and 2nd buckets exceed this maximum in order to uphold the restriction - /// described above: + /// Run the following command to display a visualization of the store's internal + /// datastructures and better understand how everything fits together: /// ```text - /// IndexTable { - /// timeline: frame_nr - /// entity: this/that - /// size: 3 buckets for a total of 256 B across 8 total rows - /// buckets: [ - /// IndexBucket { - /// index time bound: >= #0 - /// size: 96 B across 3 rows - /// - frame_nr: from #41 to #41 (all inclusive) - /// data (sorted=true): - /// +----------+---------------+--------------+--------------------+ - /// | frame_nr | rerun.point2d | rerun.rect2d | rerun.instance_key | - /// +----------+---------------+--------------+--------------------+ - /// | 41 | | | 1 | - /// | 41 | 1 | | 2 | - /// | 41 | | 3 | 2 | - /// +----------+---------------+--------------+--------------------+ - /// - /// } - /// IndexBucket { - /// index time bound: >= #42 - /// size: 96 B across 3 rows - /// - frame_nr: from #42 to #42 (all inclusive) - /// data (sorted=true): - /// +----------+--------------+--------------------+--------------------+ - /// | frame_nr | rerun.rect2d | rerun.instance_key | rerun.point2d | - /// +----------+--------------+--------------------+-------------------+ - /// | 42 | 1 | 2 | | - /// | 42 | | 4 | | - /// | 42 | | 2 | 2 | - /// +----------+--------------+--------------------+-------------------+ - /// - /// } - /// IndexBucket { - /// index time bound: >= #43 - /// size: 64 B across 2 rows - /// - frame_nr: from #43 to #44 (all inclusive) - /// data (sorted=true): - /// +----------+--------------+---------------+--------------------+ - /// | frame_nr | rerun.rect2d | rerun.point2d | rerun.instance_key | - /// +----------+--------------+---------------+--------------------+ - /// | 43 | 4 | | 2 | - /// | 44 | | 3 | 2 | - /// +----------+--------------+---------------+--------------------+ - /// - /// } - /// ] - /// } + /// cargo test -p re_arrow_store -- --nocapture datastore_internal_repr /// ``` - pub fn split(&self) -> Option<(TimeInt, Self)> { + // + // TODO(#1524): inline visualization once it's back to a manageable state + fn split(&self) -> Option<(TimeInt, Self)> { let Self { - timeline, indices, .. 
+ timeline, + cluster_key: _, + inner, } = self; - let mut indices = indices.write(); - indices.sort(); + let mut inner1 = inner.write(); + inner1.sort(); - let IndexBucketIndices { + let IndexedBucketInner { is_sorted: _, time_range: time_range1, - times: times1, - indices: indices1, - } = &mut *indices; - - if times1.len() < 2 { + col_time: col_time1, + col_insert_id: col_insert_id1, + col_row_id: col_row_id1, + col_num_instances: col_num_instances1, + columns: columns1, + size_bytes: _, // NOTE: recomputed below + } = &mut *inner1; + + if col_time1.len() < 2 { return None; // early exit: can't split the unsplittable } - if times1.first() == times1.last() { + if col_time1.first() == col_time1.last() { // The entire bucket contains only one timepoint, thus it's impossible to find // a split index to begin with. return None; @@ -698,59 +565,102 @@ impl IndexBucket { crate::profile_function!(); let timeline = *timeline; - // Used down the line to assert that we've left everything in a sane state. - let _total_rows = times1.len(); + + // Used in debug builds to assert that we've left everything in a sane state. + let _num_rows = col_time1.len(); + + fn split_off_column( + column: &mut SmallVec<[T; N]>, + split_idx: usize, + ) -> SmallVec<[T; N]> { + if split_idx >= column.len() { + return SmallVec::default(); + } + + let second_half = SmallVec::from_slice(&column[split_idx..]); + column.truncate(split_idx); + second_half + } let (min2, bucket2) = { - let split_idx = find_split_index(times1).expect("must be splittable at this point"); - - // this updates `time_range1` in-place! - let time_range2 = split_time_range_off(split_idx, times1, time_range1); - - // this updates `times1` in-place! - let times2 = times1.split_off(split_idx); - - // this updates `indices1` in-place! - let indices2: IntMap<_, _> = indices1 - .iter_mut() - .map(|(name, index1)| { - // this updates `index1` in-place! - let index2 = index1.split_off(split_idx); - (*name, index2) - }) - .collect(); - ( - time_range2.min, - Self { - timeline, - indices: RwLock::new(IndexBucketIndices { - is_sorted: true, - time_range: time_range2, - times: times2, - indices: indices2, - }), - cluster_key: self.cluster_key, - }, - ) + let split_idx = find_split_index(col_time1).expect("must be splittable at this point"); + + let (time_range2, col_time2, col_insert_id2, col_row_id2, col_num_instances2) = { + crate::profile_scope!("control"); + ( + // this updates `time_range1` in-place! + split_time_range_off(split_idx, col_time1, time_range1), + // this updates `col_time1` in-place! + split_off_column(col_time1, split_idx), + // this updates `col_insert_id1` in-place! + split_off_column(col_insert_id1, split_idx), + // this updates `col_row_id1` in-place! + split_off_column(col_row_id1, split_idx), + // this updates `col_num_instances1` in-place! + split_off_column(col_num_instances1, split_idx), + ) + }; + + // this updates `columns1` in-place! + let columns2: IntMap<_, _> = { + crate::profile_scope!("data"); + columns1 + .iter_mut() + .map(|(name, column1)| { + if split_idx >= column1.len() { + return (*name, DataCellColumn(SmallVec::default())); + } + + // this updates `column1` in-place! 
+ let column2 = DataCellColumn({ + let second_half = SmallVec::from(&column1.0[split_idx..]); + column1.0.truncate(split_idx); + second_half + }); + (*name, column2) + }) + .collect() + }; + + let inner2 = { + let mut inner2 = IndexedBucketInner { + is_sorted: true, + time_range: time_range2, + col_time: col_time2, + col_insert_id: col_insert_id2, + col_row_id: col_row_id2, + col_num_instances: col_num_instances2, + columns: columns2, + size_bytes: 0, // NOTE: computed below + }; + inner2.compute_size_bytes(); + inner2 + }; + let bucket2 = Self { + timeline, + cluster_key: self.cluster_key, + inner: RwLock::new(inner2), + }; + + (time_range2.min, bucket2) }; + inner1.compute_size_bytes(); + // sanity checks #[cfg(debug_assertions)] { - drop(indices); // sanity checking will grab the lock! + drop(inner1); // sanity checking will grab the lock! self.sanity_check().unwrap(); bucket2.sanity_check().unwrap(); - let total_rows1 = self.total_rows() as i64; - let total_rows2 = bucket2.total_rows() as i64; - debug_assert!( - _total_rows as i64 == total_rows1 + total_rows2, - "expected both buckets to sum up to the length of the original bucket: \ - got bucket={} vs. bucket1+bucket2={}", - _total_rows, - total_rows1 + total_rows2, + let num_rows1 = self.num_rows() as i64; + let num_rows2 = bucket2.num_rows() as i64; + debug_assert_eq!( + _num_rows as i64, + num_rows1 + num_rows2, + "expected both buckets to sum up to the length of the original bucket" ); - debug_assert_eq!(_total_rows as i64, total_rows1 + total_rows2); } Some((min2, bucket2)) @@ -766,7 +676,7 @@ impl IndexBucket { /// /// This function expects `times` to be sorted! /// In debug builds, it will panic if that's not the case. -fn find_split_index(times: &TimeIndex) -> Option { +fn find_split_index(times: &[i64]) -> Option { debug_assert!( times.windows(2).all(|t| t[0] <= t[1]), "time index must be sorted before splitting!" @@ -855,7 +765,7 @@ fn test_find_split_index() { /// The two resulting time range halves are guaranteed to never overlap. fn split_time_range_off( split_idx: usize, - times1: &TimeIndex, + times1: &[i64], time_range1: &mut TimeRange, ) -> TimeRange { let time_range2 = TimeRange::new(times1[split_idx].into(), time_range1.max); @@ -875,242 +785,71 @@ fn split_time_range_off( time_range2 } -// --- Persistent Components --- - -impl PersistentComponentTable { - /// Creates a new timeless component table for the specified component `datatype`. - /// - /// `datatype` must be the type of the component itself, devoid of any wrapping layers - /// (i.e. _not_ a `ListArray<...>`!). - fn new(name: ComponentName, datatype: &DataType) -> Self { - // TODO(#1619): the whole fake row thing needs to go - let chunks = vec![DataCell::from_arrow_empty(name, datatype.clone()).as_arrow_monolist()]; - let total_rows = chunks.iter().map(|values| values.len() as u64).sum(); - let total_size_bytes = chunks - .iter() - .map(|values| arrow2::compute::aggregate::estimated_bytes_size(&**values) as u64) - .sum(); - - Self { - name, - datatype: datatype.clone(), - chunks, - total_rows, - total_size_bytes, - } - } - - /// Pushes `cell` to the end of the bucket, returning the _global_ `RowIndex` of the - /// freshly added row. 
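// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): `find_split_index`, whose
// signature changes above to take a plain `&[i64]`, must pick a split point
// in a *sorted* time column such that equal times never end up in different
// halves. A hedged re-derivation of that contract (the crate's actual
// implementation and tie-breaking may differ):

fn find_split_index(times: &[i64]) -> Option<usize> {
    debug_assert!(times.windows(2).all(|t| t[0] <= t[1]), "must be sorted");
    if times.len() < 2 {
        return None; // early exit: can't split the unsplittable
    }
    let mid = times.len() / 2;
    // Candidate split points are indices `i` where `times[i - 1] != times[i]`;
    // prefer the one closest to the middle for the most even split.
    let right = (mid..times.len()).find(|&i| times[i - 1] != times[i]);
    let left = (1..=mid).rev().find(|&i| times[i - 1] != times[i]);
    match (left, right) {
        (Some(l), Some(r)) => Some(if mid - l <= r - mid { l } else { r }),
        (left, right) => left.or(right), // one side only, or unsplittable (None)
    }
}

fn main() {
    assert_eq!(find_split_index(&[41, 41, 42, 42]), Some(2)); // clean middle
    assert_eq!(find_split_index(&[41, 41, 41, 42]), Some(3)); // uneven but legal
    assert_eq!(find_split_index(&[41, 41, 41, 41]), None);    // unsplittable
}
// ---------------------------------------------------------------------------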
- pub fn push_cell(&mut self, cell: &DataCell) -> RowIndex { - crate::profile_function!(); - - debug_assert!( - cell.datatype() == &self.datatype, - "trying to insert data of the wrong datatype in a component table, \ - expected {:?}, got {:?}", - &self.datatype, - cell.datatype(), - ); - - // TODO(#1619): don't use raw arrays - let values = cell.as_arrow_monolist(); - - self.total_rows += 1; - // Warning: this is surprisingly costly! - self.total_size_bytes += arrow2::compute::aggregate::estimated_bytes_size(&*values) as u64; - - // TODO(#589): support for non-unit-length chunks - self.chunks.push(values); - - RowIndex::from_u63(RowIndexKind::Timeless, self.chunks.len() as u64 - 1) - } -} - -// --- Components --- +// --- Timeless --- -impl ComponentTable { - /// Creates a new component table for the specified component `datatype`. - /// - /// `datatype` must be the type of the component itself, devoid of any wrapping layers - /// (i.e. _not_ a `ListArray<...>`!). - fn new(name: ComponentName, datatype: &DataType) -> Self { - ComponentTable { - name, - datatype: datatype.clone(), - buckets: [ComponentBucket::new(name, datatype, 0u64)].into(), - } - } - - /// Finds the appropriate bucket in this component table and pushes `cell` at the - /// end of it, returning the _global_ `RowIndex` for this new row. - pub fn push_cell( +impl PersistentIndexedTable { + fn insert_row( &mut self, - config: &DataStoreConfig, - time_point: &TimePoint, - cell: &DataCell, - ) -> RowIndex { + insert_id: Option, + generated_cluster_cell: Option, + row: &DataRow, + ) { crate::profile_function!(); - debug_assert!( - cell.datatype() == &self.datatype, - "trying to insert data of the wrong datatype in a component table, \ - expected {:?}, got {:?}", - &self.datatype, - cell.datatype() - ); + let num_rows = self.num_rows() as usize; - // All component tables spawn with an initial bucket at row offset 0, thus this cannot - // fail. - let active_bucket = self.buckets.back_mut().unwrap(); + let Self { + ent_path: _, + cluster_key: _, + col_insert_id, + col_row_id, + col_num_instances, + columns, + } = self; - let size = active_bucket.total_size_bytes(); - let size_overflow = active_bucket.total_size_bytes() > config.component_bucket_size_bytes; + let components: IntSet<_> = row.component_names().collect(); - let len = active_bucket.total_rows(); - let len_overflow = len > config.component_bucket_nb_rows; + // --- update all control columns --- - if size_overflow || len_overflow { - trace!( - kind = "insert", - component = self.name.as_str(), - size_limit = config.component_bucket_size_bytes, - len_limit = config.component_bucket_nb_rows, - size, - size_overflow, - len, - len_overflow, - "allocating new component bucket, previous one overflowed" - ); - - if config.enable_compaction { - active_bucket.archive(); - } - - let row_offset = active_bucket.row_offset + len; - self.buckets - .push_back(ComponentBucket::new(self.name, &self.datatype, row_offset)); + if let Some(insert_id) = insert_id { + col_insert_id.push(insert_id); } + col_row_id.push(row.row_id()); + col_num_instances.push(row.num_instances()); - // Two possible cases: - // - If the table has not just underwent an overflow, then this is panic-safe for the - // same reason as above: all component tables spawn with an initial bucket at row - // offset 0, thus this cannot fail. - // - If the table has just overflowed, then we've just pushed a bucket to the dequeue. 
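// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the legacy `push_cell` above
// re-measured the arrow data on every push (`estimated_bytes_size`, flagged
// in its own comment as "surprisingly costly"). The new write path instead
// keeps running byte totals and only ever adds the size of what was just
// inserted. A minimal std-only model of that bookkeeping; `total_size_bytes`
// stands in for the crate's real size-measuring trait:

trait TotalSizeBytes {
    fn total_size_bytes(&self) -> u64;
}

impl TotalSizeBytes for i64 {
    fn total_size_bytes(&self) -> u64 {
        std::mem::size_of::<i64>() as u64
    }
}

#[derive(Default)]
struct Bucket {
    col_time: Vec<i64>,
    size_bytes: u64, // running total, updated incrementally on every insert
}

impl Bucket {
    /// Returns the number of bytes added, so the owning table can update its
    /// own running totals too (as `IndexedTable::buckets_size_bytes` does).
    fn insert(&mut self, time: i64) -> u64 {
        let mut size_bytes_added = 0u64;
        self.col_time.push(time);
        size_bytes_added += time.total_size_bytes();
        self.size_bytes += size_bytes_added;
        size_bytes_added
    }
}

fn main() {
    let mut bucket = Bucket::default();
    let mut table_size_bytes = 0u64;
    table_size_bytes += bucket.insert(41);
    table_size_bytes += bucket.insert(42);
    assert_eq!(bucket.size_bytes, 16);
    assert_eq!(table_size_bytes, bucket.size_bytes);
}
// ---------------------------------------------------------------------------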
- let active_bucket = self.buckets.back_mut().unwrap(); - let row_idx = RowIndex::from_u63( - RowIndexKind::Temporal, - active_bucket.push_cell(time_point, cell) + active_bucket.row_offset, - ); - - trace!( - kind = "insert", - timelines = ?time_point.into_iter() - .map(|(timeline, time)| (timeline.name(), timeline.typ().format(*time))) - .collect::>(), - component = self.name.as_str(), - %row_idx, - "pushed into component table" - ); - - row_idx - } -} - -impl ComponentBucket { - /// Creates a new component bucket for the specified component `datatype`. - /// - /// `datatype` must be the type of the component itself, devoid of any wrapping layers - /// (i.e. _not_ a `ListArray<...>`!). - pub fn new(name: ComponentName, datatype: &DataType, row_offset: u64) -> Self { - // If this is the first bucket of this table, we need to insert an empty list at - // row index #0! - // TODO(#1619): the whole fake row thing needs to go - let chunks = if row_offset == 0 { - vec![DataCell::from_arrow_empty(name, datatype.clone()).as_arrow_monolist()] - } else { - vec![] - }; + // --- append components to their respective columns (2-way merge) --- - let total_rows = chunks.iter().map(|values| values.len() as u64).sum(); - let total_size_bytes = chunks - .iter() - .map(|values| arrow2::compute::aggregate::estimated_bytes_size(&**values) as u64) - .sum(); - - Self { - name, - row_offset, - archived: false, - time_ranges: Default::default(), - chunks, - total_rows, - total_size_bytes, + // insert auto-generated cluster cell if present + if let Some(cluster_cell) = generated_cluster_cell { + let column = columns + .entry(cluster_cell.component_name()) + .or_insert_with(|| DataCellColumn::empty(num_rows)); + column.0.push(Some(cluster_cell)); } - } - - /// Pushes `cell` to the end of the bucket, returning the _local_ index of the - /// freshly added row. - pub fn push_cell(&mut self, timepoint: &TimePoint, cell: &DataCell) -> u64 { - crate::profile_function!(); - // Keep track of all affected time ranges, for garbage collection purposes. - for (timeline, &time) in timepoint { - self.time_ranges - .entry(*timeline) - .and_modify(|range| { - *range = TimeRange::new(range.min.min(time), range.max.max(time)); - }) - .or_insert_with(|| TimeRange::new(time, time)); + // 2-way merge, step 1: left-to-right + for cell in row.cells().iter() { + let column = columns + .entry(cell.component_name()) + .or_insert_with(|| DataCellColumn::empty(num_rows)); + column.0.push(Some(cell.clone() /* shallow */)); } - // TODO(cmc): don't use raw arrays - let values = cell.as_arrow_monolist(); - - self.total_rows += 1; - // Warning: this is surprisingly costly! - self.total_size_bytes += arrow2::compute::aggregate::estimated_bytes_size(&*values) as u64; - - // TODO(#589): support for non-unit-length chunks - self.chunks.push(values); - - self.chunks.len() as u64 - 1 - } - - /// Archives the bucket as a new one is about to take its place. - /// - /// This is a good opportunity to run compaction and other maintenance related tasks. - #[allow(dead_code)] - pub fn archive(&mut self) { - crate::profile_function!(); - - debug_assert!( - !self.archived, - "archiving an already archived bucket, something is likely wrong" - ); - - // Chunk compaction - // Compacts the bucket by concatenating all chunks of data into a single one. 
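// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): both `IndexedBucketInner` and
// `PersistentIndexedTable` now carry "control columns" (`col_insert_id`,
// `col_row_id`, `col_num_instances`) as plain vectors that grow in lockstep,
// one entry per inserted row. A std-only model; `u64` stands in for the real
// `RowId`:

#[derive(Default)]
struct ControlColumns {
    col_insert_id: Vec<u64>, // only filled when `store_insert_ids` is enabled
    col_row_id: Vec<u64>,
    col_num_instances: Vec<u32>,
}

impl ControlColumns {
    fn push_row(&mut self, insert_id: Option<u64>, row_id: u64, num_instances: u32) {
        if let Some(insert_id) = insert_id {
            self.col_insert_id.push(insert_id);
        }
        self.col_row_id.push(row_id);
        self.col_num_instances.push(num_instances);
    }
}

fn main() {
    let mut cols = ControlColumns::default();
    cols.push_row(Some(1), 100, 3);
    cols.push_row(None, 101, 5); // insert IDs disabled for this write
    assert_eq!(cols.col_row_id.len(), 2);
    assert_eq!(cols.col_insert_id.len(), 1); // insert-ID column may be shorter
}
// ---------------------------------------------------------------------------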
- { - use arrow2::compute::concatenate::concatenate; - - let chunks = self.chunks.iter().map(|chunk| &**chunk).collect::>(); - // Only two reasons this can ever fail: - // - // * `chunks` is empty: - // This can never happen, buckets always spawn with an initial chunk. - // - // * the various chunks contain data with different datatypes: - // This can never happen as that would first panic during insertion. - let values = concatenate(&chunks).unwrap(); - - // Recompute the size as we've just discarded a bunch of list headers. - self.total_size_bytes = - arrow2::compute::aggregate::estimated_bytes_size(&*values) as u64; + // 2-way merge, step 2: right-to-left + // + // fill unimpacted secondary indices with null values + for (component, column) in columns.iter_mut() { + // The cluster key always gets added one way or another, don't try to force fill it! + if *component == self.cluster_key { + continue; + } - self.chunks = vec![values]; + if !components.contains(component) { + column.0.push(None); + } } - self.archived = true; + #[cfg(debug_assertions)] + self.sanity_check().unwrap(); } } diff --git a/crates/re_arrow_store/src/test_util.rs b/crates/re_arrow_store/src/test_util.rs index 870624782a2c..35dc129bddd4 100644 --- a/crates/re_arrow_store/src/test_util.rs +++ b/crates/re_arrow_store/src/test_util.rs @@ -1,110 +1,65 @@ -use crate::DataStoreConfig; +use crate::{DataStore, DataStoreConfig}; // --- #[doc(hidden)] #[macro_export] macro_rules! test_row { - ($entity:ident @ $frames:tt => $n:expr; [$c0:expr $(,)*]) => { - ::re_log_types::DataRow::from_cells1( - ::re_log_types::MsgId::random(), + ($entity:ident @ $frames:tt => $n:expr; [$c0:expr $(,)*]) => {{ + let mut row = ::re_log_types::DataRow::from_cells1( + ::re_log_types::RowId::random(), $entity.clone(), $frames, $n, $c0, - ) - }; - ($entity:ident @ $frames:tt => $n:expr; [$c0:expr, $c1:expr $(,)*]) => { - ::re_log_types::DataRow::from_cells2( - ::re_log_types::MsgId::random(), + ); + row.compute_all_size_bytes(); + row + }}; + ($entity:ident @ $frames:tt => $n:expr; [$c0:expr, $c1:expr $(,)*]) => {{ + let mut row = ::re_log_types::DataRow::from_cells2( + ::re_log_types::RowId::random(), $entity.clone(), $frames, $n, ($c0, $c1), - ) - }; + ); + row.compute_all_size_bytes(); + row + }}; } pub fn all_configs() -> impl Iterator { - const COMPONENT_CONFIGS: &[DataStoreConfig] = &[ - DataStoreConfig::DEFAULT, - DataStoreConfig { - component_bucket_nb_rows: 0, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - component_bucket_nb_rows: 1, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - component_bucket_nb_rows: 2, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - component_bucket_nb_rows: 3, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - component_bucket_size_bytes: 0, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - component_bucket_size_bytes: 16, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - component_bucket_size_bytes: 32, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - component_bucket_size_bytes: 64, - ..DataStoreConfig::DEFAULT - }, - ]; - const INDEX_CONFIGS: &[DataStoreConfig] = &[ DataStoreConfig::DEFAULT, DataStoreConfig { - index_bucket_nb_rows: 0, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - index_bucket_nb_rows: 1, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - index_bucket_nb_rows: 2, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - index_bucket_nb_rows: 3, + indexed_bucket_num_rows: 0, ..DataStoreConfig::DEFAULT }, DataStoreConfig { - 
index_bucket_size_bytes: 0, + indexed_bucket_num_rows: 1, ..DataStoreConfig::DEFAULT }, DataStoreConfig { - index_bucket_size_bytes: 16, + indexed_bucket_num_rows: 2, ..DataStoreConfig::DEFAULT }, DataStoreConfig { - index_bucket_size_bytes: 32, - ..DataStoreConfig::DEFAULT - }, - DataStoreConfig { - index_bucket_size_bytes: 64, + indexed_bucket_num_rows: 3, ..DataStoreConfig::DEFAULT }, ]; - COMPONENT_CONFIGS.iter().flat_map(|comp| { - INDEX_CONFIGS.iter().map(|idx| DataStoreConfig { - component_bucket_size_bytes: comp.component_bucket_size_bytes, - component_bucket_nb_rows: comp.component_bucket_nb_rows, - index_bucket_size_bytes: idx.index_bucket_size_bytes, - index_bucket_nb_rows: idx.index_bucket_nb_rows, - store_insert_ids: comp.store_insert_ids || idx.store_insert_ids, - enable_compaction: comp.enable_compaction || idx.enable_compaction, - }) + INDEX_CONFIGS.iter().map(|idx| DataStoreConfig { + indexed_bucket_num_rows: idx.indexed_bucket_num_rows, + store_insert_ids: idx.store_insert_ids, + enable_typecheck: idx.enable_typecheck, }) } + +pub fn sanity_unwrap(store: &mut DataStore) { + if let err @ Err(_) = store.sanity_check() { + store.sort_indices_if_needed(); + eprintln!("{store}"); + err.unwrap(); + } +} diff --git a/crates/re_arrow_store/tests/correctness.rs b/crates/re_arrow_store/tests/correctness.rs index 74ec6a8a7640..fba86298332e 100644 --- a/crates/re_arrow_store/tests/correctness.rs +++ b/crates/re_arrow_store/tests/correctness.rs @@ -7,15 +7,15 @@ use std::sync::atomic::{AtomicBool, Ordering::SeqCst}; use rand::Rng; use re_arrow_store::{ - test_row, DataStore, DataStoreConfig, GarbageCollectionTarget, LatestAtQuery, WriteError, + test_row, test_util::sanity_unwrap, DataStore, DataStoreConfig, DataStoreStats, + GarbageCollectionTarget, LatestAtQuery, WriteError, }; use re_log_types::{ component_types::InstanceKey, datagen::{ build_frame_nr, build_log_time, build_some_colors, build_some_instances, build_some_point2d, }, - external::arrow2_convert::deserialize::arrow_array_deserialize_iterator, - Component as _, DataCell, Duration, EntityPath, MsgId, Time, TimeType, Timeline, + Component as _, DataCell, Duration, EntityPath, Time, TimeType, Timeline, }; // --- @@ -102,11 +102,7 @@ fn latest_at_emptiness_edge_cases_impl(store: &mut DataStore) { ] => num_instances; [build_some_instances(num_instances as _)])) .unwrap(); - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(store); let timeline_wrong_name = Timeline::new("lag_time", TimeType::Time); let timeline_wrong_kind = Timeline::new("log_time", TimeType::Sequence); @@ -115,41 +111,41 @@ fn latest_at_emptiness_edge_cases_impl(store: &mut DataStore) { // empty frame_nr { - let row_indices = store.latest_at( + let cells = store.latest_at( &LatestAtQuery::new(timeline_frame_nr, frame39), &ent_path, InstanceKey::name(), &[InstanceKey::name()], ); - assert!(row_indices.is_none()); + assert!(cells.is_none()); } // empty log_time { - let row_indices = store.latest_at( + let cells = store.latest_at( &LatestAtQuery::new(timeline_log_time, now_minus_1s_nanos), &ent_path, InstanceKey::name(), &[InstanceKey::name()], ); - assert!(row_indices.is_none()); + assert!(cells.is_none()); } // wrong entity path { - let row_indices = store.latest_at( + let cells = store.latest_at( &LatestAtQuery::new(timeline_frame_nr, frame40), &EntityPath::from("does/not/exist"), InstanceKey::name(), &[InstanceKey::name()], ); - assert!(row_indices.is_none()); + 
assert!(cells.is_none()); } // bunch of non-existing components { let components = &["they".into(), "dont".into(), "exist".into()]; - let row_indices = store + let (_, cells) = store .latest_at( &LatestAtQuery::new(timeline_frame_nr, frame40), &ent_path, @@ -157,13 +153,12 @@ fn latest_at_emptiness_edge_cases_impl(store: &mut DataStore) { components, ) .unwrap(); - let rows = store.get(components, &row_indices); - rows.iter().all(|row| row.is_none()); + cells.iter().all(|cell| cell.is_none()); } // empty component list { - let row_indices = store + let (_, cells) = store .latest_at( &LatestAtQuery::new(timeline_frame_nr, frame40), &ent_path, @@ -171,29 +166,29 @@ fn latest_at_emptiness_edge_cases_impl(store: &mut DataStore) { &[], ) .unwrap(); - assert!(row_indices.is_empty()); + assert!(cells.is_empty()); } // wrong timeline name { - let row_indices = store.latest_at( + let cells = store.latest_at( &LatestAtQuery::new(timeline_wrong_name, frame40), &EntityPath::from("does/not/exist"), InstanceKey::name(), &[InstanceKey::name()], ); - assert!(row_indices.is_none()); + assert!(cells.is_none()); } // wrong timeline kind { - let row_indices = store.latest_at( + let cells = store.latest_at( &LatestAtQuery::new(timeline_wrong_kind, frame40), &EntityPath::from("does/not/exist"), InstanceKey::name(), &[InstanceKey::name()], ); - assert!(row_indices.is_none()); + assert!(cells.is_none()); } } @@ -282,11 +277,12 @@ fn gc_correct() { let mut store = DataStore::new( InstanceKey::name(), DataStoreConfig { - component_bucket_nb_rows: 0, ..Default::default() }, ); + let stats_empty = DataStoreStats::from_store(&store); + let mut rng = rand::thread_rng(); let num_frames = rng.gen_range(0..=100); @@ -305,69 +301,37 @@ fn gc_correct() { } } - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(&mut store); check_still_readable(&store); - let msg_id_chunks = store.gc( - GarbageCollectionTarget::DropAtLeastPercentage(1.0), - Timeline::new("frame_nr", TimeType::Sequence), - MsgId::name(), - ); + let stats = DataStoreStats::from_store(&store); - let msg_ids = msg_id_chunks - .iter() - .flat_map(|chunk| arrow_array_deserialize_iterator::>(&**chunk).unwrap()) - .map(Option::unwrap) // MsgId is always present - .collect::>(); - assert!(!msg_ids.is_empty()); - - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } - check_still_readable(&store); - for msg_id in &msg_ids { - assert!(store.get_msg_metadata(msg_id).is_some()); - } + let (row_ids, stats_diff) = store.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + let stats_diff = stats_diff + stats_empty; // account for fixed overhead - store.clear_msg_metadata(&msg_ids); + assert_eq!(row_ids.len() as u64, stats.total.num_rows); + assert_eq!( + stats.metadata_registry.num_rows, + stats_diff.metadata_registry.num_rows + ); + assert_eq!( + stats.metadata_registry.num_bytes, + stats_diff.metadata_registry.num_bytes + ); + assert_eq!(stats.temporal.num_rows, stats_diff.temporal.num_rows); - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(&mut store); check_still_readable(&store); - for msg_id in &msg_ids { - assert!(store.get_msg_metadata(msg_id).is_none()); + for row_id in &row_ids { + assert!(store.get_msg_metadata(row_id).is_none()); } - let msg_id_chunks = store.gc( - 
GarbageCollectionTarget::DropAtLeastPercentage(1.0), - Timeline::new("frame_nr", TimeType::Sequence), - MsgId::name(), - ); + let (row_ids, stats_diff) = store.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + assert!(row_ids.is_empty()); + assert_eq!(DataStoreStats::default(), stats_diff); - let msg_ids = msg_id_chunks - .iter() - .flat_map(|chunk| arrow_array_deserialize_iterator::>(&**chunk).unwrap()) - .map(Option::unwrap) // MsgId is always present - .collect::>(); - assert!(msg_ids.is_empty()); - - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(&mut store); check_still_readable(&store); - - assert_eq!(2, store.total_temporal_component_rows()); } fn check_still_readable(_store: &DataStore) { diff --git a/crates/re_arrow_store/tests/data_store.rs b/crates/re_arrow_store/tests/data_store.rs index d6a0ac79ffea..5ba2a722bf80 100644 --- a/crates/re_arrow_store/tests/data_store.rs +++ b/crates/re_arrow_store/tests/data_store.rs @@ -11,8 +11,8 @@ use polars_core::{prelude::*, series::Series}; use polars_ops::prelude::DataFrameJoinOps; use rand::Rng; use re_arrow_store::{ - polars_util, test_row, DataStore, DataStoreConfig, GarbageCollectionTarget, LatestAtQuery, - RangeQuery, TimeInt, TimeRange, + polars_util, test_row, test_util::sanity_unwrap, DataStore, DataStoreConfig, DataStoreStats, + GarbageCollectionTarget, LatestAtQuery, RangeQuery, TimeInt, TimeRange, }; use re_log_types::{ component_types::{ColorRGBA, InstanceKey, Point2D, Rect2D}, @@ -20,8 +20,8 @@ use re_log_types::{ build_frame_nr, build_some_colors, build_some_instances, build_some_instances_from, build_some_point2d, build_some_rects, }, - external::arrow2_convert::deserialize::arrow_array_deserialize_iterator, - Component as _, ComponentName, DataCell, DataRow, EntityPath, MsgId, TimeType, Timeline, + Component as _, ComponentName, DataCell, DataRow, DataTable, EntityPath, TableId, TimeType, + Timeline, }; // TODO(#1619): introduce batching in the testing matrix @@ -42,6 +42,19 @@ fn all_components() { let assert_latest_components_at = |store: &mut DataStore, ent_path: &EntityPath, expected: Option<&[ComponentName]>| { + // Stress test save-to-disk & load-from-disk + let mut store2 = DataStore::new(store.cluster_key(), store.config().clone()); + for table in store.to_data_tables(None) { + store2.insert_table(&table).unwrap(); + } + + // Stress test GC + store2.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + for table in store.to_data_tables(None) { + store2.insert_table(&table).unwrap(); + } + + let mut store = store2; let timeline = Timeline::new("frame_nr", TimeType::Sequence); let components = store.all_components(&timeline, ent_path); @@ -69,30 +82,23 @@ fn all_components() { let mut store = DataStore::new( InstanceKey::name(), DataStoreConfig { - component_bucket_nb_rows: u64::MAX, - index_bucket_nb_rows: u64::MAX, + indexed_bucket_num_rows: u64::MAX, ..Default::default() }, ); let cluster_key = store.cluster_key(); let components_a = &[ - ColorRGBA::name(), // added by us, timeless - Rect2D::name(), // added by us + ColorRGBA::name(), // added by test, timeless + Rect2D::name(), // added by test cluster_key, // always here - MsgId::name(), // automatically appended by DataRow - #[cfg(debug_assertions)] - DataStore::insert_id_key(), // automatically added in debug ]; let components_b = &[ - ColorRGBA::name(), // added by us, timeless - Point2D::name(), // added by us - Rect2D::name(), // added by us + 
ColorRGBA::name(), // added by test, timeless + Point2D::name(), // added by test + Rect2D::name(), // added by test cluster_key, // always here - MsgId::name(), // automatically appended by DataRow - #[cfg(debug_assertions)] - DataStore::insert_id_key(), // automatically added in debug ]; let row = test_row!(ent_path @ [] => 2; [build_some_colors(2)]); @@ -110,11 +116,7 @@ fn all_components() { assert_latest_components_at(&mut store, &ent_path, Some(components_b)); - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(&mut store); } // Tiny buckets, demonstrating the harder-to-reason-about cases. @@ -122,20 +124,19 @@ fn all_components() { let mut store = DataStore::new( InstanceKey::name(), DataStoreConfig { - component_bucket_nb_rows: 0, - index_bucket_nb_rows: 0, + indexed_bucket_num_rows: 0, ..Default::default() }, ); let cluster_key = store.cluster_key(); // β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - // β”‚ frame_nr ┆ rect2d ┆ msg_id ┆ insert_id ┆ instance β”‚ + // β”‚ frame_nr ┆ rect2d ┆ row_id ┆ insert_id ┆ instance β”‚ // β•žβ•β•β•β•β•β•β•β•β•β•β•ͺ════════β•ͺ════════β•ͺ═══════════β•ͺ══════════║ // β”‚ 1 ┆ 1 ┆ 1 ┆ 1 ┆ 1 β”‚ // β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ // β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - // β”‚ frame_nr ┆ rect2d ┆ point2d ┆ msg_id ┆ insert_id ┆ instance β”‚ + // β”‚ frame_nr ┆ rect2d ┆ point2d ┆ row_id ┆ insert_id ┆ instance β”‚ // β•žβ•β•β•β•β•β•β•β•β•β•β•ͺ════════β•ͺ═════════β•ͺ════════β•ͺ═══════════β•ͺ══════════║ // β”‚ 2 ┆ - ┆ - ┆ 2 ┆ 2 ┆ 2 β”‚ // β”œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”€ @@ -143,22 +144,16 @@ fn all_components() { // β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ let components_a = &[ - ColorRGBA::name(), // added by us, timeless - Rect2D::name(), // added by us + ColorRGBA::name(), // added by test, timeless + Rect2D::name(), // added by test cluster_key, // always here - MsgId::name(), // automatically appended by DataRow - #[cfg(debug_assertions)] - DataStore::insert_id_key(), // automatically added in debug ]; let components_b = &[ - ColorRGBA::name(), // added by us, timeless + ColorRGBA::name(), // added by test, timeless Rect2D::name(), // ⚠ inherited before the buckets got split apart! 
- Point2D::name(), // added by us + Point2D::name(), // added by test cluster_key, // always here - MsgId::name(), // automatically appended by DataRow - #[cfg(debug_assertions)] - DataStore::insert_id_key(), // automatically added in debug ]; let row = test_row!(ent_path @ [] => 2; [build_some_colors(2)]); @@ -179,11 +174,7 @@ fn all_components() { assert_latest_components_at(&mut store, &ent_path, Some(components_b)); - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(&mut store); } // Tiny buckets and tricky splits, demonstrating a case that is not only extremely hard to @@ -192,22 +183,21 @@ fn all_components() { let mut store = DataStore::new( InstanceKey::name(), DataStoreConfig { - component_bucket_nb_rows: 0, - index_bucket_nb_rows: 0, + indexed_bucket_num_rows: 0, ..Default::default() }, ); let cluster_key = store.cluster_key(); // β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - // β”‚ frame_nr ┆ rect2d ┆ point2d ┆ msg_id ┆ insert_id ┆ instance β”‚ + // β”‚ frame_nr ┆ rect2d ┆ point2d ┆ row_id ┆ insert_id ┆ instance β”‚ // β•žβ•β•β•β•β•β•β•β•β•β•β•ͺ════════β•ͺ═════════β•ͺ════════β•ͺ═══════════β•ͺ══════════║ // β”‚ 1 ┆ - ┆ 1 ┆ 4 ┆ 4 ┆ 1 β”‚ // β”œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”€ // β”‚ 2 ┆ 1 ┆ - ┆ 1 ┆ 1 ┆ 1 β”‚ // β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ // β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - // β”‚ frame_nr ┆ rect2d ┆ msg_id ┆ insert_id ┆ instance β”‚ + // β”‚ frame_nr ┆ rect2d ┆ row_id ┆ insert_id ┆ instance β”‚ // β•žβ•β•β•β•β•β•β•β•β•β•β•ͺ════════β•ͺ════════β•ͺ═══════════β•ͺ══════════║ // β”‚ 3 ┆ 2 ┆ 2 ┆ 2 ┆ 1 β”‚ // β”œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”€ @@ -215,22 +205,16 @@ fn all_components() { // β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ let components_a = &[ - ColorRGBA::name(), // added by us, timeless - Rect2D::name(), // added by us + ColorRGBA::name(), // added by test, timeless + Rect2D::name(), // added by test cluster_key, // always here - MsgId::name(), // automatically appended by DataRow - #[cfg(debug_assertions)] - DataStore::insert_id_key(), // automatically added in debug ]; let components_b = &[ - ColorRGBA::name(), // added by us, timeless - Point2D::name(), // added by us but not contained in the second bucket - Rect2D::name(), // added by use + ColorRGBA::name(), // added by test, timeless + Point2D::name(), // added by test but not contained in the second bucket + Rect2D::name(), // added by test cluster_key, // always here - MsgId::name(), // automatically appended by DataRow - #[cfg(debug_assertions)] - DataStore::insert_id_key(), // automatically added in debug ]; let row = test_row!(ent_path @ [] => 2; [build_some_colors(2)]); @@ -256,11 +240,7 
@@ fn all_components() { assert_latest_components_at(&mut store, &ent_path, Some(components_b)); - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(&mut store); } } @@ -273,12 +253,6 @@ fn latest_at() { for config in re_arrow_store::test_util::all_configs() { let mut store = DataStore::new(InstanceKey::name(), config.clone()); latest_at_impl(&mut store); - store.gc( - GarbageCollectionTarget::DropAtLeastPercentage(1.0), - Timeline::new("frame_nr", TimeType::Sequence), - MsgId::name(), - ); - latest_at_impl(&mut store); } } @@ -293,45 +267,57 @@ fn latest_at_impl(store: &mut DataStore) { let frame3: TimeInt = 3.into(); let frame4: TimeInt = 4.into(); - // helper to insert a row both as a temporal and timeless payload - let insert = |store: &mut DataStore, row| { + // helper to insert a table both as a temporal and timeless payload + let insert_table = |store: &mut DataStore, table: &DataTable| { // insert temporal - store.insert_row(row).unwrap(); + store.insert_table(table).unwrap(); // insert timeless - let mut row_timeless = (*row).clone(); - row_timeless.timepoint = Default::default(); - store.insert_row(&row_timeless).unwrap(); + let mut table_timeless = table.clone(); + table_timeless.col_timelines = Default::default(); + store.insert_table(&table_timeless).unwrap(); }; let (instances1, colors1) = (build_some_instances(3), build_some_colors(3)); let row1 = test_row!(ent_path @ [build_frame_nr(frame1)] => 3; [instances1.clone(), colors1]); - insert(store, &row1); let points2 = build_some_point2d(3); let row2 = test_row!(ent_path @ [build_frame_nr(frame2)] => 3; [instances1, points2]); - insert(store, &row2); let points3 = build_some_point2d(10); let row3 = test_row!(ent_path @ [build_frame_nr(frame3)] => 10; [points3]); - insert(store, &row3); let colors4 = build_some_colors(5); let row4 = test_row!(ent_path @ [build_frame_nr(frame4)] => 5; [colors4]); - insert(store, &row4); - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); + insert_table( + store, + &DataTable::from_rows( + TableId::random(), + [row1.clone(), row2.clone(), row3.clone(), row4.clone()], + ), + ); + + // Stress test save-to-disk & load-from-disk + let mut store2 = DataStore::new(store.cluster_key(), store.config().clone()); + for table in store.to_data_tables(None) { + store2.insert_table(&table).unwrap(); } + // Stress test GC + store2.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + for table in store.to_data_tables(None) { + store2.insert_table(&table).unwrap(); + } + let mut store = store2; + + sanity_unwrap(&mut store); let mut assert_latest_components = |frame_nr: TimeInt, rows: &[(ComponentName, &DataRow)]| { let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); let components_all = &[ColorRGBA::name(), Point2D::name()]; let df = polars_util::latest_components( - store, + &store, &LatestAtQuery::new(timeline_frame_nr, frame_nr), &ent_path, components_all, @@ -443,21 +429,29 @@ fn range_impl(store: &mut DataStore) { let row4_4 = test_row!(ent_path @ [build_frame_nr(frame4)] => 5; [insts4_3, points4_4]); insert(store, &row4_4); - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(store); // Each entry in `rows_at_times` corresponds to a dataframe that's expected to be returned // by the range query. 
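// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the save-to-disk/load-from-disk
// stress pattern this diff threads through every test, condensed into the
// helper one could factor out of it. All calls (`cluster_key`, `config`,
// `to_data_tables`, `insert_table`) appear verbatim in the test code above:

fn roundtrip(store: &re_arrow_store::DataStore) -> re_arrow_store::DataStore {
    let mut store2 =
        re_arrow_store::DataStore::new(store.cluster_key(), store.config().clone());
    // Dump the entire store as `DataTable`s and replay them into a fresh one;
    // the tests then assert the copy answers queries identically.
    for table in store.to_data_tables(None) {
        store2.insert_table(&table).unwrap();
    }
    store2
}
// ---------------------------------------------------------------------------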
// A single timepoint might have several of those! That's one of the behaviors specific to // range queries. #[allow(clippy::type_complexity)] - let mut assert_range_components = + let assert_range_components = |time_range: TimeRange, components: [ComponentName; 2], rows_at_times: &[(Option, &[(ComponentName, &DataRow)])]| { + // Stress test save-to-disk & load-from-disk + let mut store2 = DataStore::new(store.cluster_key(), store.config().clone()); + for table in store.to_data_tables(None) { + store2.insert_table(&table).unwrap(); + } + store2.wipe_timeless_data(); + store2.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + for table in store.to_data_tables(None) { + store2.insert_table(&table).unwrap(); + } + let mut store = store2; + let mut expected_timeless = Vec::::new(); let mut expected_at_times: IntMap> = Default::default(); @@ -477,7 +471,7 @@ fn range_impl(store: &mut DataStore) { let components = [InstanceKey::name(), components[0], components[1]]; let query = RangeQuery::new(timeline_frame_nr, time_range); let dfs = polars_util::range_components( - store, + &store, &query, &ent_path, components[1], @@ -810,14 +804,14 @@ fn joint_df(cluster_key: ComponentName, rows: &[(ComponentName, &DataRow)]) -> D .iter() .map(|(component, row)| { let cluster_comp = if let Some(idx) = row.find_cell(&cluster_key) { - Series::try_from((cluster_key.as_str(), row.cells[idx].as_arrow_monolist())) + Series::try_from((cluster_key.as_str(), row.cells[idx].to_arrow_monolist())) .unwrap() } else { let num_instances = row.num_instances(); Series::try_from(( cluster_key.as_str(), DataCell::from_component::(0..num_instances as u64) - .as_arrow_monolist(), + .to_arrow_monolist(), )) .unwrap() }; @@ -825,7 +819,7 @@ fn joint_df(cluster_key: ComponentName, rows: &[(ComponentName, &DataRow)]) -> D let comp_idx = row.find_cell(component).unwrap(); let df = DataFrame::new(vec![ cluster_comp, - Series::try_from((component.as_str(), row.cells[comp_idx].as_arrow_monolist())) + Series::try_from((component.as_str(), row.cells[comp_idx].to_arrow_monolist())) .unwrap(), ]) .unwrap(); @@ -876,34 +870,32 @@ fn gc_impl(store: &mut DataStore) { } } - if let err @ Err(_) = store.sanity_check() { - store.sort_indices_if_needed(); - eprintln!("{store}"); - err.unwrap(); - } + sanity_unwrap(store); _ = store.to_dataframe(); // simple way of checking that everything is still readable - let msg_id_chunks = store.gc( - GarbageCollectionTarget::DropAtLeastPercentage(1.0 / 3.0), - Timeline::new("frame_nr", TimeType::Sequence), - MsgId::name(), - ); + let stats = DataStoreStats::from_store(store); - let msg_ids = msg_id_chunks - .iter() - .flat_map(|chunk| arrow_array_deserialize_iterator::>(&**chunk).unwrap()) - .map(Option::unwrap) // MsgId is always present - .collect::>(); - - for msg_id in &msg_ids { - assert!(store.get_msg_metadata(msg_id).is_some()); + let (row_ids, stats_diff) = + store.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0 / 3.0)); + for row_id in &row_ids { + assert!(store.get_msg_metadata(row_id).is_none()); } - store.clear_msg_metadata(&msg_ids); - - for msg_id in &msg_ids { - assert!(store.get_msg_metadata(msg_id).is_none()); - } + // NOTE: only temporal data and row metadata get purged! 
+ let num_bytes_dropped = + (stats_diff.temporal.num_bytes + stats_diff.metadata_registry.num_bytes) as f64; + let num_bytes_dropped_expected_min = + (stats.temporal.num_bytes + stats.metadata_registry.num_bytes) as f64 * 0.95 / 3.0; + let num_bytes_dropped_expected_max = + (stats.temporal.num_bytes + stats.metadata_registry.num_bytes) as f64 * 1.05 / 3.0; + assert!( + num_bytes_dropped_expected_min <= num_bytes_dropped + && num_bytes_dropped <= num_bytes_dropped_expected_max, + "{} <= {} <= {}", + re_format::format_bytes(num_bytes_dropped_expected_min), + re_format::format_bytes(num_bytes_dropped), + re_format::format_bytes(num_bytes_dropped_expected_max), + ); } } diff --git a/crates/re_arrow_store/tests/dump.rs b/crates/re_arrow_store/tests/dump.rs new file mode 100644 index 000000000000..5a18c4d962e9 --- /dev/null +++ b/crates/re_arrow_store/tests/dump.rs @@ -0,0 +1,238 @@ +//! Dumping a datastore to log messages and back. + +use std::sync::atomic::{AtomicBool, Ordering}; + +use itertools::Itertools; +use re_arrow_store::{ + test_row, test_util::sanity_unwrap, DataStore, DataStoreStats, GarbageCollectionTarget, + TimeInt, TimeRange, Timeline, +}; +use re_log_types::{ + component_types::InstanceKey, + datagen::{ + build_frame_nr, build_log_time, build_some_colors, build_some_instances, build_some_point2d, + }, + Component as _, DataTable, EntityPath, TableId, +}; + +// --- Dump --- + +#[test] +fn data_store_dump() { + init_logs(); + + for mut config in re_arrow_store::test_util::all_configs() { + // NOTE: insert IDs aren't serialized and can be different across runs. + config.store_insert_ids = false; + + let mut store1 = DataStore::new(InstanceKey::name(), config.clone()); + let mut store2 = DataStore::new(InstanceKey::name(), config.clone()); + let mut store3 = DataStore::new(InstanceKey::name(), config.clone()); + + data_store_dump_impl(&mut store1, &mut store2, &mut store3); + + // stress-test GC impl + store1.wipe_timeless_data(); + store1.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + store2.wipe_timeless_data(); + store2.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + store3.wipe_timeless_data(); + store3.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + + data_store_dump_impl(&mut store1, &mut store2, &mut store3); + } +} + +fn data_store_dump_impl(store1: &mut DataStore, store2: &mut DataStore, store3: &mut DataStore) { + // helper to insert a table both as a temporal and timeless payload + let insert_table = |store: &mut DataStore, table: &DataTable| { + // insert temporal + store.insert_table(table).unwrap(); + + // insert timeless + let mut table_timeless = table.clone(); + table_timeless.col_timelines = Default::default(); + store.insert_table(&table_timeless).unwrap(); + }; + + let ent_paths = ["this/that", "other", "yet/another/one"]; + let tables = ent_paths + .iter() + .map(|ent_path| create_insert_table(*ent_path)) + .collect_vec(); + + // Fill the first store. + for table in &tables { + insert_table(store1, table); + } + sanity_unwrap(store1); + + // Dump the first store into the second one. + for table in store1.to_data_tables(None) { + store2.insert_table(&table).unwrap(); + } + sanity_unwrap(store2); + + // Dump the second store into the third one. 
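+ // (A second round-trip checks that dumping is stable: re-ingesting a dump
+ // of a dump must yield the exact same data again.)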
+ for table in store2.to_data_tables(None) { + store3.insert_table(&table).unwrap(); + } + sanity_unwrap(store3); + + let store1_df = store1.to_dataframe(); + let store2_df = store2.to_dataframe(); + let store3_df = store3.to_dataframe(); + assert!( + store1_df == store2_df, + "First & second stores differ:\n{store1_df}\n{store2_df}" + ); + assert!( + store1_df == store3_df, + "First & third stores differ:\n{store1_df}\n{store3_df}" + ); + + let store1_stats = DataStoreStats::from_store(store1); + let store2_stats = DataStoreStats::from_store(store2); + let store3_stats = DataStoreStats::from_store(store3); + assert!( + store1_stats.temporal.num_bytes <= store2_stats.temporal.num_bytes + && store1_stats.timeless.num_bytes <= store2_stats.timeless.num_bytes, + "First store should have <= amount of data of second store:\n\ + {store1_stats:#?}\n{store2_stats:#?}" + ); + assert!( + store2_stats.temporal.num_bytes <= store3_stats.temporal.num_bytes + && store2_stats.timeless.num_bytes <= store3_stats.timeless.num_bytes, + "Second store should have <= amount of data of third store:\n\ + {store2_stats:#?}\n{store3_stats:#?}" + ); +} + +// --- Time-based filtering --- + +#[test] +fn data_store_dump_filtered() { + init_logs(); + + for mut config in re_arrow_store::test_util::all_configs() { + // NOTE: insert IDs aren't serialized and can be different across runs. + config.store_insert_ids = false; + + let mut store1 = DataStore::new(InstanceKey::name(), config.clone()); + let mut store2 = DataStore::new(InstanceKey::name(), config.clone()); + + data_store_dump_filtered_impl(&mut store1, &mut store2); + + // stress-test GC impl + store1.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + store2.gc(GarbageCollectionTarget::DropAtLeastFraction(1.0)); + + data_store_dump_filtered_impl(&mut store1, &mut store2); + } +} + +fn data_store_dump_filtered_impl(store1: &mut DataStore, store2: &mut DataStore) { + let timeline_frame_nr = Timeline::new_sequence("frame_nr"); + let timeline_log_time = Timeline::new_temporal("log_time"); + let frame1: TimeInt = 1.into(); + let frame2: TimeInt = 2.into(); + let frame3: TimeInt = 3.into(); + let frame4: TimeInt = 4.into(); + + let ent_paths = ["this/that", "other", "yet/another/one"]; + let tables = ent_paths + .iter() + .map(|ent_path| create_insert_table(*ent_path)) + .collect_vec(); + + // Fill the first store. + for table in &tables { + store1.insert_table(table).unwrap(); + } + sanity_unwrap(store1); + + // Dump frame1 from the first store into the second one. + for table in store1.to_data_tables((timeline_frame_nr, TimeRange::new(frame1, frame1)).into()) { + store2.insert_table(&table).unwrap(); + } + // Dump frame2 from the first store into the second one. + for table in store1.to_data_tables((timeline_frame_nr, TimeRange::new(frame2, frame2)).into()) { + store2.insert_table(&table).unwrap(); + } + // Dump frame3 from the first store into the second one. + for table in store1.to_data_tables((timeline_frame_nr, TimeRange::new(frame3, frame3)).into()) { + store2.insert_table(&table).unwrap(); + } + // Dump the other frame3 from the first store into the second one. + for table in store1.to_data_tables((timeline_log_time, TimeRange::new(frame3, frame3)).into()) { + store2.insert_table(&table).unwrap(); + } + // Dump frame4 from the first store into the second one. 
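+ // Once this last range is copied, every timepoint in the first store has
+ // been dumped at least once, so the two stores must compare equal below.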
+ for table in store1.to_data_tables((timeline_frame_nr, TimeRange::new(frame4, frame4)).into()) { + store2.insert_table(&table).unwrap(); + } + sanity_unwrap(store2); + + let store1_df = store1.to_dataframe(); + let store2_df = store2.to_dataframe(); + assert!( + store1_df == store2_df, + "First & second stores differ:\n{store1_df}\n{store2_df}" + ); + + let store1_stats = DataStoreStats::from_store(store1); + let store2_stats = DataStoreStats::from_store(store2); + assert!( + store1_stats.temporal.num_bytes <= store2_stats.temporal.num_bytes + && store1_stats.timeless.num_bytes <= store2_stats.timeless.num_bytes, + "First store should have <= amount of data of second store:\n\ + {store1_stats:#?}\n{store2_stats:#?}" + ); +} + +// --- + +pub fn init_logs() { + static INIT: AtomicBool = AtomicBool::new(false); + + if INIT + .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) + .is_ok() + { + re_log::setup_native_logging(); + } +} + +fn create_insert_table(ent_path: impl Into) -> DataTable { + let ent_path = ent_path.into(); + + let frame1: TimeInt = 1.into(); + let frame2: TimeInt = 2.into(); + let frame3: TimeInt = 3.into(); + let frame4: TimeInt = 4.into(); + + let (instances1, colors1) = (build_some_instances(3), build_some_colors(3)); + let row1 = test_row!(ent_path @ [ + build_frame_nr(frame1), + ] => 3; [instances1.clone(), colors1]); + + let points2 = build_some_point2d(3); + let row2 = test_row!(ent_path @ [ + build_frame_nr(frame2), + ] => 3; [instances1, points2]); + + let points3 = build_some_point2d(10); + let row3 = test_row!(ent_path @ [ + build_log_time(frame3.into()) /* ! */, build_frame_nr(frame3), + ] => 10; [points3]); + + let colors4 = build_some_colors(5); + let row4 = test_row!(ent_path @ [ + build_frame_nr(frame4), + ] => 5; [colors4]); + + let mut table = DataTable::from_rows(TableId::random(), [row1, row2, row3, row4]); + table.compute_all_size_bytes(); + + table +} diff --git a/crates/re_arrow_store/tests/internals.rs b/crates/re_arrow_store/tests/internals.rs index d666e40b5675..63888bf65b21 100644 --- a/crates/re_arrow_store/tests/internals.rs +++ b/crates/re_arrow_store/tests/internals.rs @@ -8,7 +8,7 @@ use re_arrow_store::{DataStore, DataStoreConfig}; use re_log_types::{ component_types::InstanceKey, datagen::{build_frame_nr, build_some_instances}, - Component as _, DataRow, EntityPath, MsgId, TimePoint, + Component as _, DataRow, EntityPath, RowId, TimePoint, }; // --- Internals --- @@ -29,14 +29,14 @@ fn pathological_bucket_topology() { let mut store_forward = DataStore::new( InstanceKey::name(), DataStoreConfig { - index_bucket_nb_rows: 10, + indexed_bucket_num_rows: 10, ..Default::default() }, ); let mut store_backward = DataStore::new( InstanceKey::name(), DataStoreConfig { - index_bucket_nb_rows: 10, + indexed_bucket_num_rows: 10, ..Default::default() }, ); @@ -53,7 +53,7 @@ fn pathological_bucket_topology() { let timepoint = TimePoint::from([build_frame_nr(frame_nr.into())]); for _ in 0..num { let row = DataRow::from_cells1( - MsgId::ZERO, + RowId::random(), ent_path.clone(), timepoint.clone(), num_instances, @@ -62,7 +62,7 @@ fn pathological_bucket_topology() { store_forward.insert_row(&row).unwrap(); let row = DataRow::from_cells1( - MsgId::ZERO, + RowId::random(), ent_path.clone(), timepoint.clone(), num_instances, @@ -84,7 +84,7 @@ fn pathological_bucket_topology() { .map(|frame_nr| { let timepoint = TimePoint::from([build_frame_nr(frame_nr.into())]); DataRow::from_cells1( - MsgId::ZERO, + RowId::random(), ent_path.clone(), 
timepoint, num_instances, @@ -112,7 +112,7 @@ fn pathological_bucket_topology() { { let num_buckets = store_forward .iter_indices() - .flat_map(|(_, table)| table.iter_buckets()) + .flat_map(|(_, table)| table.buckets.values()) .count(); assert_eq!( 7usize, @@ -127,7 +127,7 @@ fn pathological_bucket_topology() { { let num_buckets = store_backward .iter_indices() - .flat_map(|(_, table)| table.iter_buckets()) + .flat_map(|(_, table)| table.buckets.values()) .count(); assert_eq!( 8usize, diff --git a/crates/re_data_store/Cargo.toml b/crates/re_data_store/Cargo.toml index a77bdcb40096..607efc62ef3b 100644 --- a/crates/re_data_store/Cargo.toml +++ b/crates/re_data_store/Cargo.toml @@ -25,11 +25,12 @@ serde = ["dep:serde", "re_log_types/serde"] [dependencies] re_arrow_store.workspace = true +re_format.workspace = true re_int_histogram.workspace = true -re_log_types.workspace = true re_log.workspace = true +re_log_encoding = { workspace = true, optional = true } +re_log_types.workspace = true re_smart_channel.workspace = true -re_string_interner.workspace = true ahash.workspace = true document-features = "0.2" @@ -47,7 +48,7 @@ puffin.workspace = true criterion = "0.4" mimalloc.workspace = true rand = "0.8" -re_log_types = { workspace = true, features = ["load", "save"] } +re_log_encoding = { workspace = true, features = ["decoder", "encoder"] } [lib] bench = false @@ -55,4 +56,4 @@ bench = false [[example]] name = "memory_usage" path = "examples/memory_usage.rs" -required-features = ["re_log_types/load", "re_log_types/save"] +required-features = ["re_log_encoding/decoder", "re_log_encoding/encoder"] diff --git a/crates/re_data_store/examples/memory_usage.rs b/crates/re_data_store/examples/memory_usage.rs index 105d7e1a0014..3e62a48367bb 100644 --- a/crates/re_data_store/examples/memory_usage.rs +++ b/crates/re_data_store/examples/memory_usage.rs @@ -48,7 +48,7 @@ fn live_bytes() -> usize { // ---------------------------------------------------------------------------- -use re_log_types::{entity_path, DataRow, MsgId}; +use re_log_types::{entity_path, DataRow, RecordingId, RowId}; fn main() { log_messages(); @@ -57,7 +57,7 @@ fn main() { fn log_messages() { use re_log_types::{ datagen::{build_frame_nr, build_some_point2d}, - ArrowMsg, LogMsg, TimeInt, TimePoint, Timeline, + LogMsg, TimeInt, TimePoint, Timeline, }; // Note: we use Box in this function so that we also count the "static" @@ -65,12 +65,12 @@ fn log_messages() { fn encode_log_msg(log_msg: &LogMsg) -> Vec { let mut bytes = vec![]; - re_log_types::encoding::encode(std::iter::once(log_msg), &mut bytes).unwrap(); + re_log_encoding::encoder::encode(std::iter::once(log_msg), &mut bytes).unwrap(); bytes } fn decode_log_msg(mut bytes: &[u8]) -> LogMsg { - let mut messages = re_log_types::encoding::Decoder::new(&mut bytes) + let mut messages = re_log_encoding::decoder::Decoder::new(&mut bytes) .unwrap() .collect::, _>>() .unwrap(); @@ -91,6 +91,7 @@ fn log_messages() { const NUM_POINTS: usize = 1_000; + let recording_id = RecordingId::random(); let timeline = Timeline::new_sequence("frame_nr"); let mut time_point = TimePoint::default(); time_point.insert(timeline, TimeInt::from(0)); @@ -107,7 +108,7 @@ fn log_messages() { let used_bytes_start = live_bytes(); let table = Box::new( DataRow::from_cells1( - MsgId::random(), + RowId::random(), entity_path!("points"), [build_frame_nr(0.into())], 1, @@ -116,7 +117,10 @@ fn log_messages() { .into_table(), ); let table_bytes = live_bytes() - used_bytes_start; - let log_msg = 
Box::new(LogMsg::ArrowMsg(ArrowMsg::try_from(&*table).unwrap())); + let log_msg = Box::new(LogMsg::ArrowMsg( + recording_id, + table.to_arrow_msg().unwrap(), + )); let log_msg_bytes = live_bytes() - used_bytes_start; println!("Arrow payload containing a Pos2 uses {table_bytes} bytes in RAM"); let encoded = encode_log_msg(&log_msg); @@ -130,7 +134,7 @@ fn log_messages() { let used_bytes_start = live_bytes(); let table = Box::new( DataRow::from_cells1( - MsgId::random(), + RowId::random(), entity_path!("points"), [build_frame_nr(0.into())], NUM_POINTS as _, @@ -139,7 +143,10 @@ fn log_messages() { .into_table(), ); let table_bytes = live_bytes() - used_bytes_start; - let log_msg = Box::new(LogMsg::ArrowMsg(ArrowMsg::try_from(&*table).unwrap())); + let log_msg = Box::new(LogMsg::ArrowMsg( + recording_id, + table.to_arrow_msg().unwrap(), + )); let log_msg_bytes = live_bytes() - used_bytes_start; println!("Arrow payload containing a Pos2 uses {table_bytes} bytes in RAM"); let encoded = encode_log_msg(&log_msg); diff --git a/crates/re_data_store/src/entity_properties.rs b/crates/re_data_store/src/entity_properties.rs index 0b69cd442893..9929ca4c13c3 100644 --- a/crates/re_data_store/src/entity_properties.rs +++ b/crates/re_data_store/src/entity_properties.rs @@ -1,8 +1,5 @@ use re_arrow_store::LatestAtQuery; -use re_log_types::{ - external::arrow2_convert::deserialize::arrow_array_deserialize_iterator, - DeserializableComponent, EntityPath, -}; +use re_log_types::{DeserializableComponent, EntityPath}; use crate::log_db::EntityDb; @@ -71,6 +68,22 @@ pub struct EntityProperties { pub backproject_radius_scale: EditableAutoValue, } +#[cfg(feature = "serde")] +impl Default for EntityProperties { + fn default() -> Self { + Self { + visible: true, + visible_history: ExtraQueryHistory::default(), + interactive: true, + color_mapper: EditableAutoValue::default(), + pinhole_image_plane_distance: EditableAutoValue::default(), + backproject_depth: EditableAutoValue::Auto(true), + depth_from_world_scale: EditableAutoValue::default(), + backproject_radius_scale: EditableAutoValue::Auto(1.0), + } + } +} + #[cfg(feature = "serde")] impl EntityProperties { /// Multiply/and these together. @@ -100,22 +113,6 @@ impl EntityProperties { } } -#[cfg(feature = "serde")] -impl Default for EntityProperties { - fn default() -> Self { - Self { - visible: true, - visible_history: ExtraQueryHistory::default(), - interactive: true, - color_mapper: EditableAutoValue::default(), - pinhole_image_plane_distance: EditableAutoValue::default(), - backproject_depth: EditableAutoValue::Auto(true), - depth_from_world_scale: EditableAutoValue::default(), - backproject_radius_scale: EditableAutoValue::Auto(1.0), - } - } -} - // ---------------------------------------------------------------------------- /// When showing an entity in the history view, add this much history to it. 
@@ -143,25 +140,27 @@ impl ExtraQueryHistory { #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -pub enum ColorMap { +pub enum Colormap { + /// Perceptually even Grayscale, + + Inferno, + Magma, + Plasma, #[default] Turbo, Viridis, - Plasma, - Magma, - Inferno, } -impl std::fmt::Display for ColorMap { +impl std::fmt::Display for Colormap { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(match self { - ColorMap::Grayscale => "Grayscale", - ColorMap::Turbo => "Turbo", - ColorMap::Viridis => "Viridis", - ColorMap::Plasma => "Plasma", - ColorMap::Magma => "Magma", - ColorMap::Inferno => "Inferno", + Colormap::Grayscale => "Grayscale", + Colormap::Inferno => "Inferno", + Colormap::Magma => "Magma", + Colormap::Plasma => "Plasma", + Colormap::Turbo => "Turbo", + Colormap::Viridis => "Viridis", }) } } @@ -170,7 +169,7 @@ impl std::fmt::Display for ColorMap { #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub enum ColorMapper { /// Use a well-known color map, pre-implemented as a wgsl module. - ColorMap(ColorMap), + Colormap(Colormap), // TODO(cmc): support textures. // TODO(cmc): support custom transfer functions. } @@ -178,7 +177,7 @@ pub enum ColorMapper { impl std::fmt::Display for ColorMapper { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ColorMapper::ColorMap(colormap) => colormap.fmt(f), + ColorMapper::Colormap(colormap) => colormap.fmt(f), } } } @@ -186,7 +185,7 @@ impl std::fmt::Display for ColorMapper { impl Default for ColorMapper { #[inline] fn default() -> Self { - Self::ColorMap(ColorMap::default()) + Self::Colormap(Colormap::default()) } } @@ -211,14 +210,10 @@ where // single components this is easy enough. let data_store = &entity_db.data_store; - let components = [C::name()]; - - let row_indices = data_store.latest_at(query, entity_path, C::name(), &components)?; - - let results = data_store.get(&components, &row_indices); - let arr = results.get(0)?.as_ref()?.as_ref(); + let (_, cells) = data_store.latest_at(query, entity_path, C::name(), &[C::name()])?; + let cell = cells.get(0)?.as_ref()?; - let mut iter = arrow_array_deserialize_iterator::(arr).ok()?; + let mut iter = cell.try_to_native::().ok()?; let component = iter.next(); diff --git a/crates/re_data_store/src/entity_tree.rs b/crates/re_data_store/src/entity_tree.rs index 20ef41769734..d3a702ee9653 100644 --- a/crates/re_data_store/src/entity_tree.rs +++ b/crates/re_data_store/src/entity_tree.rs @@ -2,7 +2,7 @@ use std::collections::{BTreeMap, BTreeSet}; use itertools::Itertools; use re_log_types::{ - ComponentName, ComponentPath, EntityPath, EntityPathPart, MsgId, PathOp, TimeInt, TimePoint, + ComponentName, ComponentPath, EntityPath, EntityPathPart, PathOp, RowId, TimeInt, TimePoint, Timeline, }; @@ -40,7 +40,6 @@ impl TimeHistogramPerTimeline { // ---------------------------------------------------------------------------- /// Number of messages per time per timeline -#[derive(Default)] pub struct TimesPerTimeline(BTreeMap>); impl TimesPerTimeline { @@ -79,6 +78,13 @@ impl TimesPerTimeline { } } +// Always ensure we have a default "log_time" timeline. +impl Default for TimesPerTimeline { + fn default() -> Self { + Self(BTreeMap::from([(Timeline::log_time(), Default::default())])) + } +} + // ---------------------------------------------------------------------------- /// Tree of entity paths, plus components at the leaves. 
@@ -97,10 +103,10 @@ pub struct EntityTree { num_timeless_messages: usize, /// Book-keeping around whether we should clear fields when data is added - pub nonrecursive_clears: BTreeMap, + pub nonrecursive_clears: BTreeMap, /// Book-keeping around whether we should clear recursively when data is added - pub recursive_clears: BTreeMap, + pub recursive_clears: BTreeMap, /// Data logged at this entity path. pub components: BTreeMap, @@ -111,7 +117,7 @@ impl EntityTree { Self::new(EntityPath::root(), Default::default()) } - pub fn new(path: EntityPath, recursive_clears: BTreeMap) -> Self { + pub fn new(path: EntityPath, recursive_clears: BTreeMap) -> Self { Self { path, children: Default::default(), @@ -141,7 +147,7 @@ impl EntityTree { &mut self, time_point: &TimePoint, component_path: &ComponentPath, - ) -> Vec<(MsgId, TimePoint)> { + ) -> Vec<(RowId, TimePoint)> { crate::profile_function!(); let leaf = @@ -172,7 +178,7 @@ impl EntityTree { /// insertion. pub fn add_path_op( &mut self, - msg_id: MsgId, + row_id: RowId, time_point: &TimePoint, path_op: &PathOp, ) -> Vec { @@ -189,7 +195,7 @@ impl EntityTree { // Track that any future fields need a Null at the right // time-point when added. leaf.nonrecursive_clears - .entry(msg_id) + .entry(row_id) .or_insert_with(|| time_point.clone()); // For every existing field return a clear event @@ -209,13 +215,13 @@ impl EntityTree { // Track that any future children need a Null at the right // time-point when added. next.recursive_clears - .entry(msg_id) + .entry(row_id) .or_insert_with(|| time_point.clone()); // Track that any future fields need a Null at the right // time-point when added. next.nonrecursive_clears - .entry(msg_id) + .entry(row_id) .or_insert_with(|| time_point.clone()); // For every existing field append a clear event into the @@ -280,7 +286,7 @@ impl EntityTree { pub fn purge( &mut self, cutoff_times: &BTreeMap, - drop_msg_ids: &ahash::HashSet, + drop_row_ids: &ahash::HashSet, ) { let Self { path: _, @@ -300,11 +306,11 @@ impl EntityTree { } { crate::profile_scope!("nonrecursive_clears"); - nonrecursive_clears.retain(|msg_id, _| !drop_msg_ids.contains(msg_id)); + nonrecursive_clears.retain(|row_id, _| !drop_row_ids.contains(row_id)); } { crate::profile_scope!("recursive_clears"); - recursive_clears.retain(|msg_id, _| !drop_msg_ids.contains(msg_id)); + recursive_clears.retain(|row_id, _| !drop_row_ids.contains(row_id)); } { @@ -315,7 +321,7 @@ impl EntityTree { } for child in children.values_mut() { - child.purge(cutoff_times, drop_msg_ids); + child.purge(cutoff_times, drop_row_ids); } } diff --git a/crates/re_data_store/src/log_db.rs b/crates/re_data_store/src/log_db.rs index 50a5ce57a703..b520262aa4f9 100644 --- a/crates/re_data_store/src/log_db.rs +++ b/crates/re_data_store/src/log_db.rs @@ -1,12 +1,12 @@ +use std::collections::BTreeMap; + use nohash_hasher::IntMap; -use re_arrow_store::{DataStoreConfig, GarbageCollectionTarget, TimeInt}; +use re_arrow_store::{DataStoreConfig, TimeInt}; use re_log_types::{ - component_types::InstanceKey, - external::arrow2_convert::deserialize::arrow_array_deserialize_iterator, ArrowMsg, - BeginRecordingMsg, Component as _, ComponentPath, DataCell, DataRow, DataTable, EntityPath, - EntityPathHash, EntityPathOpMsg, LogMsg, MsgId, PathOp, RecordingId, RecordingInfo, TimePoint, - Timeline, + component_types::InstanceKey, ArrowMsg, BeginRecordingMsg, Component as _, ComponentPath, + DataCell, DataRow, DataTable, EntityPath, EntityPathHash, EntityPathOpMsg, LogMsg, PathOp, + RecordingId, 
RecordingInfo, RowId, TimePoint, Timeline, }; use crate::{Error, TimesPerTimeline}; @@ -36,29 +36,7 @@ impl Default for EntityDb { tree: crate::EntityTree::root(), data_store: re_arrow_store::DataStore::new( InstanceKey::name(), - DataStoreConfig { - // Garbage collection of the datastore is currently driven by the `MsgId` - // component column, as a workaround for the `MsgId` mismatch issue. - // - // Since this component is only a few bytes large, trying to trigger a GC - // based on bucket size is a lost cause, so make sure to have a small enough - // row limit. - // - // TODO(cmc): Reasses once the whole `MsgId` mismatch issue is resolved - // (probably once batching is implemented). - component_bucket_nb_rows: 128, - component_bucket_size_bytes: 10 * 1024 * 1024, // 10 MiB - // We do not garbage collect index buckets at the moment, and so the size of - // individual index buckets is irrelevant, only their total number of rows - // matter. - // See https://github.com/rerun-io/rerun/pull/1558 for details. - // - // TODO(cmc): Bring back index GC once the whole `MsgId` mismatch issue is - // resolved (probably once batching is implemented). - index_bucket_size_bytes: u64::MAX, - index_bucket_nb_rows: 2048, - ..Default::default() - }, + DataStoreConfig::default(), ), } } @@ -77,10 +55,14 @@ impl EntityDb { } fn try_add_arrow_msg(&mut self, msg: &ArrowMsg) -> Result<(), Error> { - let table: DataTable = msg.try_into()?; + crate::profile_function!(); + + // TODO(#1760): Compute the size of the datacells in the batching threads on the clients. + let mut table = DataTable::from_arrow_msg(msg)?; + table.compute_all_size_bytes(); // TODO(#1619): batch all of this - for row in table.as_rows() { + for row in table.to_rows() { self.try_add_data_row(&row)?; } @@ -97,19 +79,16 @@ impl EntityDb { for cell in row.cells().iter() { let component_path = ComponentPath::new(row.entity_path().clone(), cell.component_name()); - if cell.component_name() == MsgId::name() { - continue; - } let pending_clears = self.tree.add_data_msg(row.timepoint(), &component_path); - for (msg_id, time_point) in pending_clears { + for (row_id, time_point) in pending_clears { // Create and insert an empty component into the arrow store // TODO(jleibs): Faster empty-array creation let cell = DataCell::from_arrow_empty(cell.component_name(), cell.datatype().clone()); let row = DataRow::from_cells1( - msg_id, + row_id, row.entity_path.clone(), time_point.clone(), cell.num_instances(), @@ -125,20 +104,20 @@ impl EntityDb { self.data_store.insert_row(row).map_err(Into::into) } - fn add_path_op(&mut self, msg_id: MsgId, time_point: &TimePoint, path_op: &PathOp) { - let cleared_paths = self.tree.add_path_op(msg_id, time_point, path_op); + fn add_path_op(&mut self, row_id: RowId, time_point: &TimePoint, path_op: &PathOp) { + let cleared_paths = self.tree.add_path_op(row_id, time_point, path_op); for component_path in cleared_paths { if let Some(data_type) = self .data_store - .lookup_data_type(&component_path.component_name) + .lookup_datatype(&component_path.component_name) { // Create and insert an empty component into the arrow store // TODO(jleibs): Faster empty-array creation let cell = DataCell::from_arrow_empty(component_path.component_name, data_type.clone()); let row = DataRow::from_cells1( - msg_id, + row_id, component_path.entity_path.clone(), time_point.clone(), cell.num_instances(), @@ -154,7 +133,7 @@ impl EntityDb { pub fn purge( &mut self, cutoff_times: &std::collections::BTreeMap, - drop_msg_ids: &ahash::HashSet, + 
drop_row_ids: &ahash::HashSet, ) { crate::profile_function!(); @@ -172,7 +151,7 @@ impl EntityDb { { crate::profile_scope!("tree"); - tree.purge(cutoff_times, drop_msg_ids); + tree.purge(cutoff_times, drop_row_ids); } } } @@ -182,33 +161,31 @@ impl EntityDb { /// A in-memory database built from a stream of [`LogMsg`]es. #[derive(Default)] pub struct LogDb { - /// Messages in the order they arrived - chronological_message_ids: Vec, - log_messages: ahash::HashMap, - - /// Data that was logged with [`TimePoint::timeless`]. - /// We need to re-insert those in any new timelines - /// that are created after they were logged. - timeless_message_ids: Vec, + /// All [`EntityPathOpMsg`]s ever received. + entity_op_msgs: BTreeMap, /// Set by whomever created this [`LogDb`]. pub data_source: Option, /// Comes in a special message, [`LogMsg::BeginRecordingMsg`]. - recording_info: Option, + recording_msg: Option, /// Where we store the entities. pub entity_db: EntityDb, } impl LogDb { + pub fn recording_msg(&self) -> Option<&BeginRecordingMsg> { + self.recording_msg.as_ref() + } + pub fn recording_info(&self) -> Option<&RecordingInfo> { - self.recording_info.as_ref() + self.recording_msg().map(|msg| &msg.info) } pub fn recording_id(&self) -> RecordingId { - if let Some(info) = &self.recording_info { - info.recording_id + if let Some(msg) = &self.recording_msg { + msg.info.recording_id } else { RecordingId::ZERO } @@ -226,53 +203,47 @@ impl LogDb { self.entity_db.tree.num_timeless_messages() } + pub fn num_rows(&self) -> usize { + self.entity_db.data_store.num_timeless_rows() as usize + + self.entity_db.data_store.num_temporal_rows() as usize + } + pub fn is_empty(&self) -> bool { - self.log_messages.is_empty() + self.num_rows() == 0 } - pub fn add(&mut self, msg: LogMsg) -> Result<(), Error> { + pub fn add(&mut self, msg: &LogMsg) -> Result<(), Error> { crate::profile_function!(); match &msg { LogMsg::BeginRecordingMsg(msg) => self.add_begin_recording_msg(msg), - LogMsg::EntityPathOpMsg(msg) => { + LogMsg::EntityPathOpMsg(_, msg) => { let EntityPathOpMsg { - msg_id, + row_id, time_point, path_op, } = msg; - self.entity_db.add_path_op(*msg_id, time_point, path_op); + self.entity_op_msgs.insert(*row_id, msg.clone()); + self.entity_db.add_path_op(*row_id, time_point, path_op); } - LogMsg::ArrowMsg(inner) => self.entity_db.try_add_arrow_msg(inner)?, + LogMsg::ArrowMsg(_, inner) => self.entity_db.try_add_arrow_msg(inner)?, LogMsg::Goodbye(_) => {} } - // TODO(#1619): the following only makes sense because, while we support sending and - // receiving batches, we don't actually do so yet. - // We need to stop storing raw `LogMsg`s before we can benefit from our batching. - self.chronological_message_ids.push(msg.id()); - self.log_messages.insert(msg.id(), msg); - Ok(()) } fn add_begin_recording_msg(&mut self, msg: &BeginRecordingMsg) { - self.recording_info = Some(msg.info.clone()); - } - - pub fn len(&self) -> usize { - self.log_messages.len() + self.recording_msg = Some(msg.clone()); } - /// In the order they arrived - pub fn chronological_log_messages(&self) -> impl Iterator { - self.chronological_message_ids - .iter() - .filter_map(|id| self.get_log_msg(id)) + /// Returns an iterator over all [`EntityPathOpMsg`]s that have been written to this `LogDb`. 
+ pub fn iter_entity_op_msgs(&self) -> impl Iterator { + self.entity_op_msgs.values() } - pub fn get_log_msg(&self, msg_id: &MsgId) -> Option<&LogMsg> { - self.log_messages.get(msg_id) + pub fn get_entity_op_msg(&self, row_id: &RowId) -> Option<&EntityPathOpMsg> { + self.entity_op_msgs.get(row_id) } /// Free up some RAM by forgetting the older parts of all timelines. @@ -280,47 +251,30 @@ impl LogDb { crate::profile_function!(); assert!((0.0..=1.0).contains(&fraction_to_purge)); - let drop_msg_ids = { - let msg_id_chunks = self.entity_db.data_store.gc( - GarbageCollectionTarget::DropAtLeastPercentage(fraction_to_purge as _), - Timeline::log_time(), - MsgId::name(), - ); - - msg_id_chunks - .iter() - .flat_map(|chunk| { - arrow_array_deserialize_iterator::>(&**chunk).unwrap() - }) - .map(Option::unwrap) // MsgId is always present - .collect::>() - }; + let (drop_row_ids, stats_diff) = self.entity_db.data_store.gc( + re_arrow_store::GarbageCollectionTarget::DropAtLeastFraction(fraction_to_purge as _), + ); + re_log::debug!( + num_row_ids_dropped = drop_row_ids.len(), + size_bytes_dropped = re_format::format_bytes(stats_diff.total.num_bytes as _), + "purged datastore" + ); + let drop_row_ids: ahash::HashSet<_> = drop_row_ids.into_iter().collect(); let cutoff_times = self.entity_db.data_store.oldest_time_per_timeline(); let Self { - chronological_message_ids, - log_messages, - timeless_message_ids, + entity_op_msgs, data_source: _, - recording_info: _, + recording_msg: _, entity_db, } = self; { - crate::profile_scope!("chronological_message_ids"); - chronological_message_ids.retain(|msg_id| !drop_msg_ids.contains(msg_id)); - } - - { - crate::profile_scope!("log_messages"); - log_messages.retain(|msg_id, _| !drop_msg_ids.contains(msg_id)); - } - { - crate::profile_scope!("timeless_message_ids"); - timeless_message_ids.retain(|msg_id| !drop_msg_ids.contains(msg_id)); + crate::profile_scope!("entity_op_msgs"); + entity_op_msgs.retain(|row_id, _| !drop_row_ids.contains(row_id)); } - entity_db.purge(&cutoff_times, &drop_msg_ids); + entity_db.purge(&cutoff_times, &drop_row_ids); } } diff --git a/crates/re_format/Cargo.toml b/crates/re_format/Cargo.toml index de103832902e..198768511bc0 100644 --- a/crates/re_format/Cargo.toml +++ b/crates/re_format/Cargo.toml @@ -15,7 +15,8 @@ version.workspace = true [package.metadata.docs.rs] all-features = true - [dependencies] arrow2.workspace = true +arrow2_convert.workspace = true comfy-table.workspace = true +re_tuid.workspace = true diff --git a/crates/re_format/src/arrow.rs b/crates/re_format/src/arrow.rs index 5ac404970522..fcc8a4133cee 100644 --- a/crates/re_format/src/arrow.rs +++ b/crates/re_format/src/arrow.rs @@ -3,11 +3,80 @@ use std::fmt::Formatter; use arrow2::{ - array::{get_display, Array}, + array::{get_display, Array, ListArray, StructArray}, datatypes::{DataType, IntervalUnit, TimeUnit}, }; +use arrow2_convert::deserialize::TryIntoCollection; use comfy_table::{presets, Cell, Table}; +use re_tuid::Tuid; + +// --- + +// TODO(#1775): Registering custom formatters should be done from other crates: +// A) Because `re_format` cannot depend on other crates (cyclic deps) +// B) Because how to deserialize and inspect some type is a private implementation detail of that +// type, re_format shouldn't know how to deserialize a TUID... 
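+// One plausible shape for that registry (hypothetical, not implemented here):
+// a map from extension name to formatter factory, owned by `re_format` and
+// filled in by each type's crate at startup, which `get_custom_display` below
+// would consult before falling back to arrow2's generic `get_display`.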
+
+type CustomFormatter<'a, F> = Box<dyn Fn(&mut F, usize) -> std::fmt::Result + 'a>;
+
+pub fn get_custom_display<'a, F: std::fmt::Write + 'a>(
+    _column_name: &'a str,
+    array: &'a dyn Array,
+    null: &'static str,
+) -> CustomFormatter<'a, F> {
+    // NOTE: If the top-level array is a list, it's probably not the type we're looking for: we're
+    // interested in the type of the array that's underneath.
+    let datatype = (|| match array.data_type().to_logical_type() {
+        DataType::List(_) => array
+            .as_any()
+            .downcast_ref::<ListArray<i32>>()?
+            .iter()
+            .next()?
+            .map(|array| array.data_type().clone()),
+        _ => Some(array.data_type().clone()),
+    })();
+
+    if let Some(DataType::Extension(name, _, _)) = datatype {
+        match name.as_str() {
+            // TODO(#1775): This should be registered dynamically.
+            // NOTE: Can't call `Tuid::name()`, `Component` lives in `re_log_types`.
+            "rerun.tuid" => Box::new(|w, index| {
+                if let Some(tuid) = parse_tuid(array, index) {
+                    w.write_fmt(format_args!("{tuid}"))
+                } else {
+                    w.write_str("")
+                }
+            }),
+            _ => get_display(array, null),
+        }
+    } else {
+        get_display(array, null)
+    }
+}
+
+// TODO(#1775): This should be defined and registered by the `re_tuid` crate.
+fn parse_tuid(array: &dyn Array, index: usize) -> Option<Tuid> {
+    let (array, index) = match array.data_type().to_logical_type() {
+        // Legacy MsgId lists: just grab the first value, they're all identical
+        DataType::List(_) => (
+            array
+                .as_any()
+                .downcast_ref::<ListArray<i32>>()?
+                .value(index),
+            0,
+        ),
+        // New control columns: it's not a list to begin with!
+        _ => (array.to_boxed(), index),
+    };
+    let array = array.as_any().downcast_ref::<StructArray>()?;
+
+    let tuids: Vec<Tuid> = TryIntoCollection::try_into_collection(array.to_boxed()).ok()?;
+    tuids.get(index).copied()
+}
+
+// ---
+
 //TODO(john) move this and the Display impl upstream into arrow2
 #[repr(transparent)]
 pub struct DisplayTimeUnit(TimeUnit);
@@ -15,10 +84,10 @@ pub struct DisplayTimeUnit(TimeUnit);
 impl std::fmt::Display for DisplayTimeUnit {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         let s = match self.0 {
-            arrow2::datatypes::TimeUnit::Second => "s",
-            arrow2::datatypes::TimeUnit::Millisecond => "ms",
-            arrow2::datatypes::TimeUnit::Microsecond => "us",
-            arrow2::datatypes::TimeUnit::Nanosecond => "ns",
+            TimeUnit::Second => "s",
+            TimeUnit::Millisecond => "ms",
+            TimeUnit::Microsecond => "us",
+            TimeUnit::Nanosecond => "ns",
         };
         f.write_str(s)
     }
@@ -133,11 +202,19 @@ where
     let mut table = Table::new();
     table.load_preset(presets::UTF8_FULL);
 
+    let names = names
+        .into_iter()
+        .map(|name| name.as_ref().to_owned())
+        .collect::<Vec<_>>();
     let arrays = columns.into_iter().collect::<Vec<_>>();
 
     let (displayers, lengths): (Vec<_>, Vec<_>) = arrays
         .iter()
-        .map(|array| (get_display(array.as_ref(), "-"), array.as_ref().len()))
+        .zip(names.iter())
+        .map(|(array, name)| {
+            let formatter = get_custom_display(name, array.as_ref(), "-");
+            (formatter, array.as_ref().len())
+        })
         .unzip();
 
     if displayers.is_empty() {
@@ -145,12 +222,12 @@
     }
 
     let header = names
-        .into_iter()
+        .iter()
         .zip(arrays.iter().map(|array| array.as_ref().data_type()))
         .map(|(name, data_type)| {
             Cell::new(format!(
                 "{}\n---\n{}",
-                name.as_ref(),
+                name,
                 DisplayDataType(data_type.clone())
             ))
         });
diff --git a/crates/re_int_histogram/Cargo.toml b/crates/re_int_histogram/Cargo.toml
index 460f25ac935c..ca35db2fa70f 100644
--- a/crates/re_int_histogram/Cargo.toml
+++ b/crates/re_int_histogram/Cargo.toml
@@ -17,7 +17,7 @@ all-features = true
 
 
 [dependencies]
-smallvec = "1.10"
+smallvec.workspace = true
 static_assertions = "1.1"
 
 
diff --git
a/crates/re_log/src/lib.rs b/crates/re_log/src/lib.rs index 68f77b420b09..653451a37405 100644 --- a/crates/re_log/src/lib.rs +++ b/crates/re_log/src/lib.rs @@ -35,6 +35,11 @@ pub use { setup::*, }; +/// Re-exports of other crates. +pub mod external { + pub use log; +} + /// Never log anything less serious than a `WARN` from these crates. const CRATES_AT_WARN_LEVEL: [&str; 3] = [ // wgpu crates spam a lot on info level, which is really annoying diff --git a/crates/re_log_encoding/Cargo.toml b/crates/re_log_encoding/Cargo.toml new file mode 100644 index 000000000000..b1c5c2943af6 --- /dev/null +++ b/crates/re_log_encoding/Cargo.toml @@ -0,0 +1,69 @@ +[package] +name = "re_log_encoding" +authors.workspace = true +description = "Helpers for encoding and transporting Rerun log messages" +edition.workspace = true +homepage.workspace = true +include.workspace = true +license.workspace = true +publish = true +readme = "README.md" +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[package.metadata.docs.rs] +all-features = true + + +[features] +default = [] + +## Enable loading data from an .rrd file. +decoder = ["dep:rmp-serde", "dep:zstd", "dep:ruzstd"] + +# Enable encoding of log messages to an .rrd file/stream: +encoder = ["dep:rmp-serde", "dep:zstd"] + + +[dependencies] + +# Rerun: +re_build_info.workspace = true +re_log_types = { workspace = true, features = ["serde"] } +re_log.workspace = true +re_smart_channel.workspace = true + +# External: +ehttp = "0.2" +parking_lot.workspace = true +thiserror.workspace = true + +# Optional external dependencies: +rmp-serde = { version = "1", optional = true } + +# Native dependencies: +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +puffin.workspace = true +zstd = { version = "0.11.0", optional = true } # native only + +# Web dependencies: +[target.'cfg(target_arch = "wasm32")'.dependencies] +instant = { version = "0.1", features = ["wasm-bindgen"] } +js-sys = "0.3" +ruzstd = { version = "0.3.0", optional = true } # works on wasm, in contrast to zstd +wasm-bindgen = "0.2" +wasm-bindgen-futures = "0.4" +web-sys = { version = "0.3.52", features = ["Window"] } + +[dev-dependencies] +criterion = "0.4" +mimalloc.workspace = true +serde_test = { version = "1" } + +[lib] +bench = false + +[[bench]] +name = "msg_encode_benchmark" +harness = false diff --git a/crates/re_log_encoding/README.md b/crates/re_log_encoding/README.md new file mode 100644 index 000000000000..8b3e7ff4f875 --- /dev/null +++ b/crates/re_log_encoding/README.md @@ -0,0 +1,10 @@ +# re_log_encoding + +Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. + +[![Latest version](https://img.shields.io/crates/v/re_log_encoding.svg)](https://crates.io/crates/re_log_encoding) +[![Documentation](https://docs.rs/re_log_encoding/badge.svg)](https://docs.rs/re_log_encoding) +![MIT](https://img.shields.io/badge/license-MIT-blue.svg) +![Apache](https://img.shields.io/badge/license-Apache-blue.svg) + +Helper library for encoding Rerun log messages. 
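With the I/O code split out into its own crate, callers now go through `re_log_encoding` instead of the old `re_log_types::encoding` module. A minimal round-trip sketch of the new API (assuming the `encoder` and `decoder` features are enabled; this mirrors the `test_encode_decode` test further down):

    use re_log_types::LogMsg;

    /// Encode messages into an in-memory `.rrd` stream, then decode them back.
    fn roundtrip(messages: &[LogMsg]) -> Vec<LogMsg> {
        // `encode` borrows each message and writes the zstd-compressed stream.
        let mut rrd = Vec::new();
        re_log_encoding::encoder::encode(messages.iter(), &mut rrd)
            .expect("encoding failed");

        // `Decoder` is an iterator of `Result<LogMsg, DecodeError>`.
        re_log_encoding::decoder::Decoder::new(rrd.as_slice())
            .expect("not a valid .rrd stream")
            .collect::<Result<Vec<_>, _>>()
            .expect("decoding failed")
    }

The new `encode_owned` variant does the same for a by-value iterator, which is convenient when draining messages out of a channel.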
diff --git a/crates/re_log_types/benches/msg_encode_benchmark.rs b/crates/re_log_encoding/benches/msg_encode_benchmark.rs similarity index 78% rename from crates/re_log_types/benches/msg_encode_benchmark.rs rename to crates/re_log_encoding/benches/msg_encode_benchmark.rs index d9131ef9f9f9..ddc7fc9740d9 100644 --- a/crates/re_log_types/benches/msg_encode_benchmark.rs +++ b/crates/re_log_encoding/benches/msg_encode_benchmark.rs @@ -1,12 +1,12 @@ -#[cfg(not(all(feature = "save", feature = "load")))] -compile_error!("msg_encode_benchmark requires 'save' and 'load' features."); +#[cfg(not(all(feature = "decoder", feature = "encoder")))] +compile_error!("msg_encode_benchmark requires 'decoder' and 'encoder' features."); #[global_allocator] static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; use re_log_types::{ datagen::{build_frame_nr, build_some_colors, build_some_point2d}, - entity_path, ArrowMsg, DataRow, DataTable, Index, LogMsg, MsgId, + entity_path, DataRow, DataTable, Index, LogMsg, RecordingId, RowId, TableId, }; use criterion::{criterion_group, criterion_main, Criterion}; @@ -28,13 +28,13 @@ criterion_main!(benches); fn encode_log_msgs(messages: &[LogMsg]) -> Vec { let mut bytes = vec![]; - re_log_types::encoding::encode(messages.iter(), &mut bytes).unwrap(); + re_log_encoding::encoder::encode(messages.iter(), &mut bytes).unwrap(); assert!(bytes.len() > messages.len()); bytes } fn decode_log_msgs(mut bytes: &[u8]) -> Vec { - let messages = re_log_types::encoding::Decoder::new(&mut bytes) + let messages = re_log_encoding::decoder::Decoder::new(&mut bytes) .unwrap() .collect::, _>>() .unwrap(); @@ -42,10 +42,10 @@ fn decode_log_msgs(mut bytes: &[u8]) -> Vec { messages } -fn generate_messages(tables: &[DataTable]) -> Vec { +fn generate_messages(recording_id: RecordingId, tables: &[DataTable]) -> Vec { tables .iter() - .map(|table| LogMsg::ArrowMsg(ArrowMsg::try_from(table).unwrap())) + .map(|table| LogMsg::ArrowMsg(recording_id, table.to_arrow_msg().unwrap())) .collect() } @@ -53,8 +53,8 @@ fn decode_tables(messages: &[LogMsg]) -> Vec { messages .iter() .map(|log_msg| { - if let LogMsg::ArrowMsg(arrow_msg) = log_msg { - DataTable::try_from(arrow_msg).unwrap() + if let LogMsg::ArrowMsg(_, arrow_msg) = log_msg { + DataTable::from_arrow_msg(arrow_msg).unwrap() } else { unreachable!() } @@ -67,9 +67,9 @@ fn mono_points_arrow(c: &mut Criterion) { (0..NUM_POINTS) .map(|i| { DataTable::from_rows( - MsgId::ZERO, + TableId::ZERO, [DataRow::from_cells2( - MsgId::ZERO, + RowId::ZERO, entity_path!("points", Index::Sequence(i as _)), [build_frame_nr(0.into())], 1, @@ -81,6 +81,7 @@ fn mono_points_arrow(c: &mut Criterion) { } { + let recording_id = RecordingId::random(); let mut group = c.benchmark_group("mono_points_arrow"); group.throughput(criterion::Throughput::Elements(NUM_POINTS as _)); group.bench_function("generate_message_bundles", |b| { @@ -88,14 +89,14 @@ fn mono_points_arrow(c: &mut Criterion) { }); let tables = generate_tables(); group.bench_function("generate_messages", |b| { - b.iter(|| generate_messages(&tables)); + b.iter(|| generate_messages(recording_id, &tables)); }); - let messages = generate_messages(&tables); + let messages = generate_messages(recording_id, &tables); group.bench_function("encode_log_msg", |b| { b.iter(|| encode_log_msgs(&messages)); }); group.bench_function("encode_total", |b| { - b.iter(|| encode_log_msgs(&generate_messages(&generate_tables()))); + b.iter(|| encode_log_msgs(&generate_messages(recording_id, &generate_tables()))); }); let encoded = 
encode_log_msgs(&messages); @@ -122,10 +123,10 @@ fn mono_points_arrow(c: &mut Criterion) { fn mono_points_arrow_batched(c: &mut Criterion) { fn generate_table() -> DataTable { DataTable::from_rows( - MsgId::ZERO, + TableId::ZERO, (0..NUM_POINTS).map(|i| { DataRow::from_cells2( - MsgId::ZERO, + RowId::ZERO, entity_path!("points", Index::Sequence(i as _)), [build_frame_nr(0.into())], 1, @@ -136,6 +137,7 @@ fn mono_points_arrow_batched(c: &mut Criterion) { } { + let recording_id = RecordingId::random(); let mut group = c.benchmark_group("mono_points_arrow_batched"); group.throughput(criterion::Throughput::Elements(NUM_POINTS as _)); group.bench_function("generate_message_bundles", |b| { @@ -143,14 +145,14 @@ fn mono_points_arrow_batched(c: &mut Criterion) { }); let tables = [generate_table()]; group.bench_function("generate_messages", |b| { - b.iter(|| generate_messages(&tables)); + b.iter(|| generate_messages(recording_id, &tables)); }); - let messages = generate_messages(&tables); + let messages = generate_messages(recording_id, &tables); group.bench_function("encode_log_msg", |b| { b.iter(|| encode_log_msgs(&messages)); }); group.bench_function("encode_total", |b| { - b.iter(|| encode_log_msgs(&generate_messages(&[generate_table()]))); + b.iter(|| encode_log_msgs(&generate_messages(recording_id, &[generate_table()]))); }); let encoded = encode_log_msgs(&messages); @@ -177,9 +179,9 @@ fn mono_points_arrow_batched(c: &mut Criterion) { fn batch_points_arrow(c: &mut Criterion) { fn generate_tables() -> Vec { vec![DataTable::from_rows( - MsgId::ZERO, + TableId::ZERO, [DataRow::from_cells2( - MsgId::ZERO, + RowId::ZERO, entity_path!("points"), [build_frame_nr(0.into())], NUM_POINTS as _, @@ -192,6 +194,7 @@ fn batch_points_arrow(c: &mut Criterion) { } { + let recording_id = RecordingId::random(); let mut group = c.benchmark_group("batch_points_arrow"); group.throughput(criterion::Throughput::Elements(NUM_POINTS as _)); group.bench_function("generate_message_bundles", |b| { @@ -199,14 +202,14 @@ fn batch_points_arrow(c: &mut Criterion) { }); let tables = generate_tables(); group.bench_function("generate_messages", |b| { - b.iter(|| generate_messages(&tables)); + b.iter(|| generate_messages(recording_id, &tables)); }); - let messages = generate_messages(&tables); + let messages = generate_messages(recording_id, &tables); group.bench_function("encode_log_msg", |b| { b.iter(|| encode_log_msgs(&messages)); }); group.bench_function("encode_total", |b| { - b.iter(|| encode_log_msgs(&generate_messages(&generate_tables()))); + b.iter(|| encode_log_msgs(&generate_messages(recording_id, &generate_tables()))); }); let encoded = encode_log_msgs(&messages); diff --git a/crates/re_log_types/src/encoding.rs b/crates/re_log_encoding/src/decoder.rs similarity index 58% rename from crates/re_log_types/src/encoding.rs rename to crates/re_log_encoding/src/decoder.rs index d1e2299c2509..a4ba2592b757 100644 --- a/crates/re_log_types/src/encoding.rs +++ b/crates/re_log_encoding/src/decoder.rs @@ -1,117 +1,6 @@ -//! Encoding/decoding [`LogMsg`]:es as `.rrd` files. +//! Decoding [`LogMsg`]:es from `.rrd` files/streams. -use crate::LogMsg; - -// ---------------------------------------------------------------------------- -// native encode: - -#[cfg(feature = "save")] -#[cfg(not(target_arch = "wasm32"))] -mod encoder { - use std::io::Write as _; - - use crate::LogMsg; - - /// On failure to encode or serialize a [`LogMsg`]. 
- #[derive(thiserror::Error, Debug)] - pub enum EncodeError { - #[error("Failed to write: {0}")] - Write(std::io::Error), - - #[error("Zstd error: {0}")] - Zstd(std::io::Error), - - #[error("MsgPack error: {0}")] - MsgPack(#[from] rmp_serde::encode::Error), - - #[error("Called append on already finished encoder")] - AlreadyFinished, - } - - /// Encode a stream of [`LogMsg`] into an `.rrd` file. - pub struct Encoder { - /// Set to None when finished. - zstd_encoder: Option>, - buffer: Vec, - } - - impl Drop for Encoder { - fn drop(&mut self) { - if self.zstd_encoder.is_some() { - re_log::warn!("Encoder dropped without calling finish()!"); - if let Err(err) = self.finish() { - re_log::error!("Failed to finish encoding: {err}"); - } - } - } - } - - impl Encoder { - pub fn new(mut write: W) -> Result { - let rerun_version = re_build_info::CrateVersion::parse(env!("CARGO_PKG_VERSION")); - - write.write_all(b"RRF0").map_err(EncodeError::Write)?; - write - .write_all(&rerun_version.to_bytes()) - .map_err(EncodeError::Write)?; - - let level = 3; - let zstd_encoder = - zstd::stream::Encoder::new(write, level).map_err(EncodeError::Zstd)?; - - Ok(Self { - zstd_encoder: Some(zstd_encoder), - buffer: vec![], - }) - } - - pub fn append(&mut self, message: &LogMsg) -> Result<(), EncodeError> { - let Self { - zstd_encoder, - buffer, - } = self; - - if let Some(zstd_encoder) = zstd_encoder { - buffer.clear(); - rmp_serde::encode::write_named(buffer, message)?; - - zstd_encoder - .write_all(&(buffer.len() as u64).to_le_bytes()) - .map_err(EncodeError::Zstd)?; - zstd_encoder.write_all(buffer).map_err(EncodeError::Zstd)?; - - Ok(()) - } else { - Err(EncodeError::AlreadyFinished) - } - } - - pub fn finish(&mut self) -> Result<(), EncodeError> { - if let Some(zstd_encoder) = self.zstd_encoder.take() { - zstd_encoder.finish().map_err(EncodeError::Zstd)?; - Ok(()) - } else { - re_log::warn!("Encoder::finish called twice"); - Ok(()) - } - } - } - - pub fn encode<'a>( - messages: impl Iterator, - write: impl std::io::Write, - ) -> Result<(), EncodeError> { - let mut encoder = Encoder::new(write)?; - for message in messages { - encoder.append(message)?; - } - encoder.finish() - } -} - -#[cfg(feature = "save")] -#[cfg(not(target_arch = "wasm32"))] -pub use encoder::*; +use re_log_types::LogMsg; // ---------------------------------------------------------------------------- @@ -135,7 +24,6 @@ fn warn_on_version_mismatch(encoded_version: [u8; 4]) { // ---------------------------------------------------------------------------- /// On failure to encode or serialize a [`LogMsg`]. 
-#[cfg(feature = "load")] #[derive(thiserror::Error, Debug)] pub enum DecodeError { #[error("Not an .rrd file")] @@ -163,14 +51,12 @@ pub enum DecodeError { // ---------------------------------------------------------------------------- // native decode: -#[cfg(feature = "load")] #[cfg(not(target_arch = "wasm32"))] pub struct Decoder<'r, R: std::io::BufRead> { zdecoder: zstd::stream::Decoder<'r, R>, buffer: Vec, } -#[cfg(feature = "load")] #[cfg(not(target_arch = "wasm32"))] impl<'r, R: std::io::Read> Decoder<'r, std::io::BufReader> { pub fn new(mut read: R) -> Result { @@ -192,7 +78,6 @@ impl<'r, R: std::io::Read> Decoder<'r, std::io::BufReader> { } } -#[cfg(feature = "load")] #[cfg(not(target_arch = "wasm32"))] impl<'r, R: std::io::BufRead> Iterator for Decoder<'r, R> { type Item = Result; @@ -225,14 +110,12 @@ impl<'r, R: std::io::BufRead> Iterator for Decoder<'r, R> { // ---------------------------------------------------------------------------- // wasm decode: -#[cfg(feature = "load")] #[cfg(target_arch = "wasm32")] pub struct Decoder { zdecoder: ruzstd::StreamingDecoder, buffer: Vec, } -#[cfg(feature = "load")] #[cfg(target_arch = "wasm32")] impl Decoder { pub fn new(mut read: R) -> Result { @@ -254,7 +137,6 @@ impl Decoder { } } -#[cfg(feature = "load")] #[cfg(target_arch = "wasm32")] impl Iterator for Decoder { type Item = Result; @@ -286,19 +168,22 @@ impl Iterator for Decoder { // ---------------------------------------------------------------------------- -#[cfg(all(feature = "load", feature = "save"))] +#[cfg(all(feature = "decoder", feature = "encoder"))] #[test] fn test_encode_decode() { - use crate::{BeginRecordingMsg, LogMsg, MsgId, Time}; + use re_log_types::{ + ApplicationId, BeginRecordingMsg, LogMsg, RecordingId, RecordingInfo, RecordingSource, + RowId, Time, + }; let messages = vec![LogMsg::BeginRecordingMsg(BeginRecordingMsg { - msg_id: MsgId::random(), - info: crate::RecordingInfo { - application_id: crate::ApplicationId("test".to_owned()), - recording_id: crate::RecordingId::random(), + row_id: RowId::random(), + info: RecordingInfo { + application_id: ApplicationId("test".to_owned()), + recording_id: RecordingId::random(), is_official_example: true, started: Time::now(), - recording_source: crate::RecordingSource::RustSdk { + recording_source: RecordingSource::RustSdk { rustc_version: String::new(), llvm_version: String::new(), }, @@ -306,7 +191,7 @@ fn test_encode_decode() { })]; let mut file = vec![]; - encode(messages.iter(), &mut file).unwrap(); + crate::encoder::encode(messages.iter(), &mut file).unwrap(); let decoded_messages = Decoder::new(&mut file.as_slice()) .unwrap() diff --git a/crates/re_log_encoding/src/encoder.rs b/crates/re_log_encoding/src/encoder.rs new file mode 100644 index 000000000000..6d444746b6e1 --- /dev/null +++ b/crates/re_log_encoding/src/encoder.rs @@ -0,0 +1,111 @@ +//! Encoding of [`LogMsg`]es as a binary stream, e.g. to store in an `.rrd` file, or send over network. + +use std::io::Write as _; + +use re_log_types::LogMsg; + +/// On failure to encode or serialize a [`LogMsg`]. +#[derive(thiserror::Error, Debug)] +pub enum EncodeError { + #[error("Failed to write: {0}")] + Write(std::io::Error), + + #[error("Zstd error: {0}")] + Zstd(std::io::Error), + + #[error("MsgPack error: {0}")] + MsgPack(#[from] rmp_serde::encode::Error), + + #[error("Called append on already finished encoder")] + AlreadyFinished, +} + +/// Encode a stream of [`LogMsg`] into an `.rrd` file. +pub struct Encoder { + /// Set to None when finished. 
+ zstd_encoder: Option>, + buffer: Vec, +} + +impl Drop for Encoder { + fn drop(&mut self) { + if self.zstd_encoder.is_some() { + re_log::warn!("Encoder dropped without calling finish()!"); + if let Err(err) = self.finish() { + re_log::error!("Failed to finish encoding: {err}"); + } + } + } +} + +impl Encoder { + pub fn new(mut write: W) -> Result { + let rerun_version = re_build_info::CrateVersion::parse(env!("CARGO_PKG_VERSION")); + + write.write_all(b"RRF0").map_err(EncodeError::Write)?; + write + .write_all(&rerun_version.to_bytes()) + .map_err(EncodeError::Write)?; + + let level = 3; + let zstd_encoder = zstd::stream::Encoder::new(write, level).map_err(EncodeError::Zstd)?; + + Ok(Self { + zstd_encoder: Some(zstd_encoder), + buffer: vec![], + }) + } + + pub fn append(&mut self, message: &LogMsg) -> Result<(), EncodeError> { + let Self { + zstd_encoder, + buffer, + } = self; + + if let Some(zstd_encoder) = zstd_encoder { + buffer.clear(); + rmp_serde::encode::write_named(buffer, message)?; + + zstd_encoder + .write_all(&(buffer.len() as u64).to_le_bytes()) + .map_err(EncodeError::Zstd)?; + zstd_encoder.write_all(buffer).map_err(EncodeError::Zstd)?; + + Ok(()) + } else { + Err(EncodeError::AlreadyFinished) + } + } + + pub fn finish(&mut self) -> Result<(), EncodeError> { + if let Some(zstd_encoder) = self.zstd_encoder.take() { + zstd_encoder.finish().map_err(EncodeError::Zstd)?; + Ok(()) + } else { + re_log::warn!("Encoder::finish called twice"); + Ok(()) + } + } +} + +pub fn encode<'a>( + messages: impl Iterator, + write: &mut impl std::io::Write, +) -> Result<(), EncodeError> { + let mut encoder = Encoder::new(write)?; + for message in messages { + encoder.append(message)?; + } + encoder.finish() +} + +pub fn encode_owned( + messages: impl Iterator, + write: impl std::io::Write, +) -> Result<(), EncodeError> { + let mut encoder = Encoder::new(write)?; + for message in messages { + encoder.append(&message)?; + } + encoder.finish() +} diff --git a/crates/re_sdk/src/file_sink.rs b/crates/re_log_encoding/src/file_sink.rs similarity index 89% rename from crates/re_sdk/src/file_sink.rs rename to crates/re_log_encoding/src/file_sink.rs index f5f7e69f6fe1..121383553eb2 100644 --- a/crates/re_sdk/src/file_sink.rs +++ b/crates/re_log_encoding/src/file_sink.rs @@ -17,7 +17,7 @@ pub enum FileSinkError { /// Error encoding a log message. #[error("Failed to encode LogMsg: {0}")] - LogMsgEncode(#[from] re_log_types::encoding::EncodeError), + LogMsgEncode(#[from] crate::encoder::EncodeError), } /// Stream log messages to an `.rrd` file. @@ -47,7 +47,7 @@ impl FileSink { let file = std::fs::File::create(&path) .map_err(|err| FileSinkError::CreateFile(path.clone(), err))?; - let mut encoder = re_log_types::encoding::Encoder::new(file)?; + let mut encoder = crate::encoder::Encoder::new(file)?; let join_handle = std::thread::Builder::new() .name("file_writer".into()) @@ -71,10 +71,8 @@ impl FileSink { join_handle: Some(join_handle), }) } -} -impl crate::sink::LogSink for FileSink { - fn send(&self, msg: LogMsg) { - self.tx.lock().send(Some(msg)).ok(); + pub fn send(&self, log_msg: LogMsg) { + self.tx.lock().send(Some(log_msg)).ok(); } } diff --git a/crates/re_log_encoding/src/lib.rs b/crates/re_log_encoding/src/lib.rs new file mode 100644 index 000000000000..16b883448803 --- /dev/null +++ b/crates/re_log_encoding/src/lib.rs @@ -0,0 +1,42 @@ +//! Crate that handles encoding of rerun log types. 
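+//!
+//! The `decoder` feature enables reading `.rrd` streams and the `encoder`
+//! feature enables writing them; encoding is currently native-only, since we
+//! don't yet support encoding `LogMsg`s in the browser.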
+ +#[cfg(feature = "decoder")] +pub mod decoder; +#[cfg(feature = "encoder")] +#[cfg(not(target_arch = "wasm32"))] // we do no yet support encoding LogMsgs in the browser +pub mod encoder; + +#[cfg(feature = "encoder")] +#[cfg(not(target_arch = "wasm32"))] +mod file_sink; + +#[cfg(feature = "decoder")] +pub mod stream_rrd_from_http; + +// --------------------------------------------------------------------- + +#[cfg(feature = "encoder")] +#[cfg(not(target_arch = "wasm32"))] +pub use file_sink::{FileSink, FileSinkError}; + +// --------------------------------------------------------------------------- + +/// Profiling macro for feature "puffin" +#[doc(hidden)] +#[macro_export] +macro_rules! profile_function { + ($($arg: tt)*) => { + #[cfg(not(target_arch = "wasm32"))] + puffin::profile_function!($($arg)*); + }; +} + +/// Profiling macro for feature "puffin" +#[doc(hidden)] +#[macro_export] +macro_rules! profile_scope { + ($($arg: tt)*) => { + #[cfg(not(target_arch = "wasm32"))] + puffin::profile_scope!($($arg)*); + }; +} diff --git a/crates/re_viewer/src/stream_rrd_from_http.rs b/crates/re_log_encoding/src/stream_rrd_from_http.rs similarity index 59% rename from crates/re_viewer/src/stream_rrd_from_http.rs rename to crates/re_log_encoding/src/stream_rrd_from_http.rs index b0997d4494a5..065365d3258d 100644 --- a/crates/re_viewer/src/stream_rrd_from_http.rs +++ b/crates/re_log_encoding/src/stream_rrd_from_http.rs @@ -1,19 +1,21 @@ -pub fn stream_rrd_from_http_to_channel( - url: String, -) -> re_smart_channel::Receiver { +use std::sync::Arc; + +use re_log_types::LogMsg; + +pub fn stream_rrd_from_http_to_channel(url: String) -> re_smart_channel::Receiver { let (tx, rx) = re_smart_channel::smart_channel(re_smart_channel::Source::RrdHttpStream { url: url.clone(), }); stream_rrd_from_http( url, - Box::new(move |msg| { + Arc::new(move |msg| { tx.send(msg).ok(); }), ); rx } -pub fn stream_rrd_from_http(url: String, on_msg: Box) { +pub fn stream_rrd_from_http(url: String, on_msg: Arc) { re_log::debug!("Downloading .rrd file from {url:?}…"); // TODO(emilk): stream the http request, progressively decoding the .rrd file. @@ -36,10 +38,51 @@ pub fn stream_rrd_from_http(url: String, on_msg: Box) { + let window = web_sys::window().expect("no global `window` exists"); + let closure = + Closure::wrap(Box::new( + move |event: JsValue| match event.dyn_into::() { + Ok(message_event) => { + let uint8_array = Uint8Array::new(&message_event.data()); + let result: Vec = uint8_array.to_vec(); + + crate::stream_rrd_from_http::decode_rrd(result, on_msg.clone()); + } + Err(js_val) => { + re_log::error!("Incoming event was not a MessageEvent. 
{:?}", js_val); + } + }, + ) as Box); + window + .add_event_listener_with_callback("message", closure.as_ref().unchecked_ref()) + .unwrap(); + closure.forget(); + } +} + +#[cfg(target_arch = "wasm32")] +pub use web_event_listener::stream_rrd_from_event_listener; + #[cfg(not(target_arch = "wasm32"))] #[allow(clippy::needless_pass_by_value)] // must match wasm version -fn decode_rrd(rrd_bytes: Vec, on_msg: Box) { - match re_log_types::encoding::Decoder::new(rrd_bytes.as_slice()) { +fn decode_rrd(rrd_bytes: Vec, on_msg: Arc) { + match crate::decoder::Decoder::new(rrd_bytes.as_slice()) { Ok(decoder) => { for msg in decoder { match msg { @@ -60,20 +103,20 @@ fn decode_rrd(rrd_bytes: Vec, on_msg: Box, on_msg: Box) { + use re_log_types::LogMsg; + use std::sync::Arc; + + pub fn decode_rrd(rrd_bytes: Vec, on_msg: Arc) { wasm_bindgen_futures::spawn_local(decode_rrd_async(rrd_bytes, on_msg)); } /// Decodes the file in chunks, with an yield between each chunk. /// /// This is cooperative multi-tasking. - async fn decode_rrd_async( - rrd_bytes: Vec, - on_msg: Box, - ) { + async fn decode_rrd_async(rrd_bytes: Vec, on_msg: Arc) { let mut last_yield = instant::Instant::now(); - match re_log_types::encoding::Decoder::new(rrd_bytes.as_slice()) { + match crate::decoder::Decoder::new(rrd_bytes.as_slice()) { Ok(decoder) => { for msg in decoder { match msg { diff --git a/crates/re_log_types/Cargo.toml b/crates/re_log_types/Cargo.toml index 12ae7e9e5463..3af826321b61 100644 --- a/crates/re_log_types/Cargo.toml +++ b/crates/re_log_types/Cargo.toml @@ -17,7 +17,7 @@ all-features = true [features] -default = ["arrow_datagen", "anyhow"] +default = ["arrow_datagen"] ## Enables the `datagen` module, which exposes a number of tools for generating random data for ## tests and benchmarks. @@ -30,13 +30,7 @@ ecolor = ["dep:ecolor"] glam = ["dep:glam", "dep:macaw"] ## Integration with the [`image`](https://crates.io/crates/image/) crate. -image = ["dep:image"] - -## Enable loading data from a file. -load = ["anyhow", "rmp-serde", "serde", "zstd", "ruzstd"] - -## Enable saving data to a file. -save = ["anyhow", "rmp-serde", "serde", "zstd"] +image = ["dep:ecolor", "dep:image"] ## Enable (de)serialization using serde. 
serde = [ @@ -51,11 +45,10 @@ serde = [ [dependencies] # Rerun -re_build_info.workspace = true re_format.workspace = true re_log.workspace = true re_string_interner.workspace = true -re_tuid.workspace = true +re_tuid = { workspace = true, features = ["arrow2_convert"] } # External ahash.workspace = true @@ -63,7 +56,9 @@ array-init = "2.1.0" arrow2 = { workspace = true, features = [ "io_ipc", "io_print", + "compute_aggregate", "compute_concatenate", + "compute_aggregate", ] } arrow2_convert.workspace = true bytemuck = "1.11" @@ -76,7 +71,7 @@ ndarray.workspace = true nohash-hasher = "0.2" num-derive = "0.3" num-traits = "0.2" -smallvec = "1.10" +smallvec.workspace = true thiserror.workspace = true time = { workspace = true, default-features = false, features = [ "formatting", @@ -87,7 +82,6 @@ uuid = { version = "1.1", features = ["serde", "v4", "js"] } # Optional dependencies: -anyhow = { workspace = true, optional = true } ecolor = { workspace = true, optional = true } glam = { workspace = true, optional = true } image = { workspace = true, optional = true, default-features = false, features = [ @@ -95,32 +89,13 @@ image = { workspace = true, optional = true, default-features = false, features ] } macaw = { workspace = true, optional = true } rand = { version = "0.8", optional = true } -rmp-serde = { version = "1", optional = true } serde = { version = "1", optional = true, features = ["derive", "rc"] } serde_bytes = { version = "0.11", optional = true } # Native dependencies: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] puffin.workspace = true -zstd = { version = "0.11.0", optional = true } # native only -# Web dependencies: -[target.'cfg(target_arch = "wasm32")'.dependencies] -ruzstd = { version = "0.3.0", optional = true } # works on wasm [dev-dependencies] -criterion = "0.4" -mimalloc.workspace = true -serde_test = { version = "1" } -arrow2 = { workspace = true, features = [ - "io_ipc", - "io_print", - "compute_concatenate", -] } - -[lib] -bench = false - -[[bench]] -name = "msg_encode_benchmark" -harness = false +rmp-serde = "1.1" diff --git a/crates/re_log_types/src/arrow_msg.rs b/crates/re_log_types/src/arrow_msg.rs index 81f48c032057..54e5c01b68e5 100644 --- a/crates/re_log_types/src/arrow_msg.rs +++ b/crates/re_log_types/src/arrow_msg.rs @@ -3,7 +3,7 @@ //! We have custom implementations of [`serde::Serialize`] and [`serde::Deserialize`] that wraps //! the inner Arrow serialization of [`Schema`] and [`Chunk`]. -use crate::{MsgId, TimePoint}; +use crate::{TableId, TimePoint}; use arrow2::{array::Array, chunk::Chunk, datatypes::Schema}; /// Message containing an Arrow payload @@ -14,7 +14,7 @@ pub struct ArrowMsg { /// /// NOTE(#1619): While we're in the process of transitioning towards end-to-end batching, the /// `table_id` is always the same as the `row_id` as the first and only row. - pub table_id: MsgId, + pub table_id: TableId, /// The maximum values for all timelines across the entire batch of data. 
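    /// (On the wire, an `ArrowMsg` serializes as the sequence `(table_id,
    /// timepoint_min, buf)`, where `buf` holds the Arrow IPC stream bytes; see the
    /// `Deserialize` impl below.)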
/// @@ -79,7 +79,7 @@ impl<'de> serde::Deserialize<'de> for ArrowMsg { where A: serde::de::SeqAccess<'de>, { - let table_id: Option = seq.next_element()?; + let table_id: Option = seq.next_element()?; let timepoint_min: Option = seq.next_element()?; let buf: Option = seq.next_element()?; @@ -87,7 +87,14 @@ impl<'de> serde::Deserialize<'de> for ArrowMsg { (table_id, timepoint_min, buf) { let mut cursor = std::io::Cursor::new(buf); - let metadata = read_stream_metadata(&mut cursor).unwrap(); + let metadata = match read_stream_metadata(&mut cursor) { + Ok(metadata) => metadata, + Err(err) => { + return Err(serde::de::Error::custom(format!( + "Failed to read stream metadata: {err}" + ))) + } + }; let mut stream = StreamReader::new(cursor, metadata, None); let chunk = stream .find_map(|state| match state { @@ -126,24 +133,32 @@ mod tests { use crate::{ datagen::{build_frame_nr, build_some_point2d, build_some_rects}, - DataRow, DataTable, MsgId, + DataRow, DataTable, RowId, }; #[test] fn arrow_msg_roundtrip() { let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), "world/rects", [build_frame_nr(0.into())], 1, (build_some_point2d(1), build_some_rects(1)), ); - let table_in = row.into_table(); - let msg_in: ArrowMsg = (&table_in).try_into().unwrap(); + let table_in = { + let mut table = row.into_table(); + table.compute_all_size_bytes(); + table + }; + let msg_in = table_in.to_arrow_msg().unwrap(); let buf = rmp_serde::to_vec(&msg_in).unwrap(); let msg_out: ArrowMsg = rmp_serde::from_slice(&buf).unwrap(); - let table_out: DataTable = (&msg_out).try_into().unwrap(); + let table_out = { + let mut table = DataTable::from_arrow_msg(&msg_out).unwrap(); + table.compute_all_size_bytes(); + table + }; assert_eq!(msg_in, msg_out); assert_eq!(table_in, table_out); diff --git a/crates/re_log_types/src/component_types/instance_key.rs b/crates/re_log_types/src/component_types/instance_key.rs index 4cddbad56db7..d2cbbee1e096 100644 --- a/crates/re_log_types/src/component_types/instance_key.rs +++ b/crates/re_log_types/src/component_types/instance_key.rs @@ -36,6 +36,12 @@ impl InstanceKey { /// for example all points in a point cloud entity. pub const SPLAT: Self = Self(u64::MAX); + #[allow(clippy::should_implement_trait)] + #[inline] + pub fn from_iter(it: impl IntoIterator>) -> Vec { + it.into_iter().map(Into::into).collect::>() + } + /// Are we referring to all instances of the entity (e.g. all points in a point cloud entity)? /// /// The opposite of [`Self::is_specific`]. @@ -57,6 +63,16 @@ impl InstanceKey { pub fn specific_index(self) -> Option { self.is_specific().then_some(self) } + + /// Creates a new [`InstanceKey`] that identifies a 2d coordinate. 
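+    ///
+    /// The key is the row-major linear index `x + y * image_width`; e.g. with an
+    /// `image_width` of 640, the coordinate `(x: 3, y: 2)` maps to key `1283`.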
+ pub fn from_2d_image_coordinate([x, y]: [u32; 2], image_width: u64) -> Self { + Self((x as u64) + (y as u64) * image_width) + } + + /// Retrieves 2d image coordinates (x, y) encoded in an instance key + pub fn to_2d_image_coordinate(self, image_width: u64) -> [u32; 2] { + [(self.0 % image_width) as u32, (self.0 / image_width) as u32] + } } impl std::fmt::Display for InstanceKey { diff --git a/crates/re_log_types/src/component_types/mod.rs b/crates/re_log_types/src/component_types/mod.rs index ea682c24bdb0..f8032c4e66df 100644 --- a/crates/re_log_types/src/component_types/mod.rs +++ b/crates/re_log_types/src/component_types/mod.rs @@ -30,7 +30,6 @@ mod label; mod linestrip; mod mat; mod mesh3d; -mod msg_id; mod node_graph; mod point; mod quaternion; @@ -56,7 +55,6 @@ pub use label::Label; pub use linestrip::{LineStrip2D, LineStrip3D}; pub use mat::Mat3x3; pub use mesh3d::{EncodedMesh3D, Mesh3D, MeshFormat, MeshId, RawMesh3D}; -pub use msg_id::MsgId; pub use node_graph::NodeGraph; pub use point::{Point2D, Point3D}; pub use quaternion::Quaternion; @@ -64,19 +62,18 @@ pub use radius::Radius; pub use rect::Rect2D; pub use scalar::{Scalar, ScalarPlotProps}; pub use size::Size3D; -#[cfg(feature = "image")] -pub use tensor::TensorImageError; pub use tensor::{ - Tensor, TensorCastError, TensorData, TensorDataMeaning, TensorDimension, TensorId, TensorTrait, + Tensor, TensorCastError, TensorData, TensorDataMeaning, TensorDimension, TensorId, }; - +#[cfg(feature = "image")] +pub use tensor::{TensorImageLoadError, TensorImageSaveError}; pub use text_entry::TextEntry; pub use transform::{Pinhole, Rigid3, Transform}; pub use vec::{Vec2D, Vec3D, Vec4D}; lazy_static! { //TODO(john): use a run-time type registry - static ref FIELDS: [Field; 28] = [ + static ref FIELDS: [Field; 27] = [ ::field(), ::field(), ::field(), @@ -88,7 +85,6 @@ lazy_static! { ::field(), ::field(), ::field(), - ::field(), ::field(), ::field(), ::field(), diff --git a/crates/re_log_types/src/component_types/msg_id.rs b/crates/re_log_types/src/component_types/msg_id.rs deleted file mode 100644 index 104f444cac09..000000000000 --- a/crates/re_log_types/src/component_types/msg_id.rs +++ /dev/null @@ -1,90 +0,0 @@ -use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize}; - -use crate::{Component, ComponentName}; - -/// A unique id per [`crate::LogMsg`]. -/// -/// ## Examples -/// -/// ``` -/// # use re_log_types::component_types::MsgId; -/// # use arrow2_convert::field::ArrowField; -/// # use arrow2::datatypes::{DataType, Field}; -/// assert_eq!( -/// MsgId::data_type(), -/// DataType::Struct(vec![ -/// Field::new("time_ns", DataType::UInt64, false), -/// Field::new("inc", DataType::UInt64, false), -/// ]) -/// ); -/// ``` -#[derive( - Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, ArrowField, ArrowSerialize, ArrowDeserialize, -)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -#[arrow_field(transparent)] -pub struct MsgId(re_tuid::Tuid); - -impl std::fmt::Debug for MsgId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:032X}", self.0.as_u128()) - } -} - -impl std::fmt::Display for MsgId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:032X}", self.0.as_u128()) - } -} - -impl MsgId { - /// All zeroes. - pub const ZERO: Self = Self(re_tuid::Tuid::ZERO); - - /// All ones. 
- pub const MAX: Self = Self(re_tuid::Tuid::MAX); - - #[inline] - #[cfg(not(target_arch = "wasm32"))] - pub fn random() -> Self { - Self(re_tuid::Tuid::random()) - } - - #[inline] - pub fn as_u128(&self) -> u128 { - self.0.as_u128() - } - - #[inline] - pub fn nanoseconds_since_epoch(&self) -> u64 { - self.0.nanoseconds_since_epoch() - } - - /// A shortened string representation of the message id. - #[inline] - pub fn short_string(&self) -> String { - // We still want this to look like a part of the full message id (i.e. what is printed on std::fmt::Display). - // Per Thread randomness plus increment is in the last part, so show only that. - // (the first half is time in nanoseconds which for the _most part_ doesn't change that often) - let str = self.to_string(); - str[(str.len() - 8)..].to_string() - } -} - -impl Component for MsgId { - #[inline] - fn name() -> ComponentName { - "rerun.msg_id".into() - } -} - -#[test] -fn test_msgid_roundtrip() { - use arrow2::array::Array; - use arrow2_convert::{deserialize::TryIntoCollection, serialize::TryIntoArrow}; - - let msg_ids_in = vec![MsgId::random(), MsgId::random()]; - let array: Box = msg_ids_in.try_into_arrow().unwrap(); - let msg_ids_out: Vec = TryIntoCollection::try_into_collection(array).unwrap(); - assert_eq!(msg_ids_in, msg_ids_out); -} diff --git a/crates/re_log_types/src/component_types/point.rs b/crates/re_log_types/src/component_types/point.rs index 764aa96b8b76..934a69b076fc 100644 --- a/crates/re_log_types/src/component_types/point.rs +++ b/crates/re_log_types/src/component_types/point.rs @@ -64,6 +64,14 @@ impl From for glam::Vec2 { } } +#[cfg(feature = "glam")] +impl From for glam::Vec3 { + #[inline] + fn from(pt: Point2D) -> Self { + Self::new(pt.x, pt.y, 0.0) + } +} + /// A point in 3D space. /// /// ``` diff --git a/crates/re_log_types/src/component_types/tensor.rs b/crates/re_log_types/src/component_types/tensor.rs index 18dccea0399c..5afd98c9915e 100644 --- a/crates/re_log_types/src/component_types/tensor.rs +++ b/crates/re_log_types/src/component_types/tensor.rs @@ -9,18 +9,6 @@ use crate::{TensorDataType, TensorElement}; use super::arrow_convert_shims::BinaryBuffer; -pub trait TensorTrait { - fn id(&self) -> TensorId; - fn shape(&self) -> &[TensorDimension]; - fn num_dim(&self) -> usize; - fn is_shaped_like_an_image(&self) -> bool; - fn is_vector(&self) -> bool; - fn meaning(&self) -> TensorDataMeaning; - fn get(&self, index: &[u64]) -> Option; - fn dtype(&self) -> TensorDataType; - fn size_in_bytes(&self) -> usize; -} - // ---------------------------------------------------------------------------- /// A unique id per [`Tensor`]. 
@@ -154,7 +142,7 @@ impl ArrowDeserialize for TensorId { /// ), /// ); /// ``` -#[derive(Clone, Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)] +#[derive(Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)] #[arrow_field(type = "dense")] pub enum TensorData { U8(BinaryBuffer), @@ -208,6 +196,41 @@ impl TensorData { pub fn is_empty(&self) -> bool { self.size_in_bytes() == 0 } + + pub fn is_compressed_image(&self) -> bool { + match self { + Self::U8(_) + | Self::U16(_) + | Self::U32(_) + | Self::U64(_) + | Self::I8(_) + | Self::I16(_) + | Self::I32(_) + | Self::I64(_) + | Self::F32(_) + | Self::F64(_) => false, + + Self::JPEG(_) => true, + } + } +} + +impl std::fmt::Debug for TensorData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::U8(_) => write!(f, "U8({} bytes)", self.size_in_bytes()), + Self::U16(_) => write!(f, "U16({} bytes)", self.size_in_bytes()), + Self::U32(_) => write!(f, "U32({} bytes)", self.size_in_bytes()), + Self::U64(_) => write!(f, "U64({} bytes)", self.size_in_bytes()), + Self::I8(_) => write!(f, "I8({} bytes)", self.size_in_bytes()), + Self::I16(_) => write!(f, "I16({} bytes)", self.size_in_bytes()), + Self::I32(_) => write!(f, "I32({} bytes)", self.size_in_bytes()), + Self::I64(_) => write!(f, "I64({} bytes)", self.size_in_bytes()), + Self::F32(_) => write!(f, "F32({} bytes)", self.size_in_bytes()), + Self::F64(_) => write!(f, "F64({} bytes)", self.size_in_bytes()), + Self::JPEG(_) => write!(f, "JPEG({} bytes)", self.size_in_bytes()), + } + } } /// Flattened `Tensor` data payload @@ -365,23 +388,40 @@ pub struct Tensor { pub meter: Option, } -impl TensorTrait for Tensor { +impl Tensor { #[inline] - fn id(&self) -> TensorId { + pub fn id(&self) -> TensorId { self.tensor_id } #[inline] - fn shape(&self) -> &[TensorDimension] { + pub fn shape(&self) -> &[TensorDimension] { self.shape.as_slice() } #[inline] - fn num_dim(&self) -> usize { + pub fn num_dim(&self) -> usize { self.shape.len() } - fn is_shaped_like_an_image(&self) -> bool { + /// If this tensor is shaped as an image, return the height, width, and channels/depth of it. + pub fn image_height_width_channels(&self) -> Option<[u64; 3]> { + if self.shape.len() == 2 { + Some([self.shape[0].size, self.shape[1].size, 1]) + } else if self.shape.len() == 3 { + let channels = self.shape[2].size; + // gray, rgb, rgba + if matches!(channels, 1 | 3 | 4) { + Some([self.shape[0].size, self.shape[1].size, channels]) + } else { + None + } + } else { + None + } + } + + pub fn is_shaped_like_an_image(&self) -> bool { self.num_dim() == 2 || self.num_dim() == 3 && { matches!( @@ -393,17 +433,17 @@ impl TensorTrait for Tensor { } #[inline] - fn is_vector(&self) -> bool { + pub fn is_vector(&self) -> bool { let shape = &self.shape; shape.len() == 1 || { shape.len() == 2 && (shape[0].size == 1 || shape[1].size == 1) } } #[inline] - fn meaning(&self) -> TensorDataMeaning { + pub fn meaning(&self) -> TensorDataMeaning { self.meaning } - fn get(&self, index: &[u64]) -> Option { + pub fn get(&self, index: &[u64]) -> Option { let mut stride: usize = 1; let mut offset: usize = 0; for (TensorDimension { size, .. 
}, index) in self.shape.iter().zip(index).rev() { @@ -429,11 +469,11 @@ impl TensorTrait for Tensor { } } - fn dtype(&self) -> TensorDataType { + pub fn dtype(&self) -> TensorDataType { self.data.dtype() } - fn size_in_bytes(&self) -> usize { + pub fn size_in_bytes(&self) -> usize { self.data.size_in_bytes() } } @@ -565,9 +605,10 @@ impl<'a> TryFrom<&'a Tensor> for ::ndarray::ArrayViewD<'a, half::f16> { // ---------------------------------------------------------------------------- +/// Errors when loading [`Tensor`] from the [`image`] crate. #[cfg(feature = "image")] #[derive(thiserror::Error, Debug)] -pub enum TensorImageError { +pub enum TensorImageLoadError { #[error(transparent)] Image(#[from] image::ImageError), @@ -581,6 +622,20 @@ pub enum TensorImageError { ReadError(#[from] std::io::Error), } +/// Errors when converting [`Tensor`] to [`image`] images. +#[cfg(feature = "image")] +#[derive(thiserror::Error, Debug)] +pub enum TensorImageSaveError { + #[error("Expected image-shaped tensor, got {0:?}")] + ShapeNotAnImage(Vec), + + #[error("Cannot convert tensor with {0} channels and datatype {1} to an image")] + UnsupportedChannelsDtype(u64, TensorDataType), + + #[error("The tensor data did not match tensor dimensions")] + BadData, +} + impl Tensor { pub fn new( tensor_id: TensorId, @@ -607,7 +662,7 @@ impl Tensor { #[cfg(not(target_arch = "wasm32"))] pub fn tensor_from_jpeg_file( image_path: impl AsRef, - ) -> Result { + ) -> Result { let jpeg_bytes = std::fs::read(image_path)?; Self::tensor_from_jpeg_bytes(jpeg_bytes) } @@ -615,12 +670,12 @@ impl Tensor { /// Construct a tensor from the contents of a JPEG file. /// /// Requires the `image` feature. - pub fn tensor_from_jpeg_bytes(jpeg_bytes: Vec) -> Result { + pub fn tensor_from_jpeg_bytes(jpeg_bytes: Vec) -> Result { use image::ImageDecoder as _; let jpeg = image::codecs::jpeg::JpegDecoder::new(std::io::Cursor::new(&jpeg_bytes))?; if jpeg.color_type() != image::ColorType::Rgb8 { // TODO(emilk): support gray-scale jpeg as well - return Err(TensorImageError::UnsupportedJpegColorType( + return Err(TensorImageLoadError::UnsupportedJpegColorType( jpeg.color_type(), )); } @@ -642,14 +697,14 @@ impl Tensor { /// Construct a tensor from something that can be turned into a [`image::DynamicImage`]. /// /// Requires the `image` feature. - pub fn from_image(image: impl Into) -> Result { + pub fn from_image(image: impl Into) -> Result { Self::from_dynamic_image(image.into()) } /// Construct a tensor from [`image::DynamicImage`]. /// /// Requires the `image` feature. - pub fn from_dynamic_image(image: image::DynamicImage) -> Result { + pub fn from_dynamic_image(image: image::DynamicImage) -> Result { let (w, h) = (image.width(), image.height()); let (depth, data) = match image { @@ -683,7 +738,9 @@ impl Tensor { } _ => { // It is very annoying that DynamicImage is #[non_exhaustive] - return Err(TensorImageError::UnsupportedImageColorType(image.color())); + return Err(TensorImageLoadError::UnsupportedImageColorType( + image.color(), + )); } }; @@ -699,6 +756,119 @@ impl Tensor { meter: None, }) } + + /// Predicts if [`Self::to_dynamic_image`] is likely to succeed, without doing anything expensive + pub fn could_be_dynamic_image(&self) -> bool { + self.is_shaped_like_an_image() + && matches!( + self.dtype(), + TensorDataType::U8 + | TensorDataType::U16 + | TensorDataType::F16 + | TensorDataType::F32 + | TensorDataType::F64 + ) + } + + /// Try to convert an image-like tensor into an [`image::DynamicImage`]. 
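+    ///
+    /// Handles 1 (luminance), 3 (RGB) and 4 (RGBA) channel tensors. `U8`/`U16` data
+    /// is copied through unchanged, while `F32`/`F64` color components are mapped
+    /// via `gamma_u8_from_linear_f32` and alpha via `linear_u8_from_linear_f32`.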
+ pub fn to_dynamic_image(&self) -> Result { + use ecolor::{gamma_u8_from_linear_f32, linear_u8_from_linear_f32}; + use image::{DynamicImage, GrayImage, RgbImage, RgbaImage}; + + type Rgb16Image = image::ImageBuffer, Vec>; + type Rgba16Image = image::ImageBuffer, Vec>; + type Gray16Image = image::ImageBuffer, Vec>; + + let [h, w, channels] = self + .image_height_width_channels() + .ok_or_else(|| TensorImageSaveError::ShapeNotAnImage(self.shape.clone()))?; + let w = w as u32; + let h = h as u32; + + let dyn_img_result = + match (channels, &self.data) { + (1, TensorData::U8(buf)) => { + GrayImage::from_raw(w, h, buf.as_slice().to_vec()).map(DynamicImage::ImageLuma8) + } + (1, TensorData::U16(buf)) => Gray16Image::from_raw(w, h, buf.as_slice().to_vec()) + .map(DynamicImage::ImageLuma16), + // TODO(emilk) f16 + (1, TensorData::F32(buf)) => { + let pixels = buf + .iter() + .map(|pixel| gamma_u8_from_linear_f32(*pixel)) + .collect(); + GrayImage::from_raw(w, h, pixels).map(DynamicImage::ImageLuma8) + } + (1, TensorData::F64(buf)) => { + let pixels = buf + .iter() + .map(|&pixel| gamma_u8_from_linear_f32(pixel as f32)) + .collect(); + GrayImage::from_raw(w, h, pixels).map(DynamicImage::ImageLuma8) + } + + (3, TensorData::U8(buf)) => { + RgbImage::from_raw(w, h, buf.as_slice().to_vec()).map(DynamicImage::ImageRgb8) + } + (3, TensorData::U16(buf)) => Rgb16Image::from_raw(w, h, buf.as_slice().to_vec()) + .map(DynamicImage::ImageRgb16), + (3, TensorData::F32(buf)) => { + let pixels = buf.iter().copied().map(gamma_u8_from_linear_f32).collect(); + RgbImage::from_raw(w, h, pixels).map(DynamicImage::ImageRgb8) + } + (3, TensorData::F64(buf)) => { + let pixels = buf + .iter() + .map(|&comp| gamma_u8_from_linear_f32(comp as f32)) + .collect(); + RgbImage::from_raw(w, h, pixels).map(DynamicImage::ImageRgb8) + } + + (4, TensorData::U8(buf)) => { + RgbaImage::from_raw(w, h, buf.as_slice().to_vec()).map(DynamicImage::ImageRgba8) + } + (4, TensorData::U16(buf)) => Rgba16Image::from_raw(w, h, buf.as_slice().to_vec()) + .map(DynamicImage::ImageRgba16), + (4, TensorData::F32(buf)) => { + let rgba: &[[f32; 4]] = bytemuck::cast_slice(buf.as_slice()); + let pixels: Vec = rgba + .iter() + .flat_map(|&[r, g, b, a]| { + let r = gamma_u8_from_linear_f32(r); + let g = gamma_u8_from_linear_f32(g); + let b = gamma_u8_from_linear_f32(b); + let a = linear_u8_from_linear_f32(a); + [r, g, b, a] + }) + .collect(); + RgbaImage::from_raw(w, h, pixels).map(DynamicImage::ImageRgba8) + } + (4, TensorData::F64(buf)) => { + let rgba: &[[f64; 4]] = bytemuck::cast_slice(buf.as_slice()); + let pixels: Vec = rgba + .iter() + .flat_map(|&[r, g, b, a]| { + let r = gamma_u8_from_linear_f32(r as _); + let g = gamma_u8_from_linear_f32(g as _); + let b = gamma_u8_from_linear_f32(b as _); + let a = linear_u8_from_linear_f32(a as _); + [r, g, b, a] + }) + .collect(); + RgbaImage::from_raw(w, h, pixels).map(DynamicImage::ImageRgba8) + } + + (_, _) => { + return Err(TensorImageSaveError::UnsupportedChannelsDtype( + channels, + self.data.dtype(), + )) + } + }; + + dyn_img_result.ok_or(TensorImageSaveError::BadData) + } } // ---------------------------------------------------------------------------- diff --git a/crates/re_log_types/src/data.rs b/crates/re_log_types/src/data.rs index 248c67f3acb7..7317be465c3e 100644 --- a/crates/re_log_types/src/data.rs +++ b/crates/re_log_types/src/data.rs @@ -114,6 +114,25 @@ impl TensorDataType { } } + #[inline] + pub fn min_value(&self) -> f64 { + match self { + Self::U8 => u8::MIN as _, + Self::U16 => u16::MIN 
as _, + Self::U32 => u32::MIN as _, + Self::U64 => u64::MIN as _, + + Self::I8 => i8::MIN as _, + Self::I16 => i16::MIN as _, + Self::I32 => i32::MIN as _, + Self::I64 => i64::MIN as _, + + Self::F16 => f16::MIN.into(), + Self::F32 => f32::MIN as _, + Self::F64 => f64::MIN, + } + } + #[inline] pub fn max_value(&self) -> f64 { match self { diff --git a/crates/re_log_types/src/data_cell.rs b/crates/re_log_types/src/data_cell.rs index a0fdbdcbaf95..d8b19a9eabc4 100644 --- a/crates/re_log_types/src/data_cell.rs +++ b/crates/re_log_types/src/data_cell.rs @@ -1,6 +1,9 @@ +use std::sync::Arc; + +use arrow2::datatypes::DataType; use itertools::Itertools as _; -use crate::{Component, ComponentName, DeserializableComponent, SerializableComponent}; +use crate::{Component, ComponentName, DeserializableComponent, SerializableComponent, SizeBytes}; // --- @@ -86,16 +89,39 @@ pub type DataCellResult = ::std::result::Result; /// # assert_eq!(3, cell.num_instances()); /// # assert_eq!(cell.datatype(), &Point2D::data_type()); /// # -/// # assert_eq!(points, cell.as_native().collect_vec().as_slice()); +/// # assert_eq!(points, cell.to_native().collect_vec().as_slice()); /// ``` /// #[derive(Debug, Clone, PartialEq)] pub struct DataCell { + /// While the arrow data is already refcounted, the contents of the `DataCell` still have to + /// be wrapped in an `Arc` to work around performance issues in `arrow2`. + /// + /// See [`DataCellInner`] for more information. + pub inner: Arc, +} + +/// The actual contents of a [`DataCell`]. +/// +/// Despite the fact that the arrow data is already refcounted, this has to live separately, behind +/// an `Arc`, to work around performance issues in `arrow2` that stem from its heavy use of nested +/// virtual calls. +/// +/// See #1746 for details. +#[derive(Debug, Clone, PartialEq)] +pub struct DataCellInner { /// Name of the component type used in this cell. // // TODO(#1696): Store this within the datatype itself. pub(crate) name: ComponentName, + /// The pre-computed size of the cell (stack + heap) as well as its underlying arrow data, + /// in bytes. + /// + /// This is always zero unless [`Self::compute_size_bytes`] has been called, which is a very + /// costly operation. + pub(crate) size_bytes: u64, + /// A uniformly typed list of values for the given component type: `[C, C, C, ...]` /// /// Includes the data, its schema and probably soon the component metadata @@ -204,7 +230,13 @@ impl DataCell { name: ComponentName, values: Box, ) -> DataCellResult { - Ok(Self { name, values }) + Ok(Self { + inner: Arc::new(DataCellInner { + name, + size_bytes: 0, + values, + }), + }) } /// Builds a new `DataCell` from an arrow array. @@ -236,9 +268,16 @@ impl DataCell { datatype: arrow2::datatypes::DataType, ) -> DataCellResult { // TODO(cmc): check that it is indeed a component datatype - Ok(Self { + + let mut inner = DataCellInner { name, + size_bytes: 0, values: arrow2::array::new_empty_array(datatype), + }; + inner.compute_size_bytes(); + + Ok(Self { + inner: Arc::new(inner), }) } @@ -260,8 +299,8 @@ impl DataCell { /// If you do use them, try to keep the scope as short as possible: holding on to a raw array /// might prevent the datastore from releasing memory from garbage collected data. #[inline] - pub fn as_arrow(&self) -> Box { - self.values.clone() /* shallow */ + pub fn to_arrow(&self) -> Box { + self.inner.values.clone() /* shallow */ } /// Returns the contents of the cell as a reference to an arrow array. 
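    /// Unlike [`Self::to_arrow`], this does not even pay for a shallow clone of the
    /// underlying `Box<dyn arrow2::array::Array>`.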
@@ -272,7 +311,7 @@ impl DataCell { /// might prevent the datastore from releasing memory from garbage collected data. #[inline] pub fn as_arrow_ref(&self) -> &dyn arrow2::array::Array { - &*self.values + &*self.inner.values } /// Returns the contents of the cell as an arrow array (shallow clone) wrapped in a unit-length @@ -288,10 +327,10 @@ impl DataCell { // TODO(cmc): effectively, this returns a `DataColumn`... think about that. #[doc(hidden)] #[inline] - pub fn as_arrow_monolist(&self) -> Box { + pub fn to_arrow_monolist(&self) -> Box { use arrow2::{array::ListArray, offset::Offsets}; - let values = self.as_arrow(); + let values = self.to_arrow(); let datatype = self.datatype().clone(); let datatype = ListArray::::default_datatype(datatype); @@ -309,28 +348,58 @@ impl DataCell { // // TODO(#1694): There shouldn't need to be HRTBs (Higher-Rank Trait Bounds) here. #[inline] - pub fn try_as_native( + pub fn try_to_native( &self, ) -> DataCellResult + '_> where for<'a> &'a C::ArrayType: IntoIterator, { use arrow2_convert::deserialize::arrow_array_deserialize_iterator; - arrow_array_deserialize_iterator(&*self.values).map_err(Into::into) + arrow_array_deserialize_iterator(&*self.inner.values).map_err(Into::into) } /// Returns the contents of the cell as an iterator of native components. /// /// Panics if the underlying arrow data cannot be deserialized into `C`. - /// See [`Self::try_as_native`] for a fallible alternative. + /// See [`Self::try_to_native`] for a fallible alternative. // // TODO(#1694): There shouldn't need to be HRTBs here. #[inline] - pub fn as_native(&self) -> impl Iterator + '_ + pub fn to_native(&self) -> impl Iterator + '_ + where + for<'a> &'a C::ArrayType: IntoIterator, + { + self.try_to_native().unwrap() + } + + /// Returns the contents of the cell as an iterator of native optional components. + /// + /// Fails if the underlying arrow data cannot be deserialized into `C`. + // + // TODO(#1694): There shouldn't need to be HRTBs (Higher-Rank Trait Bounds) here. + #[inline] + pub fn try_to_native_opt( + &self, + ) -> DataCellResult> + '_> where for<'a> &'a C::ArrayType: IntoIterator, { - self.try_as_native().unwrap() + use arrow2_convert::deserialize::arrow_array_deserialize_iterator; + arrow_array_deserialize_iterator(&*self.inner.values).map_err(Into::into) + } + + /// Returns the contents of the cell as an iterator of native optional components. + /// + /// Panics if the underlying arrow data cannot be deserialized into `C`. + /// See [`Self::try_to_native_opt`] for a fallible alternative. + // + // TODO(#1694): There shouldn't need to be HRTBs here. + #[inline] + pub fn to_native_opt(&self) -> impl Iterator> + '_ + where + for<'a> &'a C::ArrayType: IntoIterator, + { + self.try_to_native_opt().unwrap() } } @@ -338,24 +407,24 @@ impl DataCell { /// The name of the component type stored in the cell. #[inline] pub fn component_name(&self) -> ComponentName { - self.name + self.inner.name } /// The type of the component stored in the cell, i.e. the cell is an array of that type. #[inline] pub fn datatype(&self) -> &arrow2::datatypes::DataType { - self.values.data_type() + self.inner.values.data_type() } /// The length of the cell's array, i.e. how many component instances are in the cell? #[inline] pub fn num_instances(&self) -> u32 { - self.values.len() as _ + self.inner.values.len() as _ } #[inline] pub fn is_empty(&self) -> bool { - self.values.is_empty() + self.inner.values.is_empty() } /// Returns `true` if the underlying array is dense (no nulls). 
@@ -375,7 +444,6 @@ impl DataCell { pub fn is_sorted_and_unique(&self) -> DataCellResult { use arrow2::{ array::{Array, PrimitiveArray}, - datatypes::DataType, types::NativeType, }; @@ -435,11 +503,283 @@ impl From<&Vec> for DataCell { impl std::fmt::Display for DataCell { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "DataCell({})", + re_format::format_bytes(self.total_size_bytes() as _) + ))?; re_format::arrow::format_table( // NOTE: wrap in a ListArray so that it looks more cell-like (i.e. single row) - [&*self.as_arrow_monolist()], + [&*self.to_arrow_monolist()], [self.component_name()], ) .fmt(f) } } + +// --- + +impl DataCell { + /// Compute and cache the total size (stack + heap) of the inner cell and its underlying arrow + /// array, in bytes. + /// This does nothing if the size has already been computed and cached before. + /// + /// The caller must the sole owner of this cell, as this requires mutating an `Arc` under the + /// hood. Returns false otherwise. + /// + /// Beware: this is _very_ costly! + #[inline] + pub fn compute_size_bytes(&mut self) -> bool { + if let Some(inner) = Arc::get_mut(&mut self.inner) { + inner.compute_size_bytes(); + return true; + } + false + } +} + +impl SizeBytes for DataCell { + #[inline] + fn heap_size_bytes(&self) -> u64 { + (self.inner.size_bytes > 0) + .then_some(self.inner.size_bytes) + .unwrap_or_else(|| { + re_log::warn_once!( + "called `DataCell::heap_size_bytes() without computing it first" + ); + 0 + }) + } +} + +impl DataCellInner { + /// Compute and cache the total size (stack + heap) of the cell and its underlying arrow array, + /// in bytes. + /// This does nothing if the size has already been computed and cached before. + /// + /// Beware: this is _very_ costly! + #[inline] + pub fn compute_size_bytes(&mut self) { + let Self { + name, + size_bytes, + values, + } = self; + + // NOTE: The computed size cannot ever be zero. + if *size_bytes > 0 { + return; + } + + *size_bytes = name.total_size_bytes() + + size_bytes.total_size_bytes() + + values.data_type().total_size_bytes() + + std::mem::size_of_val(values) as u64 + + arrow2::compute::aggregate::estimated_bytes_size(&**values) as u64; + } +} + +#[test] +fn data_cell_sizes() { + use crate::{component_types::InstanceKey, Component as _}; + use arrow2::array::UInt64Array; + + // not computed + { + let cell = DataCell::from_arrow(InstanceKey::name(), UInt64Array::from_vec(vec![]).boxed()); + assert_eq!(0, cell.heap_size_bytes()); + assert_eq!(0, cell.heap_size_bytes()); + } + + // zero-sized + { + let mut cell = + DataCell::from_arrow(InstanceKey::name(), UInt64Array::from_vec(vec![]).boxed()); + cell.compute_size_bytes(); + + assert_eq!(112, cell.heap_size_bytes()); + assert_eq!(112, cell.heap_size_bytes()); + } + + // anything else + { + let mut cell = DataCell::from_arrow( + InstanceKey::name(), + UInt64Array::from_vec(vec![1, 2, 3]).boxed(), + ); + cell.compute_size_bytes(); + + // zero-sized + 3x u64s + assert_eq!(136, cell.heap_size_bytes()); + assert_eq!(136, cell.heap_size_bytes()); + } +} + +// This test exists because the documentation and online discussions revolving around +// arrow2's `estimated_bytes_size()` function indicate that there's a lot of limitations and +// edge cases to be aware of. +// +// Also, it's just plain hard to be sure that the answer you get is the answer you're looking +// for with these kinds of tools. When in doubt.. test everything we're going to need from it. 
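+//
+// For instance, in the utf8 case below the naive Rust-side accounting comes out at
+// 5600 bytes while `estimated_bytes_size` reports 4404, since arrow2 stores a
+// validity bitmap rather than per-element `Option` headers.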
+// +// In many ways, this is a specification of what we mean when we ask "what's the size of this +// Arrow array?". +#[test] +#[allow(clippy::from_iter_instead_of_collect)] +fn test_arrow_estimated_size_bytes() { + use arrow2::{ + array::{Array, Float64Array, ListArray, StructArray, UInt64Array, Utf8Array}, + compute::aggregate::estimated_bytes_size, + datatypes::{DataType, Field}, + offset::Offsets, + }; + + // empty primitive array + { + let data = vec![]; + let array = UInt64Array::from_vec(data.clone()).boxed(); + let sz = estimated_bytes_size(&*array); + assert_eq!(0, sz); + assert_eq!(std::mem::size_of_val(data.as_slice()), sz); + } + + // simple primitive array + { + let data = vec![42u64; 100]; + let array = UInt64Array::from_vec(data.clone()).boxed(); + assert_eq!( + std::mem::size_of_val(data.as_slice()), + estimated_bytes_size(&*array) + ); + } + + // utf8 strings array + { + let data = vec![Some("some very, very, very long string indeed"); 100]; + let array = Utf8Array::::from(data.clone()).to_boxed(); + + let raw_size_bytes = data + .iter() + // headers + bodies! + .map(|s| std::mem::size_of_val(s) + std::mem::size_of_val(s.unwrap().as_bytes())) + .sum::(); + let arrow_size_bytes = estimated_bytes_size(&*array); + + assert_eq!(5600, raw_size_bytes); + assert_eq!(4404, arrow_size_bytes); // smaller because validity bitmaps instead of opts + } + + // simple primitive list array + { + let data = std::iter::repeat(vec![42u64; 100]) + .take(50) + .collect::>(); + let array = { + let array_flattened = + UInt64Array::from_vec(data.clone().into_iter().flatten().collect()).boxed(); + + ListArray::::new( + ListArray::::default_datatype(DataType::UInt64), + Offsets::try_from_lengths(std::iter::repeat(50).take(50)) + .unwrap() + .into(), + array_flattened, + None, + ) + .boxed() + }; + + let raw_size_bytes = data + .iter() + // headers + bodies! 
+ .map(|s| std::mem::size_of_val(s) + std::mem::size_of_val(s.as_slice())) + .sum::(); + let arrow_size_bytes = estimated_bytes_size(&*array); + + assert_eq!(41200, raw_size_bytes); + assert_eq!(40200, arrow_size_bytes); // smaller because smaller inner headers + } + + // compound type array + { + #[derive(Clone, Copy)] + struct Point { + x: f64, + y: f64, + } + + impl Default for Point { + fn default() -> Self { + Self { x: 42.0, y: 666.0 } + } + } + + let data = vec![Point::default(); 100]; + let array = { + let x = Float64Array::from_vec(data.iter().map(|p| p.x).collect()).boxed(); + let y = Float64Array::from_vec(data.iter().map(|p| p.y).collect()).boxed(); + let fields = vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ]; + StructArray::new(DataType::Struct(fields), vec![x, y], None).boxed() + }; + + let raw_size_bytes = std::mem::size_of_val(data.as_slice()); + let arrow_size_bytes = estimated_bytes_size(&*array); + + assert_eq!(1600, raw_size_bytes); + assert_eq!(1600, arrow_size_bytes); + } + + // compound type list array + { + #[derive(Clone, Copy)] + struct Point { + x: f64, + y: f64, + } + + impl Default for Point { + fn default() -> Self { + Self { x: 42.0, y: 666.0 } + } + } + + let data = std::iter::repeat(vec![Point::default(); 100]) + .take(50) + .collect::>(); + let array: Box = { + let array = { + let x = + Float64Array::from_vec(data.iter().flatten().map(|p| p.x).collect()).boxed(); + let y = + Float64Array::from_vec(data.iter().flatten().map(|p| p.y).collect()).boxed(); + let fields = vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ]; + StructArray::new(DataType::Struct(fields), vec![x, y], None) + }; + + ListArray::::new( + ListArray::::default_datatype(array.data_type().clone()), + Offsets::try_from_lengths(std::iter::repeat(50).take(50)) + .unwrap() + .into(), + array.boxed(), + None, + ) + .boxed() + }; + + let raw_size_bytes = data + .iter() + // headers + bodies! + .map(|s| std::mem::size_of_val(s) + std::mem::size_of_val(s.as_slice())) + .sum::(); + let arrow_size_bytes = estimated_bytes_size(&*array); + + assert_eq!(81200, raw_size_bytes); + assert_eq!(80200, arrow_size_bytes); // smaller because smaller inner headers + } +} diff --git a/crates/re_log_types/src/data_row.rs b/crates/re_log_types/src/data_row.rs index 96a9c438f897..6cee609fa8fb 100644 --- a/crates/re_log_types/src/data_row.rs +++ b/crates/re_log_types/src/data_row.rs @@ -2,7 +2,9 @@ use ahash::HashSetExt; use nohash_hasher::IntSet; use smallvec::SmallVec; -use crate::{ComponentName, DataCell, DataCellError, DataTable, EntityPath, MsgId, TimePoint}; +use crate::{ + ComponentName, DataCell, DataCellError, DataTable, EntityPath, SizeBytes, TableId, TimePoint, +}; // --- @@ -87,6 +89,68 @@ impl std::ops::IndexMut for DataCellRow { // --- +/// A unique ID for a [`DataRow`]. 
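+///
+/// Wraps a [`re_tuid::Tuid`]: roughly wall-clock nanoseconds in the first half and
+/// per-thread randomness plus an increment in the second half.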
+#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + arrow2_convert::ArrowField, + arrow2_convert::ArrowSerialize, + arrow2_convert::ArrowDeserialize, +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[arrow_field(transparent)] +pub struct RowId(pub(crate) re_tuid::Tuid); + +impl std::fmt::Display for RowId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl RowId { + pub const ZERO: Self = Self(re_tuid::Tuid::ZERO); + + #[inline] + pub fn random() -> Self { + Self(re_tuid::Tuid::random()) + } + + /// Temporary utility while we transition to batching. See #1619. + #[doc(hidden)] + pub fn into_table_id(self) -> TableId { + TableId(self.0) + } +} + +impl SizeBytes for RowId { + #[inline] + fn heap_size_bytes(&self) -> u64 { + 0 + } +} + +impl std::ops::Deref for RowId { + type Target = re_tuid::Tuid; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::DerefMut for RowId { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + /// A row's worth of data, i.e. an event: a list of [`DataCell`]s associated with an auto-generated /// `RowId`, a user-specified [`TimePoint`] and [`EntityPath`], and an expected number of /// instances. @@ -134,11 +198,11 @@ impl std::ops::IndexMut for DataCellRow { /// /// ```rust /// # use re_log_types::{ -/// # component_types::{ColorRGBA, Label, MsgId, Point2D}, -/// # DataRow, Timeline, +/// # component_types::{ColorRGBA, Label, Point2D}, +/// # DataRow, RowId, Timeline, /// # }; /// # -/// # let row_id = MsgId::ZERO; +/// # let row_id = RowId::ZERO; /// # let timepoint = [ /// # (Timeline::new_sequence("frame_nr"), 42.into()), // /// # (Timeline::new_sequence("clock"), 666.into()), // @@ -162,8 +226,7 @@ impl std::ops::IndexMut for DataCellRow { pub struct DataRow { /// Auto-generated `TUID`, uniquely identifying this event and keeping track of the client's /// wall-clock. - // TODO(#1619): introduce RowId & TableId - pub row_id: MsgId, + pub row_id: RowId, /// User-specified [`TimePoint`] for this event. pub timepoint: TimePoint, @@ -190,7 +253,7 @@ impl DataRow { /// - one or more cell isn't 0, 1 or `num_instances` long, /// - two or more cells share the same component type. pub fn try_from_cells( - row_id: MsgId, + row_id: RowId, timepoint: impl Into, entity_path: impl Into, num_instances: u32, @@ -226,26 +289,13 @@ impl DataRow { } } - let mut this = Self { + Ok(Self { row_id, entity_path, timepoint, num_instances, cells, - }; - - // TODO(cmc): Since we don't yet support mixing splatted data within instanced rows, - // we need to craft an array of `MsgId`s that matches the length of the other components. - // TODO(#1619): This goes away once the store supports the new control columns - use crate::Component as _; - if !components.contains(&MsgId::name()) { - let num_instances = this.num_instances(); - this.cells.0.push(DataCell::from_native( - vec![row_id; num_instances as _].iter(), - )); - } - - Ok(this) + }) } /// Builds a new `DataRow` from an iterable of [`DataCell`]s. @@ -256,7 +306,7 @@ impl DataRow { /// /// See [`Self::try_from_cells`] for the fallible alternative. 
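    /// (It panics exactly where [`Self::try_from_cells`] would error: a cell that
    /// isn't 0, 1 or `num_instances` long, or two cells sharing a component type.)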
pub fn from_cells( - row_id: MsgId, + row_id: RowId, timepoint: impl Into, entity_path: impl Into, num_instances: u32, @@ -271,13 +321,13 @@ impl DataRow { #[doc(hidden)] #[inline] pub fn into_table(self) -> DataTable { - DataTable::from_rows(self.row_id, [self]) + DataTable::from_rows(self.row_id.into_table_id(), [self]) } } impl DataRow { #[inline] - pub fn row_id(&self) -> MsgId { + pub fn row_id(&self) -> RowId { self.row_id } @@ -297,7 +347,7 @@ impl DataRow { } #[inline] - pub fn components(&self) -> impl ExactSizeIterator + '_ { + pub fn component_names(&self) -> impl ExactSizeIterator + '_ { self.cells.iter().map(|cell| cell.component_name()) } @@ -326,13 +376,25 @@ impl DataRow { .map(|cell| cell.component_name()) .position(|name| name == *component) } + + /// Compute and cache the total (heap) allocated size of each individual underlying + /// [`DataCell`]. + /// This does nothing for cells whose size has already been computed and cached before. + /// + /// Beware: this is _very_ costly! + #[inline] + pub fn compute_all_size_bytes(&mut self) { + for cell in &mut self.cells.0 { + cell.compute_size_bytes(); + } + } } // --- impl DataRow { pub fn from_cells1( - row_id: MsgId, + row_id: RowId, entity_path: impl Into, timepoint: impl Into, num_instances: u32, @@ -351,7 +413,7 @@ impl DataRow { } pub fn try_from_cells1( - row_id: MsgId, + row_id: RowId, entity_path: impl Into, timepoint: impl Into, num_instances: u32, @@ -371,7 +433,7 @@ impl DataRow { } pub fn from_cells2( - row_id: MsgId, + row_id: RowId, entity_path: impl Into, timepoint: impl Into, num_instances: u32, @@ -394,7 +456,7 @@ impl DataRow { } pub fn try_from_cells2( - row_id: MsgId, + row_id: RowId, entity_path: impl Into, timepoint: impl Into, num_instances: u32, @@ -419,7 +481,7 @@ impl DataRow { } pub fn from_cells3( - row_id: MsgId, + row_id: RowId, entity_path: impl Into, timepoint: impl Into, num_instances: u32, @@ -444,7 +506,7 @@ impl DataRow { } pub fn try_from_cells3( - row_id: MsgId, + row_id: RowId, entity_path: impl Into, timepoint: impl Into, num_instances: u32, @@ -482,7 +544,7 @@ impl std::fmt::Display for DataRow { } re_format::arrow::format_table( - self.cells.iter().map(|cell| cell.as_arrow_monolist()), + self.cells.iter().map(|cell| cell.to_arrow_monolist()), self.cells.iter().map(|cell| cell.component_name()), ) .fmt(f) @@ -502,7 +564,7 @@ mod tests { #[test] fn data_row_error_num_instances() { - let row_id = MsgId::ZERO; + let row_id = RowId::ZERO; let timepoint = TimePoint::timeless(); let num_instances = 2; @@ -549,7 +611,7 @@ mod tests { #[test] fn data_row_error_duped_components() { - let row_id = MsgId::ZERO; + let row_id = RowId::ZERO; let timepoint = TimePoint::timeless(); let points: &[Point2D] = &[[10.0, 10.0].into(), [20.0, 20.0].into()]; diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs index 6183c31f1a86..adc9df0e0344 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -1,11 +1,13 @@ +use std::collections::BTreeMap; + use ahash::HashMap; use itertools::Itertools as _; use nohash_hasher::{IntMap, IntSet}; use smallvec::SmallVec; use crate::{ - ArrowMsg, ComponentName, DataCell, DataCellError, DataRow, DataRowError, EntityPath, MsgId, - TimePoint, + ArrowMsg, ComponentName, DataCell, DataCellError, DataRow, DataRowError, EntityPath, RowId, + SizeBytes, TimePoint, Timeline, }; // --- @@ -15,6 +17,11 @@ pub enum DataTableError { #[error("Trying to deserialize data that is missing a column present in the 
schema: {0:?}")] MissingColumn(String), + #[error( + "Trying to deserialize time column data with invalid datatype: {name:?} ({datatype:#?})" + )] + NotATimeColumn { name: String, datatype: DataType }, + #[error("Trying to deserialize column data that doesn't contain any ListArrays: {0:?}")] NotAColumn(String), @@ -36,11 +43,19 @@ pub type DataTableResult = ::std::result::Result; // --- -type RowIdVec = SmallVec<[MsgId; 4]>; -type TimePointVec = SmallVec<[TimePoint; 4]>; -type EntityPathVec = SmallVec<[EntityPath; 4]>; -type NumInstancesVec = SmallVec<[u32; 4]>; -type DataCellOptVec = SmallVec<[Option; 4]>; +pub type RowIdVec = SmallVec<[RowId; 4]>; + +pub type TimeOptVec = SmallVec<[Option; 4]>; + +pub type TimePointVec = SmallVec<[TimePoint; 4]>; + +pub type ErasedTimeVec = SmallVec<[i64; 4]>; + +pub type EntityPathVec = SmallVec<[EntityPath; 4]>; + +pub type NumInstancesVec = SmallVec<[u32; 4]>; + +pub type DataCellOptVec = SmallVec<[Option; 4]>; /// A column's worth of [`DataCell`]s: a sparse collection of [`DataCell`]s that share the same /// underlying type and likely point to shared, contiguous memory. @@ -58,6 +73,8 @@ impl std::ops::Deref for DataCellColumn { } } +// TODO(cmc): Those Deref don't actually do their job most of the time for some reason... + impl std::ops::DerefMut for DataCellColumn { #[inline] fn deref_mut(&mut self) -> &mut Self::Target { @@ -81,8 +98,95 @@ impl std::ops::IndexMut for DataCellColumn { } } +impl DataCellColumn { + #[inline] + pub fn empty(num_rows: usize) -> Self { + Self(smallvec::smallvec![None; num_rows]) + } + + /// Compute and cache the size of each individual underlying [`DataCell`]. + /// This does nothing for cells whose size has already been computed and cached before. + /// + /// Beware: this is _very_ costly! + #[inline] + pub fn compute_all_size_bytes(&mut self) { + for cell in &mut self.0 { + cell.as_mut().map(|cell| cell.compute_size_bytes()); + } + } +} + +impl SizeBytes for DataCellColumn { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.0.heap_size_bytes() + } +} + // --- +/// A unique ID for a [`DataTable`]. +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + arrow2_convert::ArrowField, + arrow2_convert::ArrowSerialize, + arrow2_convert::ArrowDeserialize, +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +#[arrow_field(transparent)] +pub struct TableId(pub(crate) re_tuid::Tuid); + +impl std::fmt::Display for TableId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl TableId { + pub const ZERO: Self = Self(re_tuid::Tuid::ZERO); + + #[inline] + pub fn random() -> Self { + Self(re_tuid::Tuid::random()) + } + + /// Temporary utility while we transition to batching. See #1619. + #[doc(hidden)] + pub fn into_row_id(self) -> RowId { + RowId(self.0) + } +} + +impl SizeBytes for TableId { + #[inline] + fn heap_size_bytes(&self) -> u64 { + 0 + } +} + +impl std::ops::Deref for TableId { + type Target = re_tuid::Tuid; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::DerefMut for TableId { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + /// A sparse table's worth of data, i.e. a batch of events: a collection of [`DataRow`]s. /// This is the top-level layer in our data model. 
/// @@ -121,18 +225,18 @@ impl std::ops::IndexMut for DataCellColumn { /// let points: &[Point2D] = &[[10.0, 10.0].into(), [20.0, 20.0].into()]; /// let colors: &[_] = &[ColorRGBA::from_rgb(128, 128, 128)]; /// let labels: &[Label] = &[]; -/// DataRow::from_cells3(MsgId::random(), "a", timepoint(1, 1), num_instances, (points, colors, labels)) +/// DataRow::from_cells3(RowId::random(), "a", timepoint(1, 1), num_instances, (points, colors, labels)) /// }; /// let row1 = { /// let num_instances = 0; /// let colors: &[ColorRGBA] = &[]; -/// DataRow::from_cells1(MsgId::random(), "b", timepoint(1, 2), num_instances, colors) +/// DataRow::from_cells1(RowId::random(), "b", timepoint(1, 2), num_instances, colors) /// }; /// let row2 = { /// let num_instances = 1; /// let colors: &[_] = &[ColorRGBA::from_rgb(255, 255, 255)]; /// let labels: &[_] = &[Label("hey".into())]; -/// DataRow::from_cells2(MsgId::random(), "c", timepoint(2, 1), num_instances, (colors, labels)) +/// DataRow::from_cells2(RowId::random(), "c", timepoint(2, 1), num_instances, (colors, labels)) /// }; /// let table = DataTable::from_rows(table_id, [row0, row1, row2]); /// ``` @@ -142,26 +246,26 @@ impl std::ops::IndexMut for DataCellColumn { /// /// The table above translates to the following, where each column is contiguous in memory: /// ```text -/// β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -/// β”‚ rerun.row_id ┆ rerun.timepoint ┆ rerun.entity_path ┆ rerun.num_instances ┆ rerun.label ┆ rerun.point2d ┆ rerun.colorrgba β”‚ -/// β•žβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•ͺ═══════════════════════════════════β•ͺ════════════════════β•ͺ═════════════════════β•ͺ═════════════β•ͺ══════════════════════════════════β•ͺ═════════════════║ -/// β”‚ {167967218, 54449486} ┆ [{frame_nr, 1, 1}, {clock, 1, 1}] ┆ a ┆ 2 ┆ [] ┆ [{x: 10, y: 10}, {x: 20, y: 20}] ┆ [2155905279] β”‚ -/// β”œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”€ -/// β”‚ {167967218, 54449486} ┆ [{frame_nr, 1, 1}, {clock, 1, 2}] ┆ b ┆ 0 ┆ - ┆ - ┆ [] β”‚ -/// β”œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”€ -/// β”‚ {167967218, 54449486} ┆ [{frame_nr, 1, 2}, {clock, 1, 1}] ┆ c ┆ 
1 ┆ [hey] ┆ - ┆ [4294967295] β”‚ -/// β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +/// β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +/// β”‚ frame_nr ┆ log_time ┆ rerun.row_id ┆ rerun.entity_path ┆ rerun.num_instances ┆ rerun.label ┆ rerun.point2d ┆ rerun.colorrgba β”‚ +/// β•žβ•β•β•β•β•β•β•β•β•β•β•ͺ═══════════════════════════════β•ͺ══════════════════════════════════β•ͺ═══════════════════β•ͺ═════════════════════β•ͺ═════════════β•ͺ══════════════════════════════════β•ͺ═════════════════║ +/// β”‚ 1 ┆ 2023-04-05 09:36:47.188796402 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ a ┆ 2 ┆ [] ┆ [{x: 10, y: 10}, {x: 20, y: 20}] ┆ [2155905279] β”‚ +/// β”œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”€ +/// β”‚ 1 ┆ 2023-04-05 09:36:47.188852222 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ b ┆ 0 ┆ - ┆ - ┆ [] β”‚ +/// β”œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”€ +/// β”‚ 2 ┆ 2023-04-05 09:36:47.188855872 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ c ┆ 1 ┆ [hey] ┆ - ┆ [4294967295] β”‚ +/// 
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ /// ``` /// /// ## Example /// /// ```rust /// # use re_log_types::{ -/// # component_types::{ColorRGBA, Label, MsgId, Point2D}, -/// # DataRow, DataTable, Timeline, TimePoint, +/// # component_types::{ColorRGBA, Label, Point2D}, +/// # DataRow, DataTable, RowId, TableId, Timeline, TimePoint, /// # }; /// # -/// # let table_id = MsgId::ZERO; // not used (yet) +/// # let table_id = TableId::random(); /// # /// # let timepoint = |frame_nr: i64, clock: i64| { /// # TimePoint::from([ @@ -177,7 +281,7 @@ impl std::ops::IndexMut for DataCellColumn { /// let labels: &[Label] = &[]; /// /// DataRow::from_cells3( -/// MsgId::random(), +/// RowId::random(), /// "a", /// timepoint(1, 1), /// num_instances, @@ -189,7 +293,7 @@ impl std::ops::IndexMut for DataCellColumn { /// let num_instances = 0; /// let colors: &[ColorRGBA] = &[]; /// -/// DataRow::from_cells1(MsgId::random(), "b", timepoint(1, 2), num_instances, colors) +/// DataRow::from_cells1(RowId::random(), "b", timepoint(1, 2), num_instances, colors) /// }; /// /// let row2 = { @@ -198,7 +302,7 @@ impl std::ops::IndexMut for DataCellColumn { /// let labels: &[_] = &[Label("hey".into())]; /// /// DataRow::from_cells2( -/// MsgId::random(), +/// RowId::random(), /// "c", /// timepoint(2, 1), /// num_instances, @@ -218,25 +322,32 @@ impl std::ops::IndexMut for DataCellColumn { /// # /// # assert_eq!(table_in, table_out); /// ``` -// TODO(#1619): introduce RowId & TableId #[derive(Debug, Clone, PartialEq)] pub struct DataTable { /// Auto-generated `TUID`, uniquely identifying this batch of data and keeping track of the /// client's wall-clock. - // TODO(#1619): use once batching lands - pub table_id: MsgId, + pub table_id: TableId, /// The entire column of `RowId`s. - pub row_id: RowIdVec, + /// + /// Keeps track of the unique identifier for each row that was generated by the clients. + pub col_row_id: RowIdVec, - /// The entire column of [`TimePoint`]s. - pub timepoint: TimePointVec, + /// All the rows for all the time columns. + /// + /// The times are optional since not all rows are guaranteed to have a timestamp for every + /// single timeline (though it is highly likely to be the case in practice). + pub col_timelines: BTreeMap, /// The entire column of [`EntityPath`]s. - pub entity_path: EntityPathVec, + /// + /// The entity each row relates to, respectively. + pub col_entity_path: EntityPathVec, /// The entire column of `num_instances`. - pub num_instances: NumInstancesVec, + /// + /// Keeps track of the expected number of instances in each row. + pub col_num_instances: NumInstancesVec, /// All the rows for all the component columns. /// @@ -247,19 +358,19 @@ pub struct DataTable { impl DataTable { /// Creates a new empty table with the given ID. 
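The renamed `col_*` fields line up one-to-one with what each `DataRow` carries. As a quick orientation, here is a minimal sketch of building a one-row table against the `RowId`/`TableId` API introduced in this diff (illustrative only, not part of the patch):

```rust
use re_log_types::{
    component_types::{ColorRGBA, Point2D},
    DataRow, DataTable, RowId, TableId, TimePoint, Timeline,
};

fn build_table() -> DataTable {
    let timepoint = TimePoint::from([(Timeline::new_sequence("frame_nr"), 1.into())]);

    let points: &[Point2D] = &[[10.0, 10.0].into()];
    let colors: &[ColorRGBA] = &[ColorRGBA::from_rgb(255, 0, 0)];

    // Every row carries its own `RowId`; the batch as a whole carries a `TableId`.
    let row = DataRow::from_cells2(RowId::random(), "a", timepoint, 1, (points, colors));
    DataTable::from_rows(TableId::random(), [row])
}
```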
- pub fn new(table_id: MsgId) -> Self { + pub fn new(table_id: TableId) -> Self { Self { table_id, - row_id: Default::default(), - timepoint: Default::default(), - entity_path: Default::default(), - num_instances: Default::default(), + col_row_id: Default::default(), + col_timelines: Default::default(), + col_entity_path: Default::default(), + col_num_instances: Default::default(), columns: Default::default(), } } /// Builds a new `DataTable` from an iterable of [`DataRow`]s. - pub fn from_rows(table_id: MsgId, rows: impl IntoIterator) -> Self { + pub fn from_rows(table_id: TableId, rows: impl IntoIterator) -> Self { crate::profile_function!(); let rows = rows.into_iter(); @@ -267,7 +378,7 @@ impl DataTable { // Explode all rows into columns, and keep track of which components are involved. let mut components = IntSet::default(); #[allow(clippy::type_complexity)] - let (row_id, timepoint, entity_path, num_instances, column): ( + let (col_row_id, col_timepoint, col_entity_path, col_num_instances, column): ( RowIdVec, TimePointVec, EntityPathVec, @@ -275,7 +386,7 @@ impl DataTable { Vec<_>, ) = rows .map(|row| { - components.extend(row.components()); + components.extend(row.component_names()); let DataRow { row_id, timepoint, @@ -287,6 +398,31 @@ impl DataTable { }) .multiunzip(); + // All time columns. + let mut col_timelines: BTreeMap = BTreeMap::default(); + for (i, timepoint) in col_timepoint.iter().enumerate() { + for (timeline, time) in timepoint.iter() { + match col_timelines.entry(*timeline) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry + .insert(smallvec::smallvec![None; i]) + .push(Some(time.as_i64())); + } + std::collections::btree_map::Entry::Occupied(mut entry) => { + let entry = entry.get_mut(); + entry.push(Some(time.as_i64())); + } + } + } + + // handle potential sparseness + for (timeline, col_time) in &mut col_timelines { + if timepoint.get(timeline).is_none() { + col_time.push(None); + } + } + } + // Pre-allocate all columns (one per component). let mut columns = IntMap::default(); for component in components { @@ -305,7 +441,7 @@ impl DataTable { } } - if row_id.len() > 1 { + if col_row_id.len() > 1 { re_log::warn_once!( "batching features are not ready for use, use single-row data tables instead!" ); @@ -313,10 +449,10 @@ impl DataTable { Self { table_id, - row_id, - timepoint, - entity_path, - num_instances, + col_row_id, + col_timelines, + col_entity_path, + col_num_instances, columns, } } @@ -325,19 +461,19 @@ impl DataTable { impl DataTable { #[inline] pub fn num_rows(&self) -> u32 { - self.row_id.len() as _ + self.col_row_id.len() as _ } #[inline] - pub fn as_rows(&self) -> impl ExactSizeIterator + '_ { + pub fn to_rows(&self) -> impl ExactSizeIterator + '_ { let num_rows = self.num_rows() as usize; let Self { table_id: _, - row_id, - timepoint, - entity_path, - num_instances, + col_row_id, + col_timelines, + col_entity_path, + col_num_instances, columns, } = self; @@ -347,10 +483,17 @@ impl DataTable { .filter_map(|rows| rows[i].clone() /* shallow */); DataRow::from_cells( - row_id[i], - timepoint[i].clone(), - entity_path[i].clone(), - num_instances[i], + col_row_id[i], + TimePoint::from( + col_timelines + .iter() + .filter_map(|(timeline, times)| { + times[i].map(|time| (*timeline, time.into())) + }) + .collect::>(), + ), + col_entity_path[i].clone(), + col_num_instances[i], cells, ) }) @@ -360,20 +503,58 @@ impl DataTable { /// and returns the corresponding [`TimePoint`]. 
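The time-column handling above is the subtle part of `from_rows`: every column must stay row-aligned even when a row has no value for some timeline, hence the back-fill and padding. The same logic over plain `std` types, as a self-contained sketch (the helper name is hypothetical):

```rust
use std::collections::BTreeMap;

type TimeCol = Vec<Option<i64>>;

fn explode_timelines(rows: &[Vec<(&'static str, i64)>]) -> BTreeMap<&'static str, TimeCol> {
    let mut cols: BTreeMap<&'static str, TimeCol> = BTreeMap::new();
    for (i, timepoint) in rows.iter().enumerate() {
        for &(timeline, time) in timepoint {
            // First time we see a timeline: back-fill the rows that came before with `None`.
            cols.entry(timeline).or_insert_with(|| vec![None; i]).push(Some(time));
        }
        // Pad the timelines this row did not touch, keeping every column the same length.
        for col in cols.values_mut() {
            if col.len() == i {
                col.push(None);
            }
        }
    }
    cols
}

fn main() {
    let cols = explode_timelines(&[
        vec![("frame_nr", 1), ("log_time", 100)],
        vec![("frame_nr", 2)], // no log_time here -> that column gets a None
    ]);
    assert_eq!(cols["frame_nr"], vec![Some(1), Some(2)]);
    assert_eq!(cols["log_time"], vec![Some(100), None]);
}
```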
#[inline] pub fn timepoint_max(&self) -> TimePoint { - self.timepoint - .iter() - .fold(TimePoint::timeless(), |acc, tp| acc.union_max(tp)) + let mut timepoint = TimePoint::timeless(); + for (timeline, col_time) in &self.col_timelines { + if let Some(time) = col_time.iter().flatten().max().copied() { + timepoint.insert(*timeline, time.into()); + } + } + timepoint + } + + /// Compute and cache the total (heap) allocated size of each individual underlying + /// [`DataCell`]. + /// This does nothing for cells whose size has already been computed and cached before. + /// + /// Beware: this is _very_ costly! + #[inline] + pub fn compute_all_size_bytes(&mut self) { + for column in self.columns.values_mut() { + column.compute_all_size_bytes(); + } + } +} + +impl SizeBytes for DataTable { + #[inline] + fn heap_size_bytes(&self) -> u64 { + let Self { + table_id, + col_row_id, + col_timelines, + col_entity_path, + col_num_instances, + columns, + } = self; + + table_id.heap_size_bytes() + + col_row_id.heap_size_bytes() + + col_timelines.heap_size_bytes() + + col_entity_path.heap_size_bytes() + + col_num_instances.heap_size_bytes() + + columns.heap_size_bytes() } } // --- Serialization --- use arrow2::{ - array::{Array, ListArray}, + array::{Array, ListArray, PrimitiveArray}, bitmap::Bitmap, chunk::Chunk, - datatypes::{DataType, Field, Schema}, + datatypes::{DataType, Field, Schema, TimeUnit}, offset::Offsets, + types::NativeType, }; use arrow2_convert::{ deserialize::TryIntoCollection, field::ArrowField, serialize::ArrowSerialize, @@ -382,6 +563,7 @@ use arrow2_convert::{ // TODO(#1696): Those names should come from the datatypes themselves. +pub const COLUMN_INSERT_ID: &str = "rerun.insert_id"; pub const COLUMN_ROW_ID: &str = "rerun.row_id"; pub const COLUMN_TIMEPOINT: &str = "rerun.timepoint"; pub const COLUMN_ENTITY_PATH: &str = "rerun.entity_path"; @@ -390,6 +572,7 @@ pub const COLUMN_NUM_INSTANCES: &str = "rerun.num_instances"; pub const METADATA_KIND: &str = "rerun.kind"; pub const METADATA_KIND_DATA: &str = "data"; pub const METADATA_KIND_CONTROL: &str = "control"; +pub const METADATA_KIND_TIME: &str = "time"; pub const METADATA_TABLE_ID: &str = "rerun.table_id"; impl DataTable { @@ -400,7 +583,10 @@ impl DataTable { /// * Control columns are those that drive the behavior of the storage systems. /// They are always present, always dense, and always deserialized upon reception by the /// server. - /// * Data columns are the one that hold component data. + /// Internally, time columns are (de)serialized separately from the rest of the control + /// columns for efficiency/QOL concerns: that doesn't change the fact that they are control + /// columns all the same! + /// * Data columns are the ones that hold component data. /// They are optional, potentially sparse, and never deserialized on the server-side (not by /// the storage systems, at least). pub fn serialize(&self) -> DataTableResult<(Schema, Chunk>)> { @@ -409,6 +595,13 @@ impl DataTable { let mut schema = Schema::default(); let mut columns = Vec::new(); + { + let (control_schema, control_columns) = self.serialize_time_columns(); + schema.fields.extend(control_schema.fields); + schema.metadata.extend(control_schema.metadata); + columns.extend(control_columns.into_iter()); + } + { let (control_schema, control_columns) = self.serialize_control_columns()?; schema.fields.extend(control_schema.fields); @@ -426,6 +619,43 @@ impl DataTable { Ok((schema, Chunk::new(columns))) } + /// Serializes all time columns into an arrow payload and schema. 
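Since every serialized field is tagged through `METADATA_KIND`, a reader can recover the time/control/data split from the schema alone. A hedged sketch (the helper is hypothetical; the `rerun.kind` key and its `time`/`control`/`data` values come from the constants above):

```rust
use arrow2::datatypes::Schema;

/// Returns the names of all fields tagged with the given `rerun.kind`.
fn fields_of_kind<'a>(schema: &'a Schema, kind: &str) -> Vec<&'a str> {
    schema
        .fields
        .iter()
        .filter(|field| field.metadata.get("rerun.kind").map(String::as_str) == Some(kind))
        .map(|field| field.name.as_str())
        .collect()
}

// Expected shape for the example table documented at the top of this file:
//   fields_of_kind(&schema, "time")    -> ["frame_nr", "log_time"]
//   fields_of_kind(&schema, "control") -> ["rerun.row_id", "rerun.entity_path", "rerun.num_instances"]
```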
+    fn serialize_time_columns(&self) -> (Schema, Vec<Box<dyn Array>>) {
+        crate::profile_function!();
+
+        fn serialize_time_column(
+            timeline: Timeline,
+            times: &TimeOptVec,
+        ) -> (Field, Box<dyn Array>) {
+            let data = PrimitiveArray::from(times.as_slice()).to(timeline.datatype());
+
+            let field = Field::new(timeline.name().as_str(), data.data_type().clone(), false)
+                .with_metadata([(METADATA_KIND.to_owned(), METADATA_KIND_TIME.to_owned())].into());
+
+            (field, data.boxed())
+        }
+
+        let Self {
+            table_id: _,
+            col_row_id: _,
+            col_timelines,
+            col_entity_path: _,
+            col_num_instances: _,
+            columns: _,
+        } = self;
+
+        let mut schema = Schema::default();
+        let mut columns = Vec::new();
+
+        for (timeline, col_time) in col_timelines {
+            let (time_field, time_column) = serialize_time_column(*timeline, col_time);
+            schema.fields.push(time_field);
+            columns.push(time_column);
+        }
+
+        (schema, columns)
+    }
+
     /// Serializes all controls columns into an arrow payload and schema.
     ///
     /// Control columns are those that drive the behavior of the storage systems.
@@ -434,28 +664,47 @@ fn serialize_control_columns(&self) -> DataTableResult<(Schema, Vec<Box<dyn Array>>)> {
         crate::profile_function!();
 
-        /// Serializes an iterable of dense arrow-like data.
-        fn serialize_dense_column<C: ArrowSerialize + ArrowField<Type = C> + 'static>(
-            name: &str,
-            values: &[C],
-        ) -> DataTableResult<(Field, Box<dyn Array>)> {
-            let data: Box<dyn Array> = values.try_into_arrow()?;
-            // let data = unit_values_to_unit_lists(data);
+        let Self {
+            table_id,
+            col_row_id,
+            col_timelines: _,
+            col_entity_path,
+            col_num_instances,
+            columns: _,
+        } = self;
 
-            let mut field = Field::new(name, data.data_type().clone(), false).with_metadata(
-                [(METADATA_KIND.to_owned(), METADATA_KIND_CONTROL.to_owned())].into(),
-            );
+        let mut schema = Schema::default();
+        let mut columns = Vec::new();
 
-            // TODO(cmc): why do we have to do this manually on the way out, but it's done
-            // automatically on our behalf on the way in...?
-            if let DataType::Extension(name, _, _) = data.data_type() {
-                field
-                    .metadata
-                    .extend([("ARROW:extension:name".to_owned(), name.clone())]);
-            }
+        let (row_id_field, row_id_column) =
+            Self::serialize_control_column(COLUMN_ROW_ID, col_row_id)?;
+        schema.fields.push(row_id_field);
+        columns.push(row_id_column);
 
-            Ok((field, data))
-        }
+        let (entity_path_field, entity_path_column) =
+            Self::serialize_control_column(COLUMN_ENTITY_PATH, col_entity_path)?;
+        schema.fields.push(entity_path_field);
+        columns.push(entity_path_column);
+
+        let (num_instances_field, num_instances_column) = Self::serialize_primitive_column(
+            COLUMN_NUM_INSTANCES,
+            col_num_instances.as_slice(),
+            None,
+        )?;
+        schema.fields.push(num_instances_field);
+        columns.push(num_instances_column);
+
+        schema.metadata = [(METADATA_TABLE_ID.into(), table_id.to_string())].into();
+
+        Ok((schema, columns))
+    }
+
+    /// Serializes a single control column: an iterable of dense arrow-like data.
+    pub fn serialize_control_column<C: ArrowSerialize + ArrowField<Type = C> + 'static>(
+        name: &str,
+        values: &[C],
+    ) -> DataTableResult<(Field, Box<dyn Array>)> {
+        crate::profile_function!();
 
         /// Transforms an array of unit values into a list of unit arrays.
/// @@ -473,41 +722,44 @@ impl DataTable { ListArray::::new(datatype, offsets, array, validity).boxed() } - let Self { - table_id, - row_id, - timepoint, - entity_path, - num_instances, - columns: _, - } = self; + let data: Box = values.try_into_arrow()?; + // let data = unit_values_to_unit_lists(data); - let mut schema = Schema::default(); - let mut columns = Vec::new(); + let mut field = Field::new(name, data.data_type().clone(), false) + .with_metadata([(METADATA_KIND.to_owned(), METADATA_KIND_CONTROL.to_owned())].into()); - let (row_id_field, row_id_column) = serialize_dense_column(COLUMN_ROW_ID, row_id)?; - schema.fields.push(row_id_field); - columns.push(row_id_column); + if let DataType::Extension(name, _, _) = data.data_type() { + field + .metadata + .extend([("ARROW:extension:name".to_owned(), name.clone())]); + } - let (timepoint_field, timepoint_column) = - serialize_dense_column(COLUMN_TIMEPOINT, timepoint)?; - schema.fields.push(timepoint_field); - columns.push(timepoint_column); + Ok((field, data)) + } - let (entity_path_field, entity_path_column) = - serialize_dense_column(COLUMN_ENTITY_PATH, entity_path)?; - schema.fields.push(entity_path_field); - columns.push(entity_path_column); + /// Serializes a single control column; optimized path for primitive datatypes. + pub fn serialize_primitive_column( + name: &str, + values: &[T], + datatype: Option, + ) -> DataTableResult<(Field, Box)> { + crate::profile_function!(); - // TODO(#1712): This is unnecessarily slow... - let (num_instances_field, num_instances_column) = - serialize_dense_column(COLUMN_NUM_INSTANCES, num_instances)?; - schema.fields.push(num_instances_field); - columns.push(num_instances_column); + let data = PrimitiveArray::from_slice(values); - schema.metadata = [(METADATA_TABLE_ID.into(), table_id.to_string())].into(); + let datatype = datatype.unwrap_or(data.data_type().clone()); + let data = data.to(datatype.clone()).boxed(); - Ok((schema, columns)) + let mut field = Field::new(name, datatype.clone(), false) + .with_metadata([(METADATA_KIND.to_owned(), METADATA_KIND_CONTROL.to_owned())].into()); + + if let DataType::Extension(name, _, _) = datatype { + field + .metadata + .extend([("ARROW:extension:name".to_owned(), name)]); + } + + Ok((field, data)) } /// Serializes all data columns into an arrow payload and schema. @@ -519,57 +771,56 @@ impl DataTable { let Self { table_id: _, - row_id: _, - timepoint: _, - entity_path: _, - num_instances: _, + col_row_id: _, + col_timelines: _, + col_entity_path: _, + col_num_instances: _, columns: table, } = self; let mut schema = Schema::default(); let mut columns = Vec::new(); - fn serialize_sparse_column( - name: &str, - column: &[Option], - ) -> DataTableResult<(Field, Box)> { - // TODO(cmc): All we're doing here is allocating and filling a nice contiguous array so - // our `ListArray`s can compute their indices and for the serializer to work with... - // In a far enough future, we could imagine having much finer grain control over the - // serializer and doing all of this at once, bypassing all the mem copies and - // allocations. - - let cell_refs = column - .iter() - .flatten() - .map(|cell| cell.as_arrow_ref()) - .collect_vec(); - - // NOTE: Avoid paying for the cost of the concatenation machinery if there's a single - // row in the column. - let data = if cell_refs.len() == 1 { - data_to_lists(column, cell_refs[0].to_boxed()) - } else { - // NOTE: This is a column of cells, it shouldn't ever fail to concatenate since - // they share the same underlying type. 
- let data = arrow2::compute::concatenate::concatenate(cell_refs.as_slice())?; - data_to_lists(column, data) - }; + for (component, rows) in table { + let (field, column) = Self::serialize_data_column(component.as_str(), rows)?; + schema.fields.push(field); + columns.push(column); + } - let field = Field::new(name, data.data_type().clone(), false) - .with_metadata([(METADATA_KIND.to_owned(), METADATA_KIND_DATA.to_owned())].into()); + Ok((schema, columns)) + } - Ok((field, data)) - } + /// Serializes a single data column. + pub fn serialize_data_column( + name: &str, + column: &[Option], + ) -> DataTableResult<(Field, Box)> { + crate::profile_function!(); /// Create a list-array out of a flattened array of cell values. /// /// * Before: `[C, C, C, C, C, C, C, ...]` /// * After: `ListArray[ [[C, C], [C, C, C], None, [C], [C], ...] ]` - fn data_to_lists(column: &[Option], data: Box) -> Box { + fn data_to_lists( + column: &[Option], + data: Box, + ext_name: Option, + ) -> Box { let datatype = data.data_type().clone(); - let datatype = ListArray::::default_datatype(datatype); + let field = { + let mut field = Field::new("item", datatype, true); + + if let Some(name) = ext_name { + field + .metadata + .extend([("ARROW:extension:name".to_owned(), name)]); + } + + field + }; + + let datatype = DataType::List(Box::new(field)); let offsets = Offsets::try_from_lengths(column.iter().map(|cell| { cell.as_ref() .map_or(0, |cell| cell.num_instances() as usize) @@ -584,25 +835,76 @@ impl DataTable { ListArray::::new(datatype, offsets, data, validity.into()).boxed() } - for (component, rows) in table { - let (field, column) = serialize_sparse_column(component.as_str(), rows)?; - schema.fields.push(field); - columns.push(column); - } + // TODO(cmc): All we're doing here is allocating and filling a nice contiguous array so + // our `ListArray`s can compute their indices and for the serializer to work with... + // In a far enough future, we could imagine having much finer grain control over the + // serializer and doing all of this at once, bypassing all the mem copies and + // allocations. - Ok((schema, columns)) + let cell_refs = column + .iter() + .flatten() + .map(|cell| cell.as_arrow_ref()) + .collect_vec(); + + let ext_name = cell_refs.first().and_then(|cell| match cell.data_type() { + DataType::Extension(name, _, _) => Some(name), + _ => None, + }); + + // NOTE: Avoid paying for the cost of the concatenation machinery if there's a single + // row in the column. + let data = if cell_refs.len() == 1 { + data_to_lists(column, cell_refs[0].to_boxed(), ext_name.cloned()) + } else { + // NOTE: This is a column of cells, it shouldn't ever fail to concatenate since + // they share the same underlying type. + let data = + arrow2::compute::concatenate::concatenate(cell_refs.as_slice()).map_err(|err| { + re_log::warn_once!("failed to concatenate cells for column {name}"); + err + })?; + data_to_lists(column, data, ext_name.cloned()) + }; + + let field = Field::new(name, data.data_type().clone(), false) + .with_metadata([(METADATA_KIND.to_owned(), METADATA_KIND_DATA.to_owned())].into()); + + Ok((field, data)) } } impl DataTable { /// Deserializes an entire table from an arrow payload and schema. 
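The offsets arithmetic inside `data_to_lists` is what lets sparse columns survive the trip through a `ListArray`: each present cell contributes its instance count and each missing cell contributes a zero-length slot. A small self-contained sketch of that arithmetic, using the same arrow2 entry point (exact `Offsets` API assumed from the usage above):

```rust
use arrow2::offset::Offsets;

fn main() -> arrow2::error::Result<()> {
    // Three rows: a cell with 2 instances, a missing cell, a cell with 1 instance.
    let lengths = [2usize, 0, 1];
    let offsets = Offsets::<i32>::try_from_lengths(lengths.into_iter())?;
    assert_eq!(offsets.as_slice(), &[0, 2, 2, 3]);
    Ok(())
}
```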
pub fn deserialize( - table_id: MsgId, + table_id: TableId, schema: &Schema, chunk: &Chunk>, ) -> DataTableResult { crate::profile_function!(); + // --- Time --- + + let col_timelines: DataTableResult<_> = schema + .fields + .iter() + .enumerate() + .filter_map(|(i, field)| { + field.metadata.get(METADATA_KIND).and_then(|kind| { + (kind == METADATA_KIND_TIME).then_some((field.name.as_str(), i)) + }) + }) + .map(|(name, index)| { + chunk + .get(index) + .ok_or(DataTableError::MissingColumn(name.to_owned())) + .and_then(|column| Self::deserialize_time_column(name, &**column)) + }) + .collect(); + let col_timelines = col_timelines?; + + // --- Control --- + let control_indices: HashMap<&str, usize> = schema .fields .iter() @@ -621,16 +923,16 @@ impl DataTable { }; // NOTE: the unwrappings cannot fail since control_index() makes sure the index is valid - let row_id = + let col_row_id = (&**chunk.get(control_index(COLUMN_ROW_ID)?).unwrap()).try_into_collection()?; - let timepoint = - (&**chunk.get(control_index(COLUMN_TIMEPOINT)?).unwrap()).try_into_collection()?; - let entity_path = + let col_entity_path = (&**chunk.get(control_index(COLUMN_ENTITY_PATH)?).unwrap()).try_into_collection()?; // TODO(#1712): This is unnecessarily slow... - let num_instances = + let col_num_instances = (&**chunk.get(control_index(COLUMN_NUM_INSTANCES)?).unwrap()).try_into_collection()?; + // --- Components --- + let columns: DataTableResult<_> = schema .fields .iter() @@ -655,25 +957,57 @@ impl DataTable { Ok(Self { table_id, - row_id, - timepoint, - entity_path, - num_instances, + col_row_id, + col_timelines, + col_entity_path, + col_num_instances, columns, }) } + /// Deserializes a sparse time column. + fn deserialize_time_column( + name: &str, + column: &dyn Array, + ) -> DataTableResult<(Timeline, TimeOptVec)> { + crate::profile_function!(); + + // See also [`Timeline::datatype`] + let timeline = match column.data_type().to_logical_type() { + DataType::Int64 => Timeline::new_sequence(name), + DataType::Timestamp(TimeUnit::Nanosecond, None) => Timeline::new_temporal(name), + _ => { + return Err(DataTableError::NotATimeColumn { + name: name.into(), + datatype: column.data_type().clone(), + }) + } + }; + + let col_time = column + .as_any() + .downcast_ref::>() + // NOTE: cannot fail, datatype checked above + .unwrap(); + let col_time: TimeOptVec = col_time.into_iter().map(|time| time.copied()).collect(); + + Ok((timeline, col_time)) + } + /// Deserializes a sparse data column. fn deserialize_data_column( component: ComponentName, column: &dyn Array, ) -> DataTableResult { + crate::profile_function!(); Ok(DataCellColumn( column .as_any() .downcast_ref::>() .ok_or(DataTableError::NotAColumn(component.to_string()))? .iter() + // TODO(#1805): Schema metadata gets cloned in every single array. + // This'll become a problem as soon as we enable batching. .map(|array| array.map(|values| DataCell::from_arrow(component, values))) .collect(), )) @@ -682,11 +1016,10 @@ impl DataTable { // --- -impl TryFrom<&ArrowMsg> for DataTable { - type Error = DataTableError; - +impl DataTable { + /// Deserializes the contents of an [`ArrowMsg`] into a `DataTable`. 
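Note the implicit contract between `deserialize_time_column` above and `Timeline::datatype` (added in `timeline.rs` further down): the arrow datatype alone is enough to tell a sequence timeline from a temporal one. A quick sketch of that mapping, grounded in this diff:

```rust
use arrow2::datatypes::{DataType, TimeUnit};
use re_log_types::Timeline;

fn main() {
    assert_eq!(Timeline::new_sequence("frame_nr").datatype(), DataType::Int64);
    assert_eq!(
        Timeline::new_temporal("log_time").datatype(),
        DataType::Timestamp(TimeUnit::Nanosecond, None),
    );
}
```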
#[inline] - fn try_from(msg: &ArrowMsg) -> DataTableResult { + pub fn from_arrow_msg(msg: &ArrowMsg) -> DataTableResult { let ArrowMsg { table_id, timepoint_max: _, @@ -696,18 +1029,17 @@ impl TryFrom<&ArrowMsg> for DataTable { Self::deserialize(*table_id, schema, chunk) } -} - -impl TryFrom<&DataTable> for ArrowMsg { - type Error = DataTableError; + /// Serializes the contents of a `DataTable` into an [`ArrowMsg`]. + // + // TODO(#1760): support serializing the cell size itself, so it can be computed on the clients. #[inline] - fn try_from(table: &DataTable) -> DataTableResult { - let timepoint_max = table.timepoint_max(); - let (schema, chunk) = table.serialize()?; + pub fn to_arrow_msg(&self) -> DataTableResult { + let timepoint_max = self.timepoint_max(); + let (schema, chunk) = self.serialize()?; Ok(ArrowMsg { - table_id: table.table_id, + table_id: self.table_id, timepoint_max, schema, chunk, @@ -731,3 +1063,70 @@ impl std::fmt::Display for DataTable { .fmt(f) } } + +// --- + +#[cfg(not(target_arch = "wasm32"))] +impl DataTable { + /// Crafts a simple but interesting `DataTable`. + pub fn example(timeless: bool) -> Self { + use crate::{ + component_types::{ColorRGBA, Label, Point2D}, + Time, + }; + + let table_id = TableId::random(); + + let timepoint = |frame_nr: i64| { + if timeless { + TimePoint::timeless() + } else { + TimePoint::from([ + (Timeline::new_temporal("log_time"), Time::now().into()), + (Timeline::new_sequence("frame_nr"), frame_nr.into()), + ]) + } + }; + + let row0 = { + let num_instances = 2; + let points: &[Point2D] = &[[10.0, 10.0].into(), [20.0, 20.0].into()]; + let colors: &[_] = &[ColorRGBA::from_rgb(128, 128, 128)]; + let labels: &[Label] = &[]; + + DataRow::from_cells3( + RowId::random(), + "a", + timepoint(1), + num_instances, + (points, colors, labels), + ) + }; + + let row1 = { + let num_instances = 0; + let colors: &[ColorRGBA] = &[]; + + DataRow::from_cells1(RowId::random(), "b", timepoint(1), num_instances, colors) + }; + + let row2 = { + let num_instances = 1; + let colors: &[_] = &[ColorRGBA::from_rgb(255, 255, 255)]; + let labels: &[_] = &[Label("hey".into())]; + + DataRow::from_cells2( + RowId::random(), + "c", + timepoint(2), + num_instances, + (colors, labels), + ) + }; + + let mut table = DataTable::from_rows(table_id, [row0, row1, row2]); + table.compute_all_size_bytes(); + + table + } +} diff --git a/crates/re_log_types/src/lib.rs b/crates/re_log_types/src/lib.rs index 3fe2071b86f6..cd6c9ea7ccf7 100644 --- a/crates/re_log_types/src/lib.rs +++ b/crates/re_log_types/src/lib.rs @@ -4,9 +4,6 @@ #![doc = document_features::document_features!()] //! 
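A hedged sketch of the new inherent conversions in use, replacing the old `TryFrom` impls. Whether the final equality holds depends on recomputing cell sizes on the deserialized side, since they do not travel with the message yet (see the TODO above):

```rust
use re_log_types::DataTable;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let table = DataTable::example(/*timeless=*/ false);

    let msg = table.to_arrow_msg()?;
    let mut table_out = DataTable::from_arrow_msg(&msg)?;

    // Cell sizes are not serialized yet, so recompute them before comparing.
    table_out.compute_all_size_bytes();
    assert_eq!(table, table_out);

    Ok(())
}
```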
-#[cfg(any(feature = "save", feature = "load"))] -pub mod encoding; - #[cfg(feature = "arrow_datagen")] pub mod datagen; @@ -20,6 +17,7 @@ mod data_table; pub mod hash; mod index; pub mod path; +mod size_bytes; mod time; pub mod time_point; mod time_range; @@ -28,6 +26,7 @@ mod time_real; pub mod external { pub use arrow2; pub use arrow2_convert; + pub use re_tuid; #[cfg(feature = "glam")] pub use glam; @@ -42,15 +41,20 @@ pub use self::component_types::context; pub use self::component_types::coordinates; pub use self::component_types::AnnotationContext; pub use self::component_types::Arrow3D; -pub use self::component_types::MsgId; pub use self::component_types::ViewCoordinates; pub use self::component_types::{EncodedMesh3D, ImuData, Mesh3D, MeshFormat, MeshId, RawMesh3D}; pub use self::data::*; -pub use self::data_cell::{DataCell, DataCellError, DataCellResult}; -pub use self::data_row::{DataRow, DataRowError, DataRowResult}; -pub use self::data_table::{DataTable, DataTableError, DataTableResult}; +pub use self::data_cell::{DataCell, DataCellError, DataCellInner, DataCellResult}; +pub use self::data_row::{DataRow, DataRowError, DataRowResult, RowId}; +pub use self::data_table::{ + DataCellColumn, DataCellOptVec, DataTable, DataTableError, DataTableResult, EntityPathVec, + ErasedTimeVec, NumInstancesVec, RowIdVec, TableId, TimePointVec, COLUMN_ENTITY_PATH, + COLUMN_INSERT_ID, COLUMN_NUM_INSTANCES, COLUMN_ROW_ID, COLUMN_TIMEPOINT, METADATA_KIND, + METADATA_KIND_CONTROL, METADATA_KIND_DATA, +}; pub use self::index::*; pub use self::path::*; +pub use self::size_bytes::SizeBytes; pub use self::time::{Duration, Time}; pub use self::time_point::{TimeInt, TimePoint, TimeType, Timeline, TimelineName}; pub use self::time_range::{TimeRange, TimeRangeF}; @@ -162,9 +166,8 @@ impl std::fmt::Display for ApplicationId { /// The most general log message sent from the SDK to the server. #[must_use] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] // `PartialEq` used for tests in another crate #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -#[cfg_attr(test, derive(PartialEq))] #[allow(clippy::large_enum_variant)] pub enum LogMsg { /// A new recording has begun. @@ -173,32 +176,40 @@ pub enum LogMsg { BeginRecordingMsg(BeginRecordingMsg), /// Server-backed operation on an [`EntityPath`]. - EntityPathOpMsg(EntityPathOpMsg), + EntityPathOpMsg(RecordingId, EntityPathOpMsg), /// Log an entity using an [`ArrowMsg`]. - ArrowMsg(ArrowMsg), + ArrowMsg(RecordingId, ArrowMsg), /// Sent when the client shuts down the connection. - Goodbye(MsgId), + Goodbye(RowId), } impl LogMsg { - pub fn id(&self) -> MsgId { + pub fn id(&self) -> RowId { match self { - Self::BeginRecordingMsg(msg) => msg.msg_id, - Self::EntityPathOpMsg(msg) => msg.msg_id, - Self::Goodbye(msg_id) => *msg_id, + Self::BeginRecordingMsg(msg) => msg.row_id, + Self::EntityPathOpMsg(_, msg) => msg.row_id, + Self::Goodbye(row_id) => *row_id, // TODO(#1619): the following only makes sense because, while we support sending and // receiving batches, we don't actually do so yet. // We need to stop storing raw `LogMsg`s before we can benefit from our batching. 
-            Self::ArrowMsg(msg) => msg.table_id,
+            Self::ArrowMsg(_, msg) => msg.table_id.into_row_id(),
+        }
+    }
+
+    pub fn recording_id(&self) -> Option<&RecordingId> {
+        match self {
+            Self::BeginRecordingMsg(msg) => Some(&msg.info.recording_id),
+            Self::EntityPathOpMsg(recording_id, _) | Self::ArrowMsg(recording_id, _) => {
+                Some(recording_id)
+            }
+            Self::Goodbye(_) => None,
+        }
+    }
 }
 
 impl_into_enum!(BeginRecordingMsg, LogMsg, BeginRecordingMsg);
-impl_into_enum!(EntityPathOpMsg, LogMsg, EntityPathOpMsg);
-impl_into_enum!(ArrowMsg, LogMsg, ArrowMsg);
 
 // ----------------------------------------------------------------------------
 
@@ -206,8 +217,7 @@ impl_into_enum!(ArrowMsg, LogMsg, ArrowMsg);
 #[derive(Clone, Debug, PartialEq, Eq)]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
 pub struct BeginRecordingMsg {
-    pub msg_id: MsgId,
-
+    pub row_id: RowId,
     pub info: RecordingInfo,
 }
 
@@ -299,7 +309,7 @@ impl std::fmt::Display for RecordingSource {
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
 pub struct EntityPathOpMsg {
     /// A unique id per [`EntityPathOpMsg`].
-    pub msg_id: MsgId,
+    pub row_id: RowId,
 
     /// Time information (when it was logged, when it was received, …).
     ///
diff --git a/crates/re_log_types/src/path/component_name.rs b/crates/re_log_types/src/path/component_name.rs
index 4e0020903746..bb96e862f84c 100644
--- a/crates/re_log_types/src/path/component_name.rs
+++ b/crates/re_log_types/src/path/component_name.rs
@@ -1,3 +1,5 @@
+use crate::SizeBytes;
+
 re_string_interner::declare_new_type!(
     /// The name of an entity component, e.g. `pos` or `color`.
     pub struct ComponentName;
@@ -15,6 +17,7 @@ impl ComponentName {
     /// Excludes the rerun namespace, so you'll get `color` but `ext.confidence`.
     ///
     /// Used for most UI elements.
+    #[inline]
     pub fn short_name(&self) -> &'static str {
         let full_name = self.0.as_str();
         if let Some(short_name) = full_name.strip_prefix("rerun.") {
@@ -24,3 +27,10 @@
         }
     }
 }
+
+impl SizeBytes for ComponentName {
+    #[inline]
+    fn heap_size_bytes(&self) -> u64 {
+        0
+    }
+}
diff --git a/crates/re_log_types/src/path/entity_path.rs b/crates/re_log_types/src/path/entity_path.rs
index 1a68576555c0..d23b257aa64b 100644
--- a/crates/re_log_types/src/path/entity_path.rs
+++ b/crates/re_log_types/src/path/entity_path.rs
@@ -2,6 +2,7 @@ use std::sync::Arc;
 
 use crate::{
     hash::Hash64, parse_entity_path, path::entity_path_impl::EntityPathImpl, EntityPathPart,
+    SizeBytes,
 };
 
 // ----------------------------------------------------------------------------
@@ -156,6 +157,13 @@ impl EntityPath {
     }
 }
 
+impl SizeBytes for EntityPath {
+    #[inline]
+    fn heap_size_bytes(&self) -> u64 {
+        0 // NOTE: we assume it's amortized due to the `Arc`
+    }
+}
+
 impl FromIterator<EntityPathPart> for EntityPath {
     fn from_iter<T: IntoIterator<Item = EntityPathPart>>(parts: T) -> Self {
         Self::new(parts.into_iter().collect())
diff --git a/crates/re_log_types/src/size_bytes.rs b/crates/re_log_types/src/size_bytes.rs
new file mode 100644
index 000000000000..a670eee44d74
--- /dev/null
+++ b/crates/re_log_types/src/size_bytes.rs
@@ -0,0 +1,173 @@
+use std::collections::{BTreeMap, HashMap};
+
+use arrow2::datatypes::{DataType, Field};
+use smallvec::SmallVec;
+
+// ---
+
+/// Approximations of stack and heap size for both internal and external types.
+///
+/// Mostly used for statistics and triggering events such as garbage collection.
+pub trait SizeBytes: Sized {
+    /// Returns the total size of `self` in bytes, accounting for both stack and heap space.
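For a sense of intended usage before the rest of the trait: implementors only provide `heap_size_bytes`; the stack side and the total come from the provided methods that follow. A sketch for a user-defined type (hypothetical struct, not part of this diff):

```rust
use re_log_types::SizeBytes;

struct Trajectory {
    name: String,
    samples: Vec<[f32; 3]>,
}

impl SizeBytes for Trajectory {
    #[inline]
    fn heap_size_bytes(&self) -> u64 {
        // `[f32; 3]` is POD, so the sample buffer is just `len * size_of`.
        self.name.heap_size_bytes()
            + (self.samples.len() * std::mem::size_of::<[f32; 3]>()) as u64
    }
}

// `trajectory.total_size_bytes()` == stack size of the struct + the heap bytes above.
```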
+ #[inline] + fn total_size_bytes(&self) -> u64 { + self.stack_size_bytes() + self.heap_size_bytes() + } + + /// Returns the total size of `self` on the stack, in bytes. + /// + /// Defaults to `std::mem::size_of_val(self)`. + #[inline] + fn stack_size_bytes(&self) -> u64 { + std::mem::size_of_val(self) as _ + } + + /// Returns the total size of `self` on the heap, in bytes. + fn heap_size_bytes(&self) -> u64; +} + +// --- Std --- + +impl SizeBytes for String { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.capacity() as u64 + } +} + +impl SizeBytes for BTreeMap { + #[inline] + fn heap_size_bytes(&self) -> u64 { + // TODO(cmc): This is sub-optimal if these types are PODs. + + // NOTE: It's all on the heap at this point. + self.keys().map(SizeBytes::total_size_bytes).sum::() + + self.values().map(SizeBytes::total_size_bytes).sum::() + } +} + +impl SizeBytes for HashMap { + #[inline] + fn heap_size_bytes(&self) -> u64 { + // TODO(cmc): This is sub-optimal if these types are PODs. + + // NOTE: It's all on the heap at this point. + self.keys().map(SizeBytes::total_size_bytes).sum::() + + self.values().map(SizeBytes::total_size_bytes).sum::() + } +} + +impl SizeBytes for Vec { + /// Does not take capacity into account. + #[inline] + fn heap_size_bytes(&self) -> u64 { + // TODO(cmc): This is sub-optimal if these types are PODs. + + // NOTE: It's all on the heap at this point. + self.iter().map(SizeBytes::total_size_bytes).sum::() + } +} + +impl SizeBytes for SmallVec<[T; N]> { + /// Does not take capacity into account. + #[inline] + fn heap_size_bytes(&self) -> u64 { + // TODO(cmc): This is sub-optimal if these types are PODs. + + // NOTE: It's all on the heap at this point. + self.iter().map(SizeBytes::total_size_bytes).sum::() + } +} + +impl SizeBytes for Option { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.as_ref().map_or(0, SizeBytes::heap_size_bytes) + } +} + +// NOTE: `impl SizeBytesExt for T {}` would be nice but violates orphan rules. +macro_rules! 
impl_size_bytes_pod { + ($ty:ty) => { + impl SizeBytes for $ty { + #[inline] + fn heap_size_bytes(&self) -> u64 { + 0 + } + } + }; + ($ty:ty, $($rest:ty),+) => { + impl_size_bytes_pod!($ty); impl_size_bytes_pod!($($rest),+); + }; +} + +impl_size_bytes_pod!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128, bool, f32, f64); + +// --- Arrow --- + +impl SizeBytes for DataType { + #[inline] + fn heap_size_bytes(&self) -> u64 { + match self { + DataType::Null + | DataType::Binary + | DataType::Boolean + | DataType::Date32 + | DataType::Date64 + | DataType::Float16 + | DataType::Float32 + | DataType::Float64 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::Int8 + | DataType::LargeBinary + | DataType::LargeUtf8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 + | DataType::UInt8 + | DataType::Time32(_) + | DataType::Time64(_) + | DataType::Duration(_) + | DataType::Interval(_) + | DataType::FixedSizeBinary(_) + | DataType::Decimal(_, _) + | DataType::Decimal256(_, _) + | DataType::Utf8 => 0, + DataType::Timestamp(_, str) => str.heap_size_bytes(), + DataType::List(field) + | DataType::FixedSizeList(field, _) + | DataType::LargeList(field) + | DataType::Map(field, _) => field.total_size_bytes(), // NOTE: Boxed, it's all on the heap + DataType::Struct(fields) => fields.heap_size_bytes(), + DataType::Union(fields, indices, _) => { + fields.heap_size_bytes() + indices.heap_size_bytes() + } + DataType::Dictionary(_, datatype, _) => datatype.total_size_bytes(), // NOTE: Boxed, it's all on the heap + DataType::Extension(name, datatype, extra) => { + name.heap_size_bytes() + + datatype.total_size_bytes() // NOTE: Boxed, it's all on the heap + + extra.heap_size_bytes() + } + } + } +} + +impl SizeBytes for Field { + #[inline] + fn heap_size_bytes(&self) -> u64 { + let Field { + name, + data_type, + is_nullable, + metadata, + } = self; + + name.heap_size_bytes() + + data_type.heap_size_bytes() + + is_nullable.heap_size_bytes() + + metadata.heap_size_bytes() + } +} diff --git a/crates/re_log_types/src/time_point/arrow.rs b/crates/re_log_types/src/time_point/arrow.rs deleted file mode 100644 index bed235cb6f4f..000000000000 --- a/crates/re_log_types/src/time_point/arrow.rs +++ /dev/null @@ -1,189 +0,0 @@ -use arrow2::{ - array::{ - Int64Array, ListArray, MutableArray, MutableListArray, MutablePrimitiveArray, - MutableStructArray, MutableUtf8Array, StructArray, UInt8Array, Utf8Array, - }, - datatypes::{DataType, Field}, -}; -use arrow2_convert::{deserialize::ArrowDeserialize, field::ArrowField, serialize::ArrowSerialize}; - -use crate::{TimeInt, TimePoint, Timeline}; - -arrow2_convert::arrow_enable_vec_for_type!(TimePoint); - -impl ArrowField for TimePoint { - type Type = Self; - - #[inline] - fn data_type() -> DataType { - //TODO(john) Use Dictionary type - //let time_type_values = Utf8Array::::from_slice(["Time", "Sequence"]); - //let time_type = DataType::Dictionary( - // i32::KEY_TYPE, - // Box::new(time_type_values.data_type().clone()), - // false, - //); - let time_type = DataType::UInt8; - - let struct_type = DataType::Struct(vec![ - Field::new("timeline", DataType::Utf8, false), - Field::new("type", time_type, false), - Field::new("time", DataType::Int64, false), - ]); - - ListArray::::default_datatype(struct_type) - //TODO(john) Wrapping the DataType in Extension exposes a bug in arrow2::io::ipc - //DataType::Extension("TimePoint".to_owned(), Box::new(list_type), None) - } -} - -impl ArrowSerialize for TimePoint { - type MutableArrayType = MutableListArray; - - 
#[inline] - fn new_array() -> Self::MutableArrayType { - let timeline_array: Box = Box::new(MutableUtf8Array::::new()); - let time_type_array: Box = Box::new(MutablePrimitiveArray::::new()); - let time_array: Box = Box::new(MutablePrimitiveArray::::new()); - - let data_type = Self::data_type(); - let DataType::List(inner) = data_type.to_logical_type() else { unreachable!() }; - let str_array = MutableStructArray::new( - inner.data_type.clone(), - vec![timeline_array, time_type_array, time_array], - ); - MutableListArray::new_from(str_array, data_type, 0) - } - - fn arrow_serialize( - v: &::Type, - array: &mut Self::MutableArrayType, - ) -> arrow2::error::Result<()> { - let struct_array = array.mut_values(); - for (timeline, time) in &v.0 { - ::arrow_serialize( - &timeline.name().to_string(), - struct_array.value(0).unwrap(), - )?; - ::arrow_serialize( - &(timeline.typ() as u8), - struct_array.value(1).unwrap(), - )?; - ::arrow_serialize( - &time.as_i64(), - struct_array.value(2).unwrap(), - )?; - struct_array.push(true); - } - array.try_push_valid() - } -} - -// ---------------------------------------------------------------------------- - -pub struct TimePointIterator<'a> { - time_points: <&'a ListArray as IntoIterator>::IntoIter, -} - -impl<'a> Iterator for TimePointIterator<'a> { - type Item = TimePoint; - - #[inline] - fn next(&mut self) -> Option { - self.time_points.next().flatten().map(|time_point| { - let struct_arr = time_point - .as_any() - .downcast_ref::() - .expect("StructArray"); - let values = struct_arr.values(); - let timelines = values[0] - .as_any() - .downcast_ref::>() - .expect("timelines"); - let types = values[1] - .as_any() - .downcast_ref::() - .expect("types"); - let times = values[2] - .as_any() - .downcast_ref::() - .expect("times"); - - let time_points = timelines.iter().zip(types.iter()).zip(times.iter()).map( - |((timeline, ty), time)| { - ( - Timeline::new( - timeline.unwrap(), - num_traits::FromPrimitive::from_u8(*ty.unwrap()) - .expect("valid TimeType"), - ), - TimeInt::from(*time.unwrap()), - ) - }, - ); - - time_points.collect() - }) - } -} - -// ---------------------------------------------------------------------------- -pub struct TimePointArray; - -impl<'a> IntoIterator for &'a TimePointArray { - type Item = TimePoint; - - type IntoIter = TimePointIterator<'a>; - - fn into_iter(self) -> Self::IntoIter { - panic!("Use iter_from_array_ref. 
This is a quirk of the way the traits work in arrow2_convert."); - } -} - -impl arrow2_convert::deserialize::ArrowArray for TimePointArray { - type BaseArrayType = arrow2::array::MapArray; - - #[inline] - fn iter_from_array_ref(b: &dyn arrow2::array::Array) -> <&Self as IntoIterator>::IntoIter { - let arr = b.as_any().downcast_ref::>().unwrap(); - assert_eq!(arr.validity(), None, "TimePoints should be non-null"); - - TimePointIterator { - time_points: arr.into_iter(), - } - } -} - -impl ArrowDeserialize for TimePoint { - type ArrayType = TimePointArray; - - fn arrow_deserialize( - v: <&Self::ArrayType as IntoIterator>::Item, - ) -> Option<::Type> { - Some(v) - } -} - -// ---------------------------------------------------------------------------- - -#[test] -fn test_timepoint_roundtrip() { - use crate::datagen; - use arrow2::array::Array; - use arrow2_convert::{deserialize::TryIntoCollection, serialize::TryIntoArrow}; - - let time_points_in = vec![ - TimePoint::from([ - datagen::build_log_time(crate::Time::from_ns_since_epoch(100)), - datagen::build_frame_nr(1234.into()), - ]), - TimePoint::from([ - datagen::build_log_time(crate::Time::from_ns_since_epoch(200)), - datagen::build_frame_nr(2345.into()), - ]), - ]; - - let array: Box = time_points_in.try_into_arrow().unwrap(); - let time_points_out: Vec = TryIntoCollection::try_into_collection(array).unwrap(); - assert_eq!(time_points_in, time_points_out); -} diff --git a/crates/re_log_types/src/time_point/mod.rs b/crates/re_log_types/src/time_point/mod.rs index bec628a8c954..4074810b8222 100644 --- a/crates/re_log_types/src/time_point/mod.rs +++ b/crates/re_log_types/src/time_point/mod.rs @@ -1,10 +1,9 @@ use std::collections::{btree_map, BTreeMap}; -mod arrow; mod time_int; mod timeline; -use crate::{time::Time, TimeRange}; +use crate::{time::Time, SizeBytes, TimeRange}; // Re-exports pub use time_int::TimeInt; @@ -21,6 +20,12 @@ pub use timeline::{Timeline, TimelineName}; #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct TimePoint(BTreeMap); +impl From> for TimePoint { + fn from(timelines: BTreeMap) -> Self { + Self(timelines) + } +} + impl TimePoint { /// Logging to this time means the data will show upp in all timelines, /// past and future. The time will be [`TimeInt::BEGINNING`], meaning it will @@ -68,6 +73,7 @@ impl TimePoint { /// Computes the union of two `TimePoint`s, keeping the maximum time value in case of /// conflicts. + #[inline] pub fn union_max(mut self, rhs: &Self) -> Self { for (&timeline, &time) in rhs { match self.0.entry(timeline) { @@ -84,6 +90,23 @@ impl TimePoint { } } +impl SizeBytes for TimePoint { + #[inline] + fn heap_size_bytes(&self) -> u64 { + type K = Timeline; + type V = TimeInt; + + // NOTE: This is only here to make sure this method fails to compile if the inner type + // changes, as the following size computation assumes POD types. + let inner: &BTreeMap = &self.0; + + let keys_size_bytes = std::mem::size_of::() * inner.len(); + let values_size_bytes = std::mem::size_of::() * inner.len(); + + (keys_size_bytes + values_size_bytes) as u64 + } +} + // ---------------------------------------------------------------------------- /// The type of a [`TimeInt`] or [`Timeline`]. 
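A short sketch of `union_max` semantics together with the `From` conversion added above (`TimePoint::get` is assumed as used earlier in this diff):

```rust
use re_log_types::{TimeInt, TimePoint, Timeline};

fn main() {
    let frame_nr = Timeline::new_sequence("frame_nr");
    let clock = Timeline::new_sequence("clock");

    let a = TimePoint::from([(frame_nr, TimeInt::from(10))]);
    let b = TimePoint::from([(frame_nr, TimeInt::from(7)), (clock, TimeInt::from(3))]);

    // Union of the timelines; on conflict (`frame_nr`) the maximum value wins.
    let max = a.union_max(&b);
    assert_eq!(max.get(&frame_nr), Some(&TimeInt::from(10)));
    assert_eq!(max.get(&clock), Some(&TimeInt::from(3)));
}
```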
diff --git a/crates/re_log_types/src/time_point/timeline.rs b/crates/re_log_types/src/time_point/timeline.rs index 14ed5cb5d835..d42f92d0869c 100644 --- a/crates/re_log_types/src/time_point/timeline.rs +++ b/crates/re_log_types/src/time_point/timeline.rs @@ -1,4 +1,6 @@ -use crate::{TimeRange, TimeType}; +use arrow2::datatypes::{DataType, TimeUnit}; + +use crate::{SizeBytes, TimeRange, TimeType}; re_string_interner::declare_new_type!( /// The name of a timeline. Often something like `"log_time"` or `"frame_nr"`. @@ -76,6 +78,7 @@ impl Timeline { } /// Returns a formatted string of `time_range` on this `Timeline`. + #[inline] pub fn format_time_range(&self, time_range: &TimeRange) -> String { format!( " - {}: from {} to {} (all inclusive)", @@ -84,10 +87,26 @@ impl Timeline { self.typ.format(time_range.max), ) } + + /// Returns the appropriate arrow datatype to represent this timeline. + #[inline] + pub fn datatype(&self) -> DataType { + match self.typ { + TimeType::Time => DataType::Timestamp(TimeUnit::Nanosecond, None), + TimeType::Sequence => DataType::Int64, + } + } } impl nohash_hasher::IsEnabled for Timeline {} +impl SizeBytes for Timeline { + #[inline] + fn heap_size_bytes(&self) -> u64 { + 0 + } +} + // required for [`nohash_hasher`]. #[allow(clippy::derive_hash_xor_eq)] impl std::hash::Hash for Timeline { diff --git a/crates/re_log_types/src/time_range.rs b/crates/re_log_types/src/time_range.rs index 645e8aff889a..e8350932e9cc 100644 --- a/crates/re_log_types/src/time_range.rs +++ b/crates/re_log_types/src/time_range.rs @@ -1,6 +1,6 @@ use std::ops::RangeInclusive; -use crate::{TimeInt, TimeReal}; +use crate::{SizeBytes, TimeInt, TimeReal}; // ---------------------------------------------------------------------------- @@ -38,6 +38,7 @@ impl TimeRange { self.min.as_i64().abs_diff(self.max.as_i64()) } + #[inline] pub fn center(&self) -> TimeInt { self.min + TimeInt::from((self.abs_length() / 2) as i64) } @@ -47,6 +48,11 @@ impl TimeRange { self.min <= time && time <= self.max } + #[inline] + pub fn intersects(&self, other: Self) -> bool { + self.min <= other.max && self.max >= other.min + } + #[inline] pub fn union(&self, other: Self) -> Self { Self { @@ -56,6 +62,13 @@ impl TimeRange { } } +impl SizeBytes for TimeRange { + #[inline] + fn heap_size_bytes(&self) -> u64 { + 0 + } +} + impl From for RangeInclusive { fn from(range: TimeRange) -> RangeInclusive { range.min..=range.max diff --git a/crates/re_memory/Cargo.toml b/crates/re_memory/Cargo.toml index bd4f33c96b79..7379533ca1b3 100644 --- a/crates/re_memory/Cargo.toml +++ b/crates/re_memory/Cargo.toml @@ -27,7 +27,7 @@ itertools = { workspace = true } nohash-hasher = "0.2" once_cell = "1.16" parking_lot.workspace = true -smallvec = "1.10" +smallvec.workspace = true # native dependencies: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] diff --git a/crates/re_query/Cargo.toml b/crates/re_query/Cargo.toml index 4a28c1fd0f54..b09471633c7e 100644 --- a/crates/re_query/Cargo.toml +++ b/crates/re_query/Cargo.toml @@ -29,7 +29,6 @@ re_arrow_store.workspace = true re_data_store.workspace = true re_format.workspace = true re_log_types.workspace = true -re_log.workspace = true # External dependencies: arrow2 = { workspace = true, features = [ diff --git a/crates/re_query/benches/query_benchmark.rs b/crates/re_query/benches/query_benchmark.rs index d65c2947cbeb..a79c416e44c9 100644 --- a/crates/re_query/benches/query_benchmark.rs +++ b/crates/re_query/benches/query_benchmark.rs @@ -8,7 +8,7 @@ use re_arrow_store::{DataStore, 
LatestAtQuery}; use re_log_types::{ component_types::{ColorRGBA, InstanceKey, Point2D, Vec3D}, datagen::{build_frame_nr, build_some_colors, build_some_point2d, build_some_vec3d}, - entity_path, Component, DataRow, EntityPath, Index, MsgId, TimeType, Timeline, + entity_path, Component, DataRow, EntityPath, Index, RowId, TimeType, Timeline, }; use re_query::query_entity_with_primary; @@ -124,7 +124,7 @@ fn build_points_rows(paths: &[EntityPath], pts: usize) -> Vec { .flat_map(move |frame_idx| { paths.iter().map(move |path| { DataRow::from_cells2( - MsgId::ZERO, + RowId::ZERO, path.clone(), [build_frame_nr((frame_idx as i64).into())], pts as _, @@ -140,7 +140,7 @@ fn build_vecs_rows(paths: &[EntityPath], pts: usize) -> Vec { .flat_map(move |frame_idx| { paths.iter().map(move |path| { DataRow::from_cells1( - MsgId::ZERO, + RowId::ZERO, path.clone(), [build_frame_nr((frame_idx as i64).into())], pts as _, diff --git a/crates/re_query/examples/range.rs b/crates/re_query/examples/range.rs index 47251d85c75c..10328e328b04 100644 --- a/crates/re_query/examples/range.rs +++ b/crates/re_query/examples/range.rs @@ -8,7 +8,7 @@ use re_arrow_store::{DataStore, RangeQuery, TimeRange}; use re_log_types::{ component_types::{InstanceKey, Point2D, Rect2D}, datagen::{build_frame_nr, build_some_point2d, build_some_rects}, - Component as _, DataRow, EntityPath, MsgId, TimeType, + Component as _, DataRow, EntityPath, RowId, TimeType, }; use re_query::range_entity_with_primary; @@ -23,27 +23,27 @@ fn main() { let frame4 = [build_frame_nr(4.into())]; let rects = build_some_rects(2); - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), frame1, 2, &rects); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), frame1, 2, &rects); store.insert_row(&row).unwrap(); let points = build_some_point2d(2); - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), frame2, 2, &points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), frame2, 2, &points); store.insert_row(&row).unwrap(); let points = build_some_point2d(4); - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), frame3, 4, &points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), frame3, 4, &points); store.insert_row(&row).unwrap(); let rects = build_some_rects(3); - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), frame4, 3, &rects); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), frame4, 3, &rects); store.insert_row(&row).unwrap(); let points = build_some_point2d(3); - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), frame4, 3, &points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), frame4, 3, &points); store.insert_row(&row).unwrap(); let rects = build_some_rects(3); - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), frame4, 3, &rects); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), frame4, 3, &rects); store.insert_row(&row).unwrap(); let query = RangeQuery::new(frame2[0].0, TimeRange::new(frame2[0].1, frame4[0].1)); diff --git a/crates/re_query/src/dataframe_util.rs b/crates/re_query/src/dataframe_util.rs index c71d48485eb5..2dfc18ba224c 100644 --- a/crates/re_query/src/dataframe_util.rs +++ b/crates/re_query/src/dataframe_util.rs @@ -135,9 +135,9 @@ impl ComponentWithInstances { where for<'a> &'a C0::ArrayType: IntoIterator, { - if C0::name() != self.name { + if C0::name() != self.name() { return Err(QueryError::TypeMismatch { - actual: self.name, + actual: 
self.name(), requested: C0::name(), }); } @@ -145,8 +145,7 @@ impl ComponentWithInstances { let instance_keys: Vec> = self.iter_instance_keys()?.map(Some).collect_vec(); - let values = - arrow_array_deserialize_iterator::>(self.values.as_ref())?.collect_vec(); + let values = self.values.try_to_native_opt()?.collect_vec(); df_builder2::(&instance_keys, &values) } @@ -160,8 +159,7 @@ where pub fn as_df1(&self) -> crate::Result { let instance_keys = self.primary.iter_instance_keys()?.map(Some).collect_vec(); - let primary_values = - arrow_array_deserialize_iterator(self.primary.values.as_ref())?.collect_vec(); + let primary_values = self.primary.values.try_to_native_opt()?.collect_vec(); df_builder2::(&instance_keys, &primary_values) } @@ -173,8 +171,7 @@ where { let instance_keys = self.primary.iter_instance_keys()?.map(Some).collect_vec(); - let primary_values = - arrow_array_deserialize_iterator(self.primary.values.as_ref())?.collect_vec(); + let primary_values = self.primary.values.try_to_native_opt()?.collect_vec(); let c1_values = self.iter_component::()?.collect_vec(); diff --git a/crates/re_query/src/entity_view.rs b/crates/re_query/src/entity_view.rs index e0054b917d30..a9bf71e39f1a 100644 --- a/crates/re_query/src/entity_view.rs +++ b/crates/re_query/src/entity_view.rs @@ -1,13 +1,13 @@ use std::{collections::BTreeMap, marker::PhantomData}; -use arrow2::array::{Array, MutableArray, PrimitiveArray}; +use arrow2::array::{Array, PrimitiveArray}; use re_format::arrow; use re_log_types::{ component_types::InstanceKey, external::arrow2_convert::{ deserialize::arrow_array_deserialize_iterator, field::ArrowField, serialize::ArrowSerialize, }, - Component, ComponentName, DeserializableComponent, SerializableComponent, + Component, ComponentName, DataCell, DeserializableComponent, RowId, SerializableComponent, }; use crate::QueryError; @@ -20,55 +20,54 @@ use crate::QueryError; /// See: [`crate::get_component_with_instances`] #[derive(Clone, Debug)] pub struct ComponentWithInstances { - pub(crate) name: ComponentName, - // TODO(jleibs): Remove optional once the store guarantees this will always exist - pub(crate) instance_keys: Option>, - pub(crate) values: Box, + pub(crate) instance_keys: DataCell, + pub(crate) values: DataCell, } impl ComponentWithInstances { + #[inline] pub fn name(&self) -> ComponentName { - self.name + self.values.component_name() } /// Number of values. 1 for splats. 
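The reworked `ComponentWithInstances` is easiest to see at a call site: `from_native` (shown further down in this file) is now infallible and slice-based, and both sides are held as `DataCell`s. A sketch, assuming the type is re-exported from `re_query` as before:

```rust
use re_log_types::component_types::{InstanceKey, Point2D};
use re_query::ComponentWithInstances;

fn main() {
    let instance_keys = [InstanceKey(17), InstanceKey(42)];
    let points = [Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }];

    // Instance keys are always present now, so this no longer takes an `Option`
    // and no longer returns a `Result`.
    let component = ComponentWithInstances::from_native(&instance_keys, &points);

    assert_eq!(component.len(), 2);
    assert!(component.lookup_arrow(&InstanceKey(42)).is_some());
    assert!(component.lookup_arrow(&InstanceKey(18)).is_none());
}
```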
+ #[inline] pub fn len(&self) -> usize { - self.values.len() + self.values.num_instances() as _ } + #[inline] pub fn is_empty(&self) -> bool { - self.values.len() == 0 + self.values.is_empty() } /// Iterate over the instance keys /// /// If the instance keys don't exist, generate them based on array-index position of the values + #[inline] pub fn iter_instance_keys(&self) -> crate::Result + '_> { - if let Some(keys) = &self.instance_keys { - let iter = arrow_array_deserialize_iterator::(keys.as_ref())?; - Ok(itertools::Either::Left(iter)) - } else { - let auto_num = (0..self.len()).map(|i| InstanceKey(i as u64)); - Ok(itertools::Either::Right(auto_num)) - } + self.instance_keys + .try_to_native::() + .map_err(Into::into) } /// Iterate over the values and convert them to a native `Component` + #[inline] pub fn iter_values( &self, ) -> crate::Result> + '_> where for<'a> &'a C::ArrayType: IntoIterator, { - if C::name() != self.name { + if C::name() != self.name() { return Err(QueryError::TypeMismatch { - actual: self.name, + actual: self.name(), requested: C::name(), }); } Ok(arrow_array_deserialize_iterator::>( - self.values.as_ref(), + self.values.as_arrow_ref(), )?) } @@ -77,9 +76,9 @@ impl ComponentWithInstances { where for<'a> &'a C::ArrayType: IntoIterator, { - if C::name() != self.name { + if C::name() != self.name() { return Err(QueryError::TypeMismatch { - actual: self.name, + actual: self.name(), requested: C::name(), }); } @@ -96,57 +95,35 @@ impl ComponentWithInstances { /// Look up the value that corresponds to a given `InstanceKey` and return as an arrow `Array` pub fn lookup_arrow(&self, instance_key: &InstanceKey) -> Option> { - let offset = if let Some(instance_keys) = &self.instance_keys { - // If `instance_keys` is set, extract the `PrimitiveArray`, and find - // the index of the value by `binary_search` - - let keys = instance_keys - .as_any() - .downcast_ref::>()? - .values(); - - // If the value is splatted, return the offset of the splat - if keys.len() == 1 && keys[0] == InstanceKey::SPLAT.0 { - 0 - } else { - // Otherwise binary search to find the offset of the instance - keys.binary_search(&instance_key.0).ok()? - } + let keys = self + .instance_keys + .as_arrow_ref() + .as_any() + .downcast_ref::>()? + .values(); + + // If the value is splatted, return the offset of the splat + let offset = if keys.len() == 1 && keys[0] == InstanceKey::SPLAT.0 { + 0 } else { - // If `instance_keys` is not set, then offset is the instance because the implicit - // index is a sequential list - let offset = instance_key.0 as usize; - (offset < self.values.len()).then_some(offset)? + // Otherwise binary search to find the offset of the instance + keys.binary_search(&instance_key.0).ok()? as u32 }; - Some(self.values.slice(offset, 1)) + Some(self.values.as_arrow_ref().slice(offset as _, 1)) } /// Produce a `ComponentWithInstances` from native component types pub fn from_native( - instance_keys: Option<&Vec>, - values: &Vec, - ) -> crate::Result { - use re_log_types::external::arrow2_convert::serialize::arrow_serialize_to_mutable_array; - - let instance_keys = if let Some(keys) = instance_keys { - Some( - arrow_serialize_to_mutable_array::>( - keys, - )? 
- .as_box(), - ) - } else { - None - }; - - let values = arrow_serialize_to_mutable_array::>(values)?.as_box(); - - Ok(ComponentWithInstances { - name: C::name(), + instance_keys: &[InstanceKey], + values: &[C], + ) -> ComponentWithInstances { + let instance_keys = DataCell::from_native(instance_keys); + let values = DataCell::from_native(values); + ComponentWithInstances { instance_keys, values, - }) + } } } @@ -254,6 +231,7 @@ where /// the primary component using instance keys. #[derive(Clone, Debug)] pub struct EntityView { + pub(crate) row_id: RowId, pub(crate) primary: ComponentWithInstances, pub(crate) components: BTreeMap, pub(crate) phantom: PhantomData, @@ -263,10 +241,10 @@ impl std::fmt::Display for EntityView { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let primary_table = arrow::format_table( [ - self.primary.instance_keys.as_ref().unwrap().as_ref(), - self.primary.values.as_ref(), + self.primary.instance_keys.as_arrow_ref(), + self.primary.values.as_arrow_ref(), ], - ["InstanceId", self.primary.name.as_str()], + ["InstanceId", self.primary.name().as_str()], ); f.write_fmt(format_args!("EntityView:\n{primary_table}")) @@ -281,6 +259,11 @@ where pub fn num_instances(&self) -> usize { self.primary.len() } + + #[inline] + pub fn row_id(&self) -> RowId { + self.row_id + } } impl EntityView @@ -288,16 +271,19 @@ where for<'a> &'a Primary::ArrayType: IntoIterator, { /// Iterate over the instance keys + #[inline] pub fn iter_instance_keys(&self) -> crate::Result + '_> { self.primary.iter_instance_keys() } /// Iterate over the primary component values. + #[inline] pub fn iter_primary(&self) -> crate::Result> + '_> { self.primary.iter_values() } /// Iterate over the flattened list of primary component values if any. + #[inline] pub fn iter_primary_flattened(&self) -> impl Iterator + '_ { self.primary .iter_values() @@ -308,6 +294,7 @@ where } /// Check if the entity has a component and its not empty + #[inline] pub fn has_component(&self) -> bool { self.components .get(&C::name()) @@ -331,7 +318,7 @@ where let mut component_instance_key_iter = component.iter_instance_keys()?; let component_value_iter = - arrow_array_deserialize_iterator::>(component.values.as_ref())?; + arrow_array_deserialize_iterator::>(component.values.as_arrow_ref())?; let next_component_instance_key = component_instance_key_iter.next(); @@ -343,49 +330,56 @@ where splatted_component_value: None, })) } else { - let nulls = (0..self.primary.values.len()).map(|_| None); + let nulls = (0..self.primary.values.num_instances()).map(|_| None); Ok(itertools::Either::Right(nulls)) } } /// Helper function to produce an `EntityView` from rust-native `field_types` - pub fn from_native(c0: (Option<&Vec>, &Vec)) -> crate::Result { - let primary = ComponentWithInstances::from_native(c0.0, c0.1)?; - - Ok(Self { + #[inline] + pub fn from_native(c0: (&[InstanceKey], &[Primary])) -> Self { + let primary = ComponentWithInstances::from_native(c0.0, c0.1); + Self { + row_id: RowId::ZERO, primary, components: Default::default(), phantom: PhantomData, - }) + } } /// Helper function to produce an `EntityView` from rust-native `field_types` + #[inline] pub fn from_native2( - primary: (Option<&Vec>, &Vec), - component: (Option<&Vec>, &Vec), - ) -> crate::Result + primary: (&[InstanceKey], &[Primary]), + component: (&[InstanceKey], &[C]), + ) -> Self where C: Component + 'static, C: ArrowSerialize + ArrowField, { - let primary = ComponentWithInstances::from_native(primary.0, primary.1)?; - let component_c1 = 
ComponentWithInstances::from_native(component.0, component.1)?; + let primary = ComponentWithInstances::from_native(primary.0, primary.1); + let component_c1 = ComponentWithInstances::from_native(component.0, component.1); - let components = [(component_c1.name, component_c1)].into(); + let components = [(component_c1.name(), component_c1)].into(); - Ok(Self { + Self { + row_id: RowId::ZERO, primary, components, phantom: PhantomData, - }) + } } } #[test] fn lookup_value() { + use arrow2::array::MutableArray; use re_log_types::component_types::{InstanceKey, Point2D, Rect2D}; use re_log_types::external::arrow2_convert::serialize::arrow_serialize_to_mutable_array; - let points = vec![ + + let instance_keys = InstanceKey::from_iter(0..5); + + let points = [ Point2D { x: 1.0, y: 2.0 }, // Point2D { x: 3.0, y: 4.0 }, Point2D { x: 5.0, y: 6.0 }, @@ -393,22 +387,23 @@ fn lookup_value() { Point2D { x: 9.0, y: 10.0 }, ]; - let component = ComponentWithInstances::from_native(None, &points).unwrap(); + let component = + ComponentWithInstances::from_native(instance_keys.as_slice(), points.as_slice()); let missing_value = component.lookup_arrow(&InstanceKey(5)); assert_eq!(missing_value, None); let value = component.lookup_arrow(&InstanceKey(2)).unwrap(); - let expected_point = vec![points[2].clone()]; + let expected_point = [points[2].clone()]; let expected_arrow = - arrow_serialize_to_mutable_array::>(&expected_point) + arrow_serialize_to_mutable_array::(expected_point.as_slice()) .unwrap() .as_box(); assert_eq!(expected_arrow, value); - let instance_keys = vec![ + let instance_keys = [ InstanceKey(17), InstanceKey(47), InstanceKey(48), @@ -416,16 +411,16 @@ fn lookup_value() { InstanceKey(472), ]; - let component = ComponentWithInstances::from_native(Some(&instance_keys), &points).unwrap(); + let component = ComponentWithInstances::from_native(instance_keys.as_slice(), &points); let missing_value = component.lookup_arrow(&InstanceKey(46)); assert_eq!(missing_value, None); let value = component.lookup_arrow(&InstanceKey(99)).unwrap(); - let expected_point = vec![points[3].clone()]; + let expected_point = [points[3].clone()]; let expected_arrow = - arrow_serialize_to_mutable_array::>(&expected_point) + arrow_serialize_to_mutable_array::(expected_point.as_slice()) .unwrap() .as_box(); assert_eq!(expected_arrow, value); @@ -452,14 +447,14 @@ fn lookup_value() { #[test] fn lookup_splat() { use re_log_types::component_types::{InstanceKey, Point2D}; - let instances = vec![ + let instances = [ InstanceKey::SPLAT, // ]; - let points = vec![ + let points = [ Point2D { x: 1.0, y: 2.0 }, // ]; - let component = ComponentWithInstances::from_native(Some(&instances), &points).unwrap(); + let component = ComponentWithInstances::from_native(instances.as_slice(), points.as_slice()); // Any instance we look up will return the splatted value let value = component.lookup::(&InstanceKey(1)).unwrap(); diff --git a/crates/re_query/src/lib.rs b/crates/re_query/src/lib.rs index 17f414a64c5b..e17a237159fc 100644 --- a/crates/re_query/src/lib.rs +++ b/crates/re_query/src/lib.rs @@ -41,6 +41,9 @@ pub enum QueryError { requested: re_log_types::ComponentName, }, + #[error("Error with one or more of the underlying data cells: {0}")] + DataCell(#[from] re_log_types::DataCellError), + #[error("Error converting arrow data")] ArrowError(#[from] arrow2::error::Error), diff --git a/crates/re_query/src/query.rs b/crates/re_query/src/query.rs index f52130654907..23ec0c70e353 100644 --- a/crates/re_query/src/query.rs +++ b/crates/re_query/src/query.rs @@ -1,7 +1,9 @@ use
std::collections::BTreeMap; use re_arrow_store::{DataStore, LatestAtQuery}; -use re_log_types::{component_types::InstanceKey, Component, ComponentName, DataRow, EntityPath}; +use re_log_types::{ + component_types::InstanceKey, Component, ComponentName, DataRow, EntityPath, RowId, +}; use crate::{ComponentWithInstances, EntityView, QueryError}; @@ -14,7 +16,7 @@ use crate::{ComponentWithInstances, EntityView, QueryError}; /// let ent_path = "point"; /// let query = LatestAtQuery::new(Timeline::new_sequence("frame_nr"), 123.into()); /// -/// let component = re_query::get_component_with_instances( +/// let (_, component) = re_query::get_component_with_instances( /// &store, /// &query, /// &ent_path.into(), @@ -46,20 +48,24 @@ pub fn get_component_with_instances( query: &LatestAtQuery, ent_path: &EntityPath, component: ComponentName, -) -> crate::Result { +) -> crate::Result<(RowId, ComponentWithInstances)> { + debug_assert_eq!(store.cluster_key(), InstanceKey::name()); + let components = [InstanceKey::name(), component]; - let row_indices = store + let (row_id, mut cells) = store .latest_at(query, ent_path, component, &components) .ok_or(QueryError::PrimaryNotFound)?; - let mut results = store.get(&components, &row_indices); - - Ok(ComponentWithInstances { - name: component, - instance_keys: results[0].take(), - values: results[1].take().ok_or(QueryError::PrimaryNotFound)?, - }) + Ok(( + row_id, + ComponentWithInstances { + // NOTE: The unwrap cannot fail, the cluster key's presence is guaranteed + // by the store. + instance_keys: cells[0].take().unwrap(), + values: cells[1].take().ok_or(QueryError::PrimaryNotFound)?, + }, + )) } /// Retrieve an `EntityView` from the `DataStore` @@ -115,7 +121,7 @@ pub fn query_entity_with_primary( ) -> crate::Result> { crate::profile_function!(); - let primary = get_component_with_instances(store, query, ent_path, Primary::name())?; + let (row_id, primary) = get_component_with_instances(store, query, ent_path, Primary::name())?; // TODO(jleibs): lots of room for optimization here. Once "instance" is // guaranteed to be sorted we should be able to leverage this during the @@ -124,11 +130,13 @@ pub fn query_entity_with_primary( let components: crate::Result> = components .iter() - // Filter out `Primary` and `InstanceKey` from the component list since are + // Filter out `Primary` and `InstanceKey` from the component list since they are // always queried above when creating the primary. 
.filter(|component| *component != &Primary::name() && *component != &InstanceKey::name()) .filter_map(|component| { - match get_component_with_instances(store, query, ent_path, *component) { + match get_component_with_instances(store, query, ent_path, *component) + .map(|(_, cwi)| cwi) + { Ok(component_result) => Some(Ok((*component, component_result))), Err(QueryError::PrimaryNotFound) => None, Err(err) => Some(Err(err)), @@ -137,6 +145,7 @@ pub fn query_entity_with_primary( .collect(); Ok(EntityView { + row_id, primary, components: components?, phantom: std::marker::PhantomData, @@ -148,7 +157,6 @@ pub fn __populate_example_store() -> DataStore { use re_log_types::{ component_types::{ColorRGBA, Point2D}, datagen::build_frame_nr, - MsgId, }; let mut store = DataStore::new(InstanceKey::name(), Default::default()); @@ -160,7 +168,7 @@ pub fn __populate_example_store() -> DataStore { let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; let row = DataRow::from_cells2( - MsgId::ZERO, + RowId::random(), ent_path, timepoint, instances.len() as _, @@ -172,7 +180,7 @@ pub fn __populate_example_store() -> DataStore { let colors = vec![ColorRGBA(0xff000000)]; let row = DataRow::from_cells2( - MsgId::ZERO, + RowId::random(), ent_path, timepoint, instances.len() as _, @@ -194,7 +202,7 @@ fn simple_get_component() { let ent_path = "point"; let query = LatestAtQuery::new(Timeline::new_sequence("frame_nr"), 123.into()); - let component = + let (_, component) = get_component_with_instances(&store, &query, &ent_path.into(), Point2D::name()).unwrap(); #[cfg(feature = "polars")] diff --git a/crates/re_query/src/range.rs b/crates/re_query/src/range.rs index 41d59a81c974..2198d9dedcb2 100644 --- a/crates/re_query/src/range.rs +++ b/crates/re_query/src/range.rs @@ -93,27 +93,26 @@ pub fn range_entity_with_primary<'a, Primary: Component + 'a, const N: usize>( .chain( store .range(query, ent_path, components) - .map(move |(time, _, row_indices)| { - let results = store.get(&components, &row_indices); - let instance_keys = results[cluster_col].clone(); // shallow - let cwis = results + .map(move |(time, row_id, mut cells)| { + // NOTE: The unwrap cannot fail, the cluster key's presence is guaranteed + // by the store. + let instance_keys = cells[cluster_col].take().unwrap(); + let is_primary = cells[primary_col].is_some(); + let cwis = cells .into_iter() - .enumerate() - .map(|(i, res)| { - res.map(|res| { - ComponentWithInstances { - name: components[i], - instance_keys: instance_keys.clone(), // shallow - values: res.clone(), // shallow - } + .map(|cell| { + cell.map(|cell| { + ( + row_id, + ComponentWithInstances { + instance_keys: instance_keys.clone(), /* shallow */ + values: cell, + }, + ) }) }) .collect::>(); - ( - time, - row_indices[primary_col].is_some(), // is_primary - cwis, - ) + (time, is_primary, cwis) }), ) .filter_map(move |(time, is_primary, cwis)| { @@ -127,13 +126,16 @@ pub fn range_entity_with_primary<'a, Primary: Component + 'a, const N: usize>( // We only yield if the primary component has been updated! 
is_primary.then(|| { + // NOTE: safe to unwrap, set just above + let (row_id, cwi) = state[primary_col].clone().unwrap(); // shallow + let ent_view = EntityView { - // safe to unwrap, set just above - primary: state[primary_col].clone().unwrap(), // shallow + row_id, + primary: cwi, components: components .iter() .zip(state.iter().cloned() /* shallow */) - .filter_map(|(component, cwi)| cwi.map(|cwi| (*component, cwi))) + .filter_map(|(component, cwi)| cwi.map(|(_, cwi)| (*component, cwi))) .collect(), phantom: std::marker::PhantomData, }; diff --git a/crates/re_query/src/visit.rs b/crates/re_query/src/visit.rs index dccdb5050b51..86bdb178a179 100644 --- a/crates/re_query/src/visit.rs +++ b/crates/re_query/src/visit.rs @@ -9,23 +9,24 @@ //! # use re_query::EntityView; //! # use re_log_types::component_types::{ColorRGBA, InstanceKey, Point2D}; //! -//! let points = vec![ +//! let instances = InstanceKey::from_iter(0..3); +//! +//! let points = [ //! Point2D { x: 1.0, y: 2.0 }, //! Point2D { x: 3.0, y: 4.0 }, //! Point2D { x: 5.0, y: 6.0 }, //! ]; //! -//! let colors = vec![ +//! let colors = [ //! ColorRGBA(0), //! ColorRGBA(1), //! ColorRGBA(2), //! ]; //! //! let entity_view = EntityView::from_native2( -//! (None, &points), -//! (None, &colors), -//! ) -//! .unwrap(); +//! (&instances, &points), +//! (&instances, &colors), +//! ); //! //! let mut points_out = Vec::::new(); //! let mut colors_out = Vec::::new(); @@ -38,8 +39,8 @@ //! .ok() //! .unwrap(); //! -//! assert_eq!(points, points_out); -//! assert_eq!(colors, colors_out); +//! assert_eq!(points.as_slice(), points_out.as_slice()); +//! assert_eq!(colors.as_slice(), colors_out.as_slice()); //! ``` use re_log_types::{ diff --git a/crates/re_query/tests/query_tests.rs b/crates/re_query/tests/query_tests.rs index a1f28a5724eb..becd59681d7f 100644 --- a/crates/re_query/tests/query_tests.rs +++ b/crates/re_query/tests/query_tests.rs @@ -5,7 +5,7 @@ use re_log_types::{ component_types::InstanceKey, component_types::{ColorRGBA, Point2D}, datagen::build_frame_nr, - Component, DataRow, MsgId, + Component, DataRow, RowId, }; use re_query::query_entity_with_primary; @@ -18,14 +18,14 @@ fn simple_query() { // Create some points with implicit instances let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path, timepoint, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path, timepoint, 2, points); store.insert_row(&row).unwrap(); // Assign one of them a color with an explicit instance let color_instances = vec![InstanceKey(1)]; let colors = vec![ColorRGBA(0xff000000)]; let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path, timepoint, 1, @@ -89,13 +89,13 @@ fn timeless_query() { // Create some points with implicit instances let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path, timepoint, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path, timepoint, 2, points); store.insert_row(&row).unwrap(); // Assign one of them a color with an explicit instance.. timelessly! 
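The `get_component_with_instances` change earlier in `query.rs` now threads the `RowId` of the matched row back to the caller. A minimal usage sketch, assuming the doc-test setup shown in those hunks (`__populate_example_store`, the `re_arrow_store` and `re_log_types` import paths) and that `QueryError` is the crate's public error type:

```rust
use re_arrow_store::LatestAtQuery;
use re_log_types::{component_types::Point2D, Component, Timeline};

fn latest_point_row() -> Result<(), re_query::QueryError> {
    let store = re_query::__populate_example_store();
    let ent_path = "point";
    let query = LatestAtQuery::new(Timeline::new_sequence("frame_nr"), 123.into());

    // The returned RowId identifies exactly which stored row satisfied the
    // latest-at query, alongside the component data itself.
    let (row_id, component) = re_query::get_component_with_instances(
        &store,
        &query,
        &ent_path.into(),
        Point2D::name(),
    )?;
    println!("row {row_id:?} holds {} instances", component.len());
    Ok(())
}
```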
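The `range_entity_with_primary` rework in the `range.rs` hunks above amounts to a small latest-at state machine: every incoming row overwrites the per-column state, but a joined view is yielded only when the primary column itself carried data. A self-contained sketch of that rule, with a plain slice of `Option<T>` standing in for the per-component cells (names hypothetical):

```rust
/// Fold one row of per-column cells into the running state, returning a
/// joined snapshot only when the primary column was updated by this row.
fn step<T: Clone>(
    state: &mut [Option<T>],
    row: Vec<Option<T>>,
    primary_col: usize,
) -> Option<Vec<Option<T>>> {
    let is_primary = row[primary_col].is_some();
    for (slot, cell) in state.iter_mut().zip(row) {
        if cell.is_some() {
            *slot = cell; // latest-at semantics: newer cells overwrite older ones
        }
    }
    // Only yield when the primary component itself changed.
    is_primary.then(|| state.to_vec())
}
```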
let color_instances = vec![InstanceKey(1)]; let colors = vec![ColorRGBA(0xff000000)]; - let row = DataRow::from_cells2(MsgId::random(), ent_path, [], 1, (color_instances, colors)); + let row = DataRow::from_cells2(RowId::random(), ent_path, [], 1, (color_instances, colors)); store.insert_row(&row).unwrap(); // Retrieve the view @@ -154,12 +154,12 @@ fn no_instance_join_query() { // Create some points with an implicit instance let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path, timepoint, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path, timepoint, 2, points); store.insert_row(&row).unwrap(); // Assign them colors with explicit instances let colors = vec![ColorRGBA(0xff000000), ColorRGBA(0x00ff0000)]; - let row = DataRow::from_cells1(MsgId::random(), ent_path, timepoint, 2, colors); + let row = DataRow::from_cells1(RowId::random(), ent_path, timepoint, 2, colors); store.insert_row(&row).unwrap(); // Retrieve the view @@ -218,7 +218,7 @@ fn missing_column_join_query() { // Create some points with an implicit instance let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path, timepoint, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path, timepoint, 2, points); store.insert_row(&row).unwrap(); // Retrieve the view @@ -276,14 +276,14 @@ fn splatted_query() { // Create some points with implicit instances let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path, timepoint, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path, timepoint, 2, points); store.insert_row(&row).unwrap(); // Assign all of them a color via splat let color_instances = vec![InstanceKey::SPLAT]; let colors = vec![ColorRGBA(0xff000000)]; let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path, timepoint, 1, diff --git a/crates/re_query/tests/range_tests.rs b/crates/re_query/tests/range_tests.rs index 26367a11787a..7b5aea29749b 100644 --- a/crates/re_query/tests/range_tests.rs +++ b/crates/re_query/tests/range_tests.rs @@ -5,7 +5,7 @@ use re_log_types::{ component_types::InstanceKey, component_types::{ColorRGBA, Point2D}, datagen::build_frame_nr, - Component, DataRow, EntityPath, MsgId, + Component, DataRow, EntityPath, RowId, }; use re_query::range_entity_with_primary; @@ -19,14 +19,14 @@ fn simple_range() { { // Create some points with implicit instances let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), timepoint1, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), timepoint1, 2, points); store.insert_row(&row).unwrap(); // Assign one of them a color with an explicit instance let color_instances = vec![InstanceKey(1)]; let colors = vec![ColorRGBA(0xff000000)]; let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path.clone(), timepoint1, 1, @@ -41,7 +41,7 @@ fn simple_range() { let color_instances = vec![InstanceKey(0)]; let colors = vec![ColorRGBA(0xff000000)]; let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path.clone(), timepoint2, 1, @@ -54,7 +54,7 @@ fn simple_range() { { // Create some points with implicit instances let points = vec![Point2D { x: 10.0, y: 20.0 }, Point2D { x: 30.0, y: 40.0 }]; - let row = 
DataRow::from_cells1(MsgId::random(), ent_path.clone(), timepoint3, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), timepoint3, 2, points); store.insert_row(&row).unwrap(); } @@ -237,18 +237,18 @@ fn timeless_range() { { // Create some points with implicit instances let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), timepoint1, 2, &points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), timepoint1, 2, &points); store.insert_row(&row).unwrap(); // Insert timelessly too! - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), [], 2, &points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), [], 2, &points); store.insert_row(&row).unwrap(); // Assign one of them a color with an explicit instance let color_instances = vec![InstanceKey(1)]; let colors = vec![ColorRGBA(0xff000000)]; let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path.clone(), timepoint1, 1, @@ -258,7 +258,7 @@ fn timeless_range() { // Insert timelessly too! let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path.clone(), [], 1, @@ -273,7 +273,7 @@ fn timeless_range() { let color_instances = vec![InstanceKey(0)]; let colors = vec![ColorRGBA(0xff000000)]; let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path.clone(), timepoint2, 1, @@ -283,7 +283,7 @@ fn timeless_range() { // Insert timelessly too! let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path.clone(), timepoint2, 1, @@ -296,16 +296,16 @@ fn timeless_range() { { // Create some points with implicit instances let points = vec![Point2D { x: 10.0, y: 20.0 }, Point2D { x: 30.0, y: 40.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), timepoint3, 2, &points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), timepoint3, 2, &points); store.insert_row(&row).unwrap(); // Insert timelessly too! - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), [], 2, &points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), [], 2, &points); store.insert_row(&row).unwrap(); } // β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - // β”‚ insert_id ┆ frame_nr ┆ entity ┆ rerun.colorrgba ┆ rerun.instance_key ┆ rerun.msg_id ┆ rerun.point2d β”‚ + // β”‚ insert_id ┆ frame_nr ┆ entity ┆ rerun.colorrgba ┆ rerun.instance_key ┆ rerun.row_id ┆ rerun.point2d β”‚ // β•žβ•β•β•β•β•β•β•β•β•β•β•β•ͺ══════════β•ͺ════════β•ͺ═════════════════β•ͺ════════════════════β•ͺ══════════════════════β•ͺ════════════════════════════║ // β”‚ 2 ┆ null ┆ point ┆ null ┆ [0, 1] ┆ [{167328063302243... 
┆ [{1.0,2.0}, {3.0,4.0}] β”‚ // β”œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”Όβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ•Œβ”€ @@ -670,14 +670,14 @@ fn simple_splatted_range() { { // Create some points with implicit instances let points = vec![Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), timepoint1, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), timepoint1, 2, points); store.insert_row(&row).unwrap(); // Assign one of them a color with an explicit instance let color_instances = vec![InstanceKey(1)]; let colors = vec![ColorRGBA(0xff000000)]; let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path.clone(), timepoint1, 1, @@ -692,7 +692,7 @@ fn simple_splatted_range() { let color_instances = vec![InstanceKey::SPLAT]; let colors = vec![ColorRGBA(0x00ff0000)]; let row = DataRow::from_cells2( - MsgId::random(), + RowId::random(), ent_path.clone(), timepoint2, 1, @@ -705,7 +705,7 @@ fn simple_splatted_range() { { // Create some points with implicit instances let points = vec![Point2D { x: 10.0, y: 20.0 }, Point2D { x: 30.0, y: 40.0 }]; - let row = DataRow::from_cells1(MsgId::random(), ent_path.clone(), timepoint3, 2, points); + let row = DataRow::from_cells1(RowId::random(), ent_path.clone(), timepoint3, 2, points); store.insert_row(&row).unwrap(); } diff --git a/crates/re_query/tests/visit_tests.rs b/crates/re_query/tests/visit_tests.rs index 3ad0897e2354..7a401e95a229 100644 --- a/crates/re_query/tests/visit_tests.rs +++ b/crates/re_query/tests/visit_tests.rs @@ -4,12 +4,13 @@ use re_query::{ComponentWithInstances, EntityView}; #[test] fn basic_single_iter() { - let points = vec![ + let instance_keys = InstanceKey::from_iter(0..2); + let points = [ Point2D { x: 1.0, y: 2.0 }, // Point2D { x: 3.0, y: 4.0 }, ]; - let component = ComponentWithInstances::from_native(None, &points).unwrap(); + let component = ComponentWithInstances::from_native(&instance_keys, &points); let results = itertools::izip!( points.into_iter(), @@ -24,25 +25,26 @@ fn basic_single_iter() { #[test] fn implicit_joined_iter() { - let points = vec![ + let instance_keys = InstanceKey::from_iter(0..3); + + let points = [ Point2D { x: 1.0, y: 2.0 }, // Point2D { x: 3.0, y: 4.0 }, Point2D { x: 5.0, y: 6.0 }, ]; - let colors = vec![ + let colors = [ ColorRGBA(0), // ColorRGBA(1), ColorRGBA(2), ]; let entity_view = EntityView::from_native2( - (None, &points), // - (None, &colors), - ) - .unwrap(); + (&instance_keys, &points), // + (&instance_keys, &colors), + ); - let expected_colors = vec![ + let expected_colors = [ Some(ColorRGBA(0)), // Some(ColorRGBA(1)), Some(ColorRGBA(2)), @@ -60,27 +62,28 @@ fn implicit_joined_iter() { #[test] fn implicit_primary_joined_iter() { - let points = vec![ + let point_ids = InstanceKey::from_iter(0..3); + + let points = [ Point2D { x: 1.0, y: 2.0 }, // Point2D { x: 3.0, y: 4.0 }, Point2D { x: 5.0, y: 6.0 }, ]; - let color_ids = vec![ + let color_ids = [ InstanceKey(1), // InstanceKey(2), ]; - let colors = vec![ + let colors = [ ColorRGBA(1), // ColorRGBA(2), ]; let entity_view = EntityView::from_native2( - (None, &points), // - (Some(&color_ids), &colors), - ) - .unwrap(); + (&point_ids, 
&points), // + (&color_ids, &colors), + ); let expected_colors = vec![ None, // @@ -100,19 +103,21 @@ fn implicit_primary_joined_iter() { #[test] fn implicit_component_joined_iter() { - let point_ids = vec![ + let point_ids = [ InstanceKey(0), // InstanceKey(2), InstanceKey(4), ]; - let points = vec![ + let points = [ Point2D { x: 1.0, y: 2.0 }, // Point2D { x: 3.0, y: 4.0 }, Point2D { x: 5.0, y: 6.0 }, ]; - let colors = vec![ + let color_ids = InstanceKey::from_iter(0..5); + + let colors = [ ColorRGBA(0), // ColorRGBA(1), ColorRGBA(2), @@ -121,10 +126,9 @@ fn implicit_component_joined_iter() { ]; let entity_view = EntityView::from_native2( - (Some(&point_ids), &points), // - (None, &colors), - ) - .unwrap(); + (&point_ids, &points), // + (&color_ids, &colors), + ); let expected_colors = vec![ Some(ColorRGBA(0)), // @@ -175,10 +179,9 @@ fn complex_joined_iter() { ]; let entity_view = EntityView::from_native2( - (Some(&point_ids), &points), // - (Some(&color_ids), &colors), - ) - .unwrap(); + (&point_ids, &points), // + (&color_ids, &colors), + ); let expected_colors = vec![ None, @@ -199,25 +202,19 @@ fn complex_joined_iter() { #[test] fn single_visit() { - let points = vec![ + let instance_keys = InstanceKey::from_iter(0..4); + let points = [ Point2D { x: 1.0, y: 2.0 }, Point2D { x: 3.0, y: 4.0 }, Point2D { x: 5.0, y: 6.0 }, Point2D { x: 7.0, y: 8.0 }, ]; - let entity_view = EntityView::from_native((None, &points)).unwrap(); + let entity_view = EntityView::from_native((&instance_keys, &points)); let mut instance_key_out = Vec::::new(); let mut points_out = Vec::::new(); - let expected_instance = vec![ - InstanceKey(0), // - InstanceKey(1), - InstanceKey(2), - InstanceKey(3), - ]; - entity_view .visit1(|instance_key: InstanceKey, point: Point2D| { instance_key_out.push(instance_key); @@ -226,8 +223,8 @@ fn single_visit() { .ok() .unwrap(); - assert_eq!(instance_key_out, expected_instance); - assert_eq!(points, points_out); + assert_eq!(instance_key_out, instance_keys); + assert_eq!(points.as_slice(), points_out.as_slice()); } #[test] @@ -240,6 +237,8 @@ fn joint_visit() { Point2D { x: 9.0, y: 10.0 }, ]; + let point_ids = InstanceKey::from_iter(0..5); + let colors = vec![ ColorRGBA(0xff000000), // ColorRGBA(0x00ff0000), @@ -251,10 +250,9 @@ fn joint_visit() { ]; let entity_view = EntityView::from_native2( - (None, &points), // - (Some(&color_ids), &colors), - ) - .unwrap(); + (&point_ids, &points), // + (&color_ids, &colors), + ); let mut points_out = Vec::::new(); let mut colors_out = Vec::>::new(); diff --git a/crates/re_renderer/Cargo.toml b/crates/re_renderer/Cargo.toml index 96083f8281b5..03bc85390551 100644 --- a/crates/re_renderer/Cargo.toml +++ b/crates/re_renderer/Cargo.toml @@ -58,7 +58,7 @@ macaw.workspace = true ordered-float = "3.2" parking_lot.workspace = true slotmap = "1.0.6" -smallvec = "1.10" +smallvec.workspace = true static_assertions = "1.1" thiserror.workspace = true type-map = "0.5" diff --git a/crates/re_renderer/examples/2d.rs b/crates/re_renderer/examples/2d.rs index cb4c25035645..d647f09cfc35 100644 --- a/crates/re_renderer/examples/2d.rs +++ b/crates/re_renderer/examples/2d.rs @@ -1,7 +1,8 @@ use ecolor::Hsva; use re_renderer::{ renderer::{ - LineStripFlags, RectangleDrawData, TextureFilterMag, TextureFilterMin, TexturedRect, + ColormappedTexture, LineStripFlags, RectangleDrawData, RectangleOptions, TextureFilterMag, + TextureFilterMin, TexturedRect, }, resource_managers::{GpuTexture2DHandle, Texture2DCreationDesc}, view_builder::{self, Projection, 
TargetConfiguration, ViewBuilder}, @@ -39,7 +40,7 @@ impl framework::Example for Render2D { &mut re_ctx.gpu_resources.textures, &Texture2DCreationDesc { label: "rerun logo".into(), - data: &image_data, + data: image_data.into(), format: wgpu::TextureFormat::Rgba8UnormSrgb, width: rerun_logo.width(), height: rerun_logo.height(), @@ -67,7 +68,7 @@ impl framework::Example for Render2D { splits[0].resolution_in_pixel[1] as f32, ); - let mut line_strip_builder = LineStripSeriesBuilder::<()>::new(re_ctx); + let mut line_strip_builder = LineStripSeriesBuilder::new(re_ctx); // Blue rect outline around the bottom right quarter. { @@ -149,29 +150,26 @@ impl framework::Example for Render2D { // Moving the windows to a high dpi screen makes the second one bigger. // Also, it looks different under perspective projection. // The third point is automatic thickness which is determined by the point renderer implementation. - let mut point_cloud_builder = PointCloudBuilder::<()>::new(re_ctx); - point_cloud_builder - .batch("points") - .add_points_2d( - 4, - [ - glam::vec2(500.0, 120.0), - glam::vec2(520.0, 120.0), - glam::vec2(540.0, 120.0), - glam::vec2(560.0, 120.0), - ] - .into_iter(), - ) - .radii( - [ - Size::new_scene(4.0), - Size::new_points(4.0), - Size::AUTO, - Size::AUTO_LARGE, - ] - .into_iter(), - ) - .colors(std::iter::repeat(Color32::from_rgb(55, 180, 1)).take(4)); + let mut point_cloud_builder = PointCloudBuilder::new(re_ctx); + point_cloud_builder.batch("points").add_points_2d( + 4, + [ + glam::vec2(500.0, 120.0), + glam::vec2(520.0, 120.0), + glam::vec2(540.0, 120.0), + glam::vec2(560.0, 120.0), + ] + .into_iter(), + [ + Size::new_scene(4.0), + Size::new_points(4.0), + Size::AUTO, + Size::AUTO_LARGE, + ] + .into_iter(), + std::iter::repeat(Color32::from_rgb(55, 180, 1)), + std::iter::repeat(re_renderer::PickingLayerInstanceId::default()), + ); // Pile stuff to test for overlap handling { @@ -185,7 +183,7 @@ impl framework::Example for Render2D { } } - let line_strip_draw_data = line_strip_builder.to_draw_data(re_ctx); + let line_strip_draw_data = line_strip_builder.to_draw_data(re_ctx).unwrap(); let point_draw_data = point_cloud_builder.to_draw_data(re_ctx).unwrap(); let image_scale = 4.0; @@ -196,10 +194,14 @@ impl framework::Example for Render2D { top_left_corner_position: glam::vec3(500.0, 120.0, -0.05), extent_u: self.rerun_logo_texture_width as f32 * image_scale * glam::Vec3::X, extent_v: self.rerun_logo_texture_height as f32 * image_scale * glam::Vec3::Y, - texture: self.rerun_logo_texture.clone(), - texture_filter_magnification: TextureFilterMag::Nearest, - texture_filter_minification: TextureFilterMin::Linear, - ..Default::default() + colormapped_texture: ColormappedTexture::from_unorm_srgba( + self.rerun_logo_texture.clone(), + ), + options: RectangleOptions { + texture_filter_magnification: TextureFilterMag::Nearest, + texture_filter_minification: TextureFilterMin::Linear, + ..Default::default() + }, }, TexturedRect { top_left_corner_position: glam::vec3( @@ -210,11 +212,15 @@ impl framework::Example for Render2D { ), extent_u: self.rerun_logo_texture_width as f32 * image_scale * glam::Vec3::X, extent_v: self.rerun_logo_texture_height as f32 * image_scale * glam::Vec3::Y, - texture: self.rerun_logo_texture.clone(), - texture_filter_magnification: TextureFilterMag::Linear, - texture_filter_minification: TextureFilterMin::Linear, - depth_offset: 1, - ..Default::default() + colormapped_texture: ColormappedTexture::from_unorm_srgba( + self.rerun_logo_texture.clone(), + ), + options: 
RectangleOptions { + texture_filter_magnification: TextureFilterMag::Linear, + texture_filter_minification: TextureFilterMin::Linear, + depth_offset: 1, + ..Default::default() + }, }, ], ) @@ -223,25 +229,22 @@ impl framework::Example for Render2D { vec![ // 2d view to the left { - let mut view_builder = ViewBuilder::default(); - view_builder - .setup_view( - re_ctx, - TargetConfiguration { - name: "2D".into(), - resolution_in_pixel: splits[0].resolution_in_pixel, - view_from_world: macaw::IsoTransform::IDENTITY, - projection_from_view: Projection::Orthographic { - camera_mode: - view_builder::OrthographicCameraMode::TopLeftCornerAndExtendZ, - vertical_world_size: splits[0].resolution_in_pixel[1] as f32, - far_plane_distance: 1000.0, - }, - pixels_from_point, - ..Default::default() + let mut view_builder = ViewBuilder::new( + re_ctx, + TargetConfiguration { + name: "2D".into(), + resolution_in_pixel: splits[0].resolution_in_pixel, + view_from_world: macaw::IsoTransform::IDENTITY, + projection_from_view: Projection::Orthographic { + camera_mode: + view_builder::OrthographicCameraMode::TopLeftCornerAndExtendZ, + vertical_world_size: splits[0].resolution_in_pixel[1] as f32, + far_plane_distance: 1000.0, }, - ) - .unwrap(); + pixels_from_point, + ..Default::default() + }, + ); view_builder.queue_draw(&line_strip_draw_data); view_builder.queue_draw(&point_draw_data); view_builder.queue_draw(&rectangle_draw_data); @@ -256,7 +259,6 @@ impl framework::Example for Render2D { }, // and 3d view of the same scene to the right { - let mut view_builder = ViewBuilder::default(); let seconds_since_startup = time.seconds_since_startup(); let camera_rotation_center = screen_size.extend(0.0) * 0.5; let camera_position = glam::vec3( @@ -265,27 +267,25 @@ impl framework::Example for Render2D { seconds_since_startup.cos(), ) * screen_size.x.max(screen_size.y) + camera_rotation_center; - view_builder - .setup_view( - re_ctx, - view_builder::TargetConfiguration { - name: "3D".into(), - resolution_in_pixel: splits[1].resolution_in_pixel, - view_from_world: macaw::IsoTransform::look_at_rh( - camera_position, - camera_rotation_center, - glam::Vec3::Y, - ) - .unwrap(), - projection_from_view: Projection::Perspective { - vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, - near_plane_distance: 0.01, - }, - pixels_from_point, - ..Default::default() + let mut view_builder = ViewBuilder::new( + re_ctx, + view_builder::TargetConfiguration { + name: "3D".into(), + resolution_in_pixel: splits[1].resolution_in_pixel, + view_from_world: macaw::IsoTransform::look_at_rh( + camera_position, + camera_rotation_center, + glam::Vec3::Y, + ) + .unwrap(), + projection_from_view: Projection::Perspective { + vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, + near_plane_distance: 0.01, }, - ) - .unwrap(); + pixels_from_point, + ..Default::default() + }, + ); let command_buffer = view_builder .queue_draw(&line_strip_draw_data) .queue_draw(&point_draw_data) diff --git a/crates/re_renderer/examples/depth_cloud.rs b/crates/re_renderer/examples/depth_cloud.rs index fb6a18821ace..09314d44a65c 100644 --- a/crates/re_renderer/examples/depth_cloud.rs +++ b/crates/re_renderer/examples/depth_cloud.rs @@ -20,8 +20,8 @@ use itertools::Itertools; use macaw::IsoTransform; use re_renderer::{ renderer::{ - DepthCloud, DepthCloudDepthData, DepthCloudDrawData, DepthClouds, DrawData, - GenericSkyboxDrawData, RectangleDrawData, TexturedRect, + ColormappedTexture, DepthCloud, DepthCloudDepthData, DepthCloudDrawData, DepthClouds, + DrawData, 
GenericSkyboxDrawData, RectangleDrawData, RectangleOptions, TexturedRect, }, resource_managers::{GpuTexture2DHandle, Texture2DCreationDesc}, view_builder::{self, Projection, ViewBuilder}, @@ -98,38 +98,37 @@ impl RenderDepthClouds { }) .multiunzip(); - let mut builder = PointCloudBuilder::<()>::new(re_ctx); - builder - .batch("backprojected point cloud") - .add_points(num_points as _, points.into_iter()) - .colors(colors.into_iter()) - .radii(radii.into_iter()); + let mut builder = PointCloudBuilder::new(re_ctx); + builder.batch("backprojected point cloud").add_points( + num_points as _, + points.into_iter(), + radii.into_iter(), + colors.into_iter(), + std::iter::empty::(), + ); builder.to_draw_data(re_ctx).unwrap() }; - let mut view_builder = ViewBuilder::default(); - view_builder - .setup_view( - re_ctx, - view_builder::TargetConfiguration { - name: "Point Cloud".into(), - resolution_in_pixel, - view_from_world: IsoTransform::look_at_rh( - self.camera_position, - Vec3::ZERO, - Vec3::Y, - ) - .unwrap(), - projection_from_view: Projection::Perspective { - vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, - near_plane_distance: 0.01, - }, - pixels_from_point, - ..Default::default() + let mut view_builder = ViewBuilder::new( + re_ctx, + view_builder::TargetConfiguration { + name: "Point Cloud".into(), + resolution_in_pixel, + view_from_world: IsoTransform::look_at_rh( + self.camera_position, + Vec3::ZERO, + Vec3::Y, + ) + .unwrap(), + projection_from_view: Projection::Perspective { + vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, + near_plane_distance: 0.01, }, - ) - .unwrap(); + pixels_from_point, + ..Default::default() + }, + ); let command_buffer = view_builder .queue_draw(&GenericSkyboxDrawData::new(re_ctx)) @@ -181,36 +180,34 @@ impl RenderDepthClouds { max_depth_in_world: 5.0, depth_dimensions: depth.dimensions, depth_data: depth.data.clone(), - colormap: re_renderer::ColorMap::ColorMapTurbo, + colormap: re_renderer::Colormap::Turbo, outline_mask_id: Default::default(), + picking_object_id: Default::default(), }], radius_boost_in_ui_points_for_outlines: 2.5, }, ) .unwrap(); - let mut view_builder = ViewBuilder::default(); - view_builder - .setup_view( - re_ctx, - view_builder::TargetConfiguration { - name: "Depth Cloud".into(), - resolution_in_pixel, - view_from_world: IsoTransform::look_at_rh( - self.camera_position, - Vec3::ZERO, - Vec3::Y, - ) - .unwrap(), - projection_from_view: Projection::Perspective { - vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, - near_plane_distance: 0.01, - }, - pixels_from_point, - ..Default::default() + let mut view_builder = ViewBuilder::new( + re_ctx, + view_builder::TargetConfiguration { + name: "Depth Cloud".into(), + resolution_in_pixel, + view_from_world: IsoTransform::look_at_rh( + self.camera_position, + Vec3::ZERO, + Vec3::Y, + ) + .unwrap(), + projection_from_view: Projection::Perspective { + vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, + near_plane_distance: 0.01, }, - ) - .unwrap(); + pixels_from_point, + ..Default::default() + }, + ); let command_buffer = view_builder .queue_draw(&GenericSkyboxDrawData::new(re_ctx)) @@ -243,7 +240,7 @@ impl framework::Example for RenderDepthClouds { &mut re_ctx.gpu_resources.textures, &Texture2DCreationDesc { label: "albedo".into(), - data: bytemuck::cast_slice(&albedo.rgba8), + data: bytemuck::cast_slice(&albedo.rgba8).into(), format: wgpu::TextureFormat::Rgba8UnormSrgb, width: albedo.dimensions.x, height: albedo.dimensions.y, @@ -310,7 +307,7 @@ impl framework::Example for 
RenderDepthClouds { let world_from_model = rotation * translation_center * scale; let frame_draw_data = { - let mut builder = LineStripSeriesBuilder::<()>::new(re_ctx); + let mut builder = LineStripSeriesBuilder::new(re_ctx); { let mut line_batch = builder.batch("frame").world_from_obj(world_from_model); line_batch.add_box_outline(glam::Affine3A::from_scale_rotation_translation( @@ -319,7 +316,7 @@ impl framework::Example for RenderDepthClouds { glam::Vec3::ONE * 0.5, )); } - builder.to_draw_data(re_ctx) + builder.to_draw_data(re_ctx).unwrap() }; let image_draw_data = RectangleDrawData::new( @@ -329,12 +326,14 @@ impl framework::Example for RenderDepthClouds { .transform_point3(glam::Vec3::new(1.0, 1.0, 0.0)), extent_u: world_from_model.transform_vector3(-glam::Vec3::X), extent_v: world_from_model.transform_vector3(-glam::Vec3::Y), - texture: albedo_handle.clone(), - texture_filter_magnification: re_renderer::renderer::TextureFilterMag::Nearest, - texture_filter_minification: re_renderer::renderer::TextureFilterMin::Linear, - multiplicative_tint: Rgba::from_white_alpha(0.5), - depth_offset: -1, - ..Default::default() + colormapped_texture: ColormappedTexture::from_unorm_srgba(albedo_handle.clone()), + options: RectangleOptions { + texture_filter_magnification: re_renderer::renderer::TextureFilterMag::Nearest, + texture_filter_minification: re_renderer::renderer::TextureFilterMin::Linear, + multiplicative_tint: Rgba::from_white_alpha(0.5), + depth_offset: -1, + ..Default::default() + }, }], ) .unwrap(); diff --git a/crates/re_renderer/examples/framework.rs b/crates/re_renderer/examples/framework.rs index 65d16fa3cf30..fef6e7544b5b 100644 --- a/crates/re_renderer/examples/framework.rs +++ b/crates/re_renderer/examples/framework.rs @@ -210,10 +210,10 @@ impl Application { Event::WindowEvent { event: WindowEvent::CursorMoved { position, .. }, .. - } => self.example.on_cursor_moved(glam::uvec2( - position.x.round() as u32, - position.y.round() as u32, - )), + } => self + .example + // Don't round the position: The entire range from 0 to excluding 1 should fall into pixel coordinate 0! + .on_cursor_moved(glam::uvec2(position.x as u32, position.y as u32)), Event::WindowEvent { event: WindowEvent::ScaleFactorChanged { @@ -288,14 +288,11 @@ impl Application { }); for draw_result in &draw_results { - draw_result - .view_builder - .composite( - &self.re_ctx, - &mut composite_pass, - draw_result.target_location, - ) - .expect("Failed to composite view main surface"); + draw_result.view_builder.composite( + &self.re_ctx, + &mut composite_pass, + draw_result.target_location, + ); } }; diff --git a/crates/re_renderer/examples/multiview.rs b/crates/re_renderer/examples/multiview.rs index 24c76d4c9d5f..48b77e24388f 100644 --- a/crates/re_renderer/examples/multiview.rs +++ b/crates/re_renderer/examples/multiview.rs @@ -84,7 +84,7 @@ fn build_lines(re_ctx: &mut RenderContext, seconds_since_startup: f32) -> LineDr // Calculate some points that look nice for an animated line. 
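The `framework.rs` hunk above replaces `position.x.round()` with a plain truncating cast when converting the cursor position to pixel coordinates. A tiny sketch of why (hypothetical helper; `as` casts truncate toward zero):

```rust
fn cursor_to_pixel(x: f64, y: f64) -> (u32, u32) {
    // The whole half-open range [0.0, 1.0) must land in pixel 0; rounding
    // would send 0.6 to pixel 1 even though it is still inside pixel 0.
    (x as u32, y as u32)
}

fn main() {
    assert_eq!(cursor_to_pixel(0.9, 0.9), (0, 0));
    assert_eq!(cursor_to_pixel(1.0, 1.4), (1, 1));
}
```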
let lorenz_points = lorenz_points(seconds_since_startup); - let mut builder = LineStripSeriesBuilder::<()>::new(re_ctx); + let mut builder = LineStripSeriesBuilder::new(re_ctx); { let mut batch = builder.batch("lines without transform"); @@ -125,7 +125,7 @@ fn build_lines(re_ctx: &mut RenderContext, seconds_since_startup: f32) -> LineDr .radius(Size::new_scene(0.1)) .flags(LineStripFlags::CAP_END_TRIANGLE); - builder.to_draw_data(re_ctx) + builder.to_draw_data(re_ctx).unwrap() } enum CameraControl { @@ -210,8 +210,7 @@ impl Multiview { draw_data: &D, index: u32, ) -> (ViewBuilder, wgpu::CommandBuffer) { - let mut view_builder = ViewBuilder::default(); - view_builder.setup_view(re_ctx, target_cfg).unwrap(); + let mut view_builder = ViewBuilder::new(re_ctx, target_cfg); if self .take_screenshot_next_frame_for_view @@ -316,16 +315,17 @@ impl Example for Multiview { let skybox = GenericSkyboxDrawData::new(re_ctx); let lines = build_lines(re_ctx, seconds_since_startup); - let mut builder = PointCloudBuilder::<()>::new(re_ctx); + let mut builder = PointCloudBuilder::new(re_ctx); builder .batch("Random Points") .world_from_obj(glam::Mat4::from_rotation_x(seconds_since_startup)) .add_points( self.random_points_positions.len(), self.random_points_positions.iter().cloned(), - ) - .radii(self.random_points_radii.iter().cloned()) - .colors(self.random_points_colors.iter().cloned()); + self.random_points_radii.iter().cloned(), + self.random_points_colors.iter().cloned(), + std::iter::empty::(), + ); let point_cloud = builder.to_draw_data(re_ctx).unwrap(); let meshes = build_mesh_instances( diff --git a/crates/re_renderer/examples/outlines.rs b/crates/re_renderer/examples/outlines.rs index b7b106a60426..2cdc2ed6e87c 100644 --- a/crates/re_renderer/examples/outlines.rs +++ b/crates/re_renderer/examples/outlines.rs @@ -40,8 +40,6 @@ impl framework::Example for Outlines { time: &framework::Time, pixels_from_point: f32, ) -> Vec { - let mut view_builder = ViewBuilder::default(); - if !self.is_paused { self.seconds_since_startup += time.last_frame_duration.as_secs_f32(); } @@ -49,35 +47,30 @@ impl framework::Example for Outlines { // TODO(#1426): unify camera logic between examples. 
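The `PointCloudBuilder` change visible in `multiview.rs` above folds the per-point attribute iterators (radii, colors, picking ids) into the `add_points` call itself instead of separate chained builder calls. One plausible reading of the design, sketched here rather than the crate's actual implementation: zipping all attribute streams in a single call keeps them in lockstep with the positions.

```rust
/// Zip positions with their per-point attributes; `zip` truncates to the
/// shortest stream, so the attributes can never drift out of step.
fn add_points(
    positions: impl Iterator<Item = [f32; 3]>,
    radii: impl Iterator<Item = f32>,
    colors: impl Iterator<Item = [u8; 4]>,
) -> Vec<([f32; 3], f32, [u8; 4])> {
    positions
        .zip(radii)
        .zip(colors)
        .map(|((pos, radius), color)| (pos, radius, color))
        .collect()
}
```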
let camera_position = glam::vec3(1.0, 3.5, 7.0); - view_builder - .setup_view( - re_ctx, - TargetConfiguration { - name: "OutlinesDemo".into(), - resolution_in_pixel: resolution, - view_from_world: macaw::IsoTransform::look_at_rh( - camera_position, - glam::Vec3::ZERO, - glam::Vec3::Y, - ) - .unwrap(), - projection_from_view: Projection::Perspective { - vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, - near_plane_distance: 0.01, - }, - pixels_from_point, - outline_config: Some(OutlineConfig { - outline_radius_pixel: (seconds_since_startup * 2.0).sin().abs() * 10.0 - + 2.0, - color_layer_a: re_renderer::Rgba::from_rgb(1.0, 0.6, 0.0), - color_layer_b: re_renderer::Rgba::from_rgba_unmultiplied( - 0.25, 0.3, 1.0, 0.5, - ), - }), - ..Default::default() + let mut view_builder = ViewBuilder::new( + re_ctx, + TargetConfiguration { + name: "OutlinesDemo".into(), + resolution_in_pixel: resolution, + view_from_world: macaw::IsoTransform::look_at_rh( + camera_position, + glam::Vec3::ZERO, + glam::Vec3::Y, + ) + .unwrap(), + projection_from_view: Projection::Perspective { + vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, + near_plane_distance: 0.01, }, - ) - .unwrap(); + pixels_from_point, + outline_config: Some(OutlineConfig { + outline_radius_pixel: (seconds_since_startup * 2.0).sin().abs() * 10.0 + 2.0, + color_layer_a: re_renderer::Rgba::from_rgb(1.0, 0.6, 0.0), + color_layer_b: re_renderer::Rgba::from_rgba_unmultiplied(0.25, 0.3, 1.0, 0.5), + }), + ..Default::default() + }, + ); let outline_mask_large_mesh = match ((seconds_since_startup * 0.5) as u64) % 5 { 0 => OutlineMaskPreference::NONE, diff --git a/crates/re_renderer/examples/picking.rs b/crates/re_renderer/examples/picking.rs index ed21f8f3875f..afcb442ffbba 100644 --- a/crates/re_renderer/examples/picking.rs +++ b/crates/re_renderer/examples/picking.rs @@ -102,49 +102,45 @@ impl framework::Example for Picking { PickingLayerProcessor::next_readback_result::<()>(re_ctx, READBACK_IDENTIFIER) { // Grab the middle pixel. usually we'd want to do something clever that snaps the the closest object of interest. - let picked_pixel = picking_result.picking_data[(picking_result.rect.extent.x / 2 - + (picking_result.rect.extent.y / 2) * picking_result.rect.extent.x) - as usize]; + let picked_id = picking_result.picked_id(picking_result.rect.extent / 2); + //let picked_position = + // picking_result.picked_world_position(picking_result.rect.extent / 2); + //dbg!(picked_position, picked_id); self.mesh_is_hovered = false; - if picked_pixel == MESH_ID { + if picked_id == MESH_ID { self.mesh_is_hovered = true; - } else if picked_pixel.object.0 != 0 - && picked_pixel.object.0 <= self.point_sets.len() as u64 + } else if picked_id.object.0 != 0 && picked_id.object.0 <= self.point_sets.len() as u64 { - let point_set = &mut self.point_sets[picked_pixel.object.0 as usize - 1]; - point_set.radii[picked_pixel.instance.0 as usize] = Size::new_scene(0.1); - point_set.colors[picked_pixel.instance.0 as usize] = Color32::DEBUG_COLOR; + let point_set = &mut self.point_sets[picked_id.object.0 as usize - 1]; + point_set.radii[picked_id.instance.0 as usize] = Size::new_scene(0.1); + point_set.colors[picked_id.instance.0 as usize] = Color32::DEBUG_COLOR; } } - let mut view_builder = ViewBuilder::default(); - // TODO(#1426): unify camera logic between examples. 
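The removed lines in `picking.rs` above indexed the readback buffer by hand; the new `picked_id` helper hides that arithmetic. For reference, a standalone sketch (hypothetical function) of the row-major indexing the old code performed:

```rust
/// Index of pixel (x, y) in a row-major readback buffer of the given width.
fn pixel_index(x: u32, y: u32, width: u32) -> usize {
    (x + y * width) as usize
}

fn main() {
    // For instance, the middle pixel of a 31Γ—31 picking rect:
    assert_eq!(pixel_index(31 / 2, 31 / 2, 31), 480);
}
```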
let camera_position = glam::vec3(1.0, 3.5, 7.0); - view_builder - .setup_view( - re_ctx, - TargetConfiguration { - name: "OutlinesDemo".into(), - resolution_in_pixel: resolution, - view_from_world: macaw::IsoTransform::look_at_rh( - camera_position, - glam::Vec3::ZERO, - glam::Vec3::Y, - ) - .unwrap(), - projection_from_view: Projection::Perspective { - vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, - near_plane_distance: 0.01, - }, - pixels_from_point, - outline_config: None, - ..Default::default() + let mut view_builder = ViewBuilder::new( + re_ctx, + TargetConfiguration { + name: "OutlinesDemo".into(), + resolution_in_pixel: resolution, + view_from_world: macaw::IsoTransform::look_at_rh( + camera_position, + glam::Vec3::ZERO, + glam::Vec3::Y, + ) + .unwrap(), + projection_from_view: Projection::Perspective { + vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, + near_plane_distance: 0.01, }, - ) - .unwrap(); + pixels_from_point, + outline_config: None, + ..Default::default() + }, + ); // Use an uneven number of pixels for the picking rect so that there is a clearly defined middle-pixel. // (for this sample a size of 1 would be sufficient, but for a real application you'd want to use a larger size to allow snapping) @@ -157,7 +153,7 @@ impl framework::Example for Picking { .schedule_picking_rect(re_ctx, picking_rect, READBACK_IDENTIFIER, (), false) .unwrap(); - let mut point_builder = PointCloudBuilder::<()>::new(re_ctx); + let mut point_builder = PointCloudBuilder::new(re_ctx); for (i, point_set) in self.point_sets.iter().enumerate() { point_builder .batch(format!("Random Points {i}")) @@ -165,10 +161,10 @@ impl framework::Example for Picking { .add_points( point_set.positions.len(), point_set.positions.iter().cloned(), - ) - .radii(point_set.radii.iter().cloned()) - .colors(point_set.colors.iter().cloned()) - .picking_instance_ids(point_set.picking_ids.iter().cloned()); + point_set.radii.iter().cloned(), + point_set.colors.iter().cloned(), + point_set.picking_ids.iter().cloned(), + ); } view_builder.queue_draw(&point_builder.to_draw_data(re_ctx).unwrap()); diff --git a/crates/re_renderer/shader/colormap.wgsl b/crates/re_renderer/shader/colormap.wgsl index 6c99dac97ebb..59be61afdfe4 100644 --- a/crates/re_renderer/shader/colormap.wgsl +++ b/crates/re_renderer/shader/colormap.wgsl @@ -2,29 +2,31 @@ #import <./utils/srgb.wgsl> // NOTE: Keep in sync with `colormap.rs`! -const GRAYSCALE: u32 = 0u; -const COLORMAP_TURBO: u32 = 1u; -const COLORMAP_VIRIDIS: u32 = 2u; -const COLORMAP_PLASMA: u32 = 3u; -const COLORMAP_MAGMA: u32 = 4u; -const COLORMAP_INFERNO: u32 = 5u; +const COLORMAP_GRAYSCALE: u32 = 1u; +const COLORMAP_INFERNO: u32 = 2u; +const COLORMAP_MAGMA: u32 = 3u; +const COLORMAP_PLASMA: u32 = 4u; +const COLORMAP_TURBO: u32 = 5u; +const COLORMAP_VIRIDIS: u32 = 6u; /// Returns a gamma-space sRGB in 0-1 range. /// /// The input will be saturated to [0, 1] range. 
fn colormap_srgb(which: u32, t: f32) -> Vec3 { - if which == COLORMAP_TURBO { + if which == COLORMAP_GRAYSCALE { + return linear_from_srgb(Vec3(t)); + } else if which == COLORMAP_INFERNO { + return colormap_inferno_srgb(t); + } else if which == COLORMAP_MAGMA { + return colormap_magma_srgb(t); + } else if which == COLORMAP_PLASMA { + return colormap_plasma_srgb(t); + } else if which == COLORMAP_TURBO { return colormap_turbo_srgb(t); } else if which == COLORMAP_VIRIDIS { return colormap_viridis_srgb(t); - } else if which == COLORMAP_PLASMA { - return colormap_plasma_srgb(t); - } else if which == COLORMAP_MAGMA { - return colormap_magma_srgb(t); - } else if which == COLORMAP_INFERNO { - return colormap_inferno_srgb(t); - } else { // assume grayscale - return linear_from_srgb(Vec3(t)); + } else { + return ERROR_RGBA.rgb; } } diff --git a/crates/re_renderer/shader/copy_texture.wgsl b/crates/re_renderer/shader/copy_texture.wgsl new file mode 100644 index 000000000000..aaa5bb4c36b6 --- /dev/null +++ b/crates/re_renderer/shader/copy_texture.wgsl @@ -0,0 +1,15 @@ +// Reads the content of a texture and writes it out as is. +// +// This is needed e.g. on WebGL to convert from a depth format to a regular color format that can be read back to the CPU. + +#import <./types.wgsl> +#import <./global_bindings.wgsl> +#import <./screen_triangle_vertex.wgsl> + +@group(1) @binding(0) +var tex: texture_2d; + +@fragment +fn main(in: FragmentInput) -> @location(0) Vec4 { + return textureSample(tex, nearest_sampler, in.texcoord); +} diff --git a/crates/re_renderer/shader/depth_cloud.wgsl b/crates/re_renderer/shader/depth_cloud.wgsl index 14caa6d8e8d4..1e7f7afdf0d7 100644 --- a/crates/re_renderer/shader/depth_cloud.wgsl +++ b/crates/re_renderer/shader/depth_cloud.wgsl @@ -28,6 +28,9 @@ struct DepthCloudInfo { /// Outline mask id for the outline mask pass. outline_mask_id: UVec2, + /// Picking object id that applies for the entire depth cloud. + picking_layer_object_id: UVec2, + /// Multiplier to get world-space depth from whatever is in the texture. world_depth_from_texture_value: f32, @@ -51,11 +54,23 @@ var depth_cloud_info: DepthCloudInfo; var depth_texture: texture_2d; struct VertexOut { - @builtin(position) pos_in_clip: Vec4, - @location(0) pos_in_world: Vec3, - @location(1) point_pos_in_world: Vec3, - @location(2) point_color: Vec4, - @location(3) point_radius: f32, + @builtin(position) + pos_in_clip: Vec4, + + @location(0) @interpolate(perspective) + pos_in_world: Vec3, + + @location(1) @interpolate(flat) + point_pos_in_world: Vec3, + + @location(2) @interpolate(flat) + point_color: Vec4, + + @location(3) @interpolate(flat) + point_radius: f32, + + @location(4) @interpolate(flat) + quad_idx: u32, }; // --- @@ -63,7 +78,7 @@ struct VertexOut { struct PointData { pos_in_world: Vec3, unresolved_radius: f32, - color: Vec4 + color: Vec4, } // Backprojects the depth texture using the intrinsics passed in the uniform buffer. 
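The renumbered constants in `colormap.wgsl` above deliberately start at 1, so a zero-initialized uniform now falls through to `ERROR_RGBA` instead of silently rendering grayscale. A sketch of the Rust-side mirror that the "Keep in sync with `colormap.rs`" note refers to; the exact declaration is an assumption, though the diff itself uses `re_renderer::Colormap::Turbo`:

```rust
/// Kept in sync with the WGSL constants above: discriminants start at 1 so
/// that 0 (e.g. a zeroed uniform buffer) maps to no valid colormap.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Colormap {
    Grayscale = 1,
    Inferno = 2,
    Magma = 3,
    Plasma = 4,
    Turbo = 5,
    Viridis = 6,
}
```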
@@ -75,6 +90,7 @@ fn compute_point_data(quad_idx: i32) -> PointData { let world_space_depth = depth_cloud_info.world_depth_from_texture_value * textureLoad(depth_texture, texcoords, 0).x; var data: PointData; + if 0.0 < world_space_depth && world_space_depth < f32max { // TODO(cmc): albedo textures let color = Vec4(colormap_linear(depth_cloud_info.colormap, world_space_depth / depth_cloud_info.max_depth_in_world), 1.0); @@ -113,6 +129,7 @@ fn vs_main(@builtin(vertex_index) vertex_idx: u32) -> VertexOut { var out: VertexOut; out.point_pos_in_world = point_data.pos_in_world; out.point_color = point_data.color; + out.quad_idx = u32(quad_idx); if 0.0 < point_data.unresolved_radius { // Span quad @@ -145,7 +162,7 @@ fn fs_main_picking_layer(in: VertexOut) -> @location(0) UVec4 { if coverage <= 0.5 { discard; } - return UVec4(0u, 0u, 0u, 0u); // TODO(andreas): Implement picking layer id pass-through. + return UVec4(depth_cloud_info.picking_layer_object_id, in.quad_idx, 0u); } @fragment diff --git a/crates/re_renderer/shader/lines.wgsl b/crates/re_renderer/shader/lines.wgsl index ffa727b4fdb4..c8812e8112df 100644 --- a/crates/re_renderer/shader/lines.wgsl +++ b/crates/re_renderer/shader/lines.wgsl @@ -10,6 +10,8 @@ var line_strip_texture: texture_2d; @group(1) @binding(1) var position_data_texture: texture_2d; +@group(1) @binding(2) +var picking_instance_id_texture: texture_2d; struct DrawDataUniformBuffer { radius_boost_in_ui_points: f32, @@ -19,12 +21,13 @@ struct DrawDataUniformBuffer { // if we wouldn't add padding here, which isn't available on WebGL. _padding: Vec4, }; -@group(1) @binding(2) +@group(1) @binding(3) var draw_data: DrawDataUniformBuffer; struct BatchUniformBuffer { world_from_obj: Mat4, outline_mask_ids: UVec2, + picking_layer_object_id: UVec2, }; @group(2) @binding(0) var batch: BatchUniformBuffer; @@ -32,8 +35,8 @@ var batch: BatchUniformBuffer; // textureLoad needs i32 right now, so we use that with all sizes & indices to avoid casts // https://github.com/gfx-rs/naga/issues/1997 -const LINESTRIP_TEXTURE_SIZE: i32 = 512; -const POSITION_DATA_TEXTURE_SIZE: i32 = 256; +const POSITION_TEXTURE_SIZE: i32 = 512; +const LINE_STRIP_TEXTURE_SIZE: i32 = 256; // Flags // See lines.rs#LineStripFlags @@ -69,6 +72,9 @@ struct VertexOut { @location(5) @interpolate(flat) fragment_flags: u32, + + @location(6) @interpolate(flat) + picking_instance_id: UVec2, }; struct LineStripData { @@ -76,13 +82,15 @@ struct LineStripData { unresolved_radius: f32, stippling: f32, flags: u32, + picking_instance_id: UVec2, } // Read and unpack line strip data at a given location fn read_strip_data(idx: u32) -> LineStripData { // can be u32 once https://github.com/gfx-rs/naga/issues/1997 is solved let idx = i32(idx); - var raw_data = textureLoad(position_data_texture, IVec2(idx % POSITION_DATA_TEXTURE_SIZE, idx / POSITION_DATA_TEXTURE_SIZE), 0).xy; + let coord = IVec2(idx % LINE_STRIP_TEXTURE_SIZE, idx / LINE_STRIP_TEXTURE_SIZE); + var raw_data = textureLoad(position_data_texture, coord, 0).xy; var data: LineStripData; data.color = linear_from_srgba(unpack4x8unorm_workaround(raw_data.x)); @@ -91,6 +99,7 @@ fn read_strip_data(idx: u32) -> LineStripData { data.unresolved_radius = unpack2x16float(raw_data.y).y; data.flags = ((raw_data.y >> 8u) & 0xFFu); data.stippling = f32((raw_data.y >> 16u) & 0xFFu) * (1.0 / 255.0); + data.picking_instance_id = textureLoad(picking_instance_id_texture, coord, 0).rg; return data; } @@ -103,7 +112,7 @@ struct PositionData { fn read_position_data(idx: u32) -> PositionData { 
// can be u32 once https://github.com/gfx-rs/naga/issues/1997 is solved let idx = i32(idx); - var raw_data = textureLoad(line_strip_texture, IVec2(idx % LINESTRIP_TEXTURE_SIZE, idx / LINESTRIP_TEXTURE_SIZE), 0); + var raw_data = textureLoad(line_strip_texture, IVec2(idx % POSITION_TEXTURE_SIZE, idx / POSITION_TEXTURE_SIZE), 0); var data: PositionData; let pos_4d = batch.world_from_obj * Vec4(raw_data.xyz, 1.0); @@ -262,6 +271,7 @@ fn vs_main(@builtin(vertex_index) vertex_idx: u32) -> VertexOut { out.active_radius = active_radius; out.fragment_flags = strip_data.flags & (NO_COLOR_GRADIENT | (u32(is_cap_triangle) * select(CAP_START_ROUND, CAP_END_ROUND, is_right_triangle))); + out.picking_instance_id = strip_data.picking_instance_id; return out; } @@ -305,7 +315,7 @@ fn fs_main_picking_layer(in: VertexOut) -> @location(0) UVec4 { if coverage < 0.5 { discard; } - return UVec4(0u, 0u, 0u, 0u); // TODO(andreas): Implement picking layer id pass-through. + return UVec4(batch.picking_layer_object_id, in.picking_instance_id); } @fragment diff --git a/crates/re_renderer/shader/rectangle.wgsl b/crates/re_renderer/shader/rectangle.wgsl index daf506956108..afd0de119ca2 100644 --- a/crates/re_renderer/shader/rectangle.wgsl +++ b/crates/re_renderer/shader/rectangle.wgsl @@ -1,14 +1,37 @@ #import <./types.wgsl> +#import <./colormap.wgsl> #import <./global_bindings.wgsl> #import <./utils/depth_offset.wgsl> +// Keep in sync with mirror in rectangle.rs + +// Which texture to read from? +const SAMPLE_TYPE_FLOAT_FILTER = 1u; +const SAMPLE_TYPE_FLOAT_NOFILTER = 2u; +const SAMPLE_TYPE_SINT_NOFILTER = 3u; +const SAMPLE_TYPE_UINT_NOFILTER = 4u; + +// How do we do colormapping? +const COLOR_MAPPER_OFF = 1u; +const COLOR_MAPPER_FUNCTION = 2u; +const COLOR_MAPPER_TEXTURE = 3u; + +const FILTER_NEAREST = 1u; +const FILTER_BILINEAR = 2u; + struct UniformBuffer { /// Top left corner position in world space. top_left_corner_position: Vec3, + /// Which colormap to use, if any + colormap_function: u32, + /// Vector that spans up the rectangle from its top left corner along the u axis of the texture. extent_u: Vec3, + /// Which texture sample to use + sample_type: u32, + /// Vector that spans up the rectangle from its top left corner along the v axis of the texture. extent_v: Vec3, @@ -18,48 +41,46 @@ struct UniformBuffer { multiplicative_tint: Vec4, outline_mask: UVec2, + + /// Range of the texture values. + /// Will be mapped to the [0, 1] range before we colormap. + range_min_max: Vec2, + + color_mapper: u32, + + /// Exponent to raise the normalized texture value. + /// Inverse brightness. 
+ gamma: f32, + + minification_filter: u32, + magnification_filter: u32, }; @group(1) @binding(0) var rect_info: UniformBuffer; @group(1) @binding(1) -var texture: texture_2d; +var texture_sampler: sampler; @group(1) @binding(2) -var texture_sampler: sampler; +var texture_float: texture_2d; + +@group(1) @binding(3) +var texture_sint: texture_2d; + +@group(1) @binding(4) +var texture_uint: texture_2d; + +@group(1) @binding(5) +var colormap_texture: texture_2d; +@group(1) @binding(6) +var texture_float_filterable: texture_2d; struct VertexOut { @builtin(position) position: Vec4, @location(0) texcoord: Vec2, }; -@vertex -fn vs_main(@builtin(vertex_index) v_idx: u32) -> VertexOut { - let texcoord = Vec2(f32(v_idx / 2u), f32(v_idx % 2u)); - let pos = texcoord.x * rect_info.extent_u + texcoord.y * rect_info.extent_v + - rect_info.top_left_corner_position; - - var out: VertexOut; - out.position = apply_depth_offset(frame.projection_from_world * Vec4(pos, 1.0), rect_info.depth_offset); - out.texcoord = texcoord; - - return out; -} - -@fragment -fn fs_main(in: VertexOut) -> @location(0) Vec4 { - let texture_color = textureSample(texture, texture_sampler, in.texcoord); - return texture_color * rect_info.multiplicative_tint; -} - -@fragment -fn fs_main_picking_layer(in: VertexOut) -> @location(0) UVec4 { - return UVec4(0u, 0u, 0u, 0u); // TODO(andreas): Implement picking layer id pass-through. -} - -@fragment -fn fs_main_outline_mask(in: VertexOut) -> @location(0) UVec2 { - return rect_info.outline_mask; -} +// The fragment and vertex shaders are in two separate files in order +// to work around this bug: https://github.com/gfx-rs/naga/issues/1743 diff --git a/crates/re_renderer/shader/rectangle_fs.wgsl b/crates/re_renderer/shader/rectangle_fs.wgsl new file mode 100644 index 000000000000..0d1a35cad961 --- /dev/null +++ b/crates/re_renderer/shader/rectangle_fs.wgsl @@ -0,0 +1,108 @@ +#import <./rectangle.wgsl> + +fn is_magnifying(pixel_coord: Vec2) -> bool { + return fwidth(pixel_coord.x) < 1.0; +} + +fn tex_filter(pixel_coord: Vec2) -> u32 { + if is_magnifying(pixel_coord) { + return rect_info.magnification_filter; + } else { + return rect_info.minification_filter; + } +} + +@fragment +fn fs_main(in: VertexOut) -> @location(0) Vec4 { + // Sample the main texture: + var sampled_value: Vec4; + if rect_info.sample_type == SAMPLE_TYPE_FLOAT_FILTER { + // TODO(emilk): support mipmaps + sampled_value = textureSampleLevel(texture_float_filterable, texture_sampler, in.texcoord, 0.0); + } else if rect_info.sample_type == SAMPLE_TYPE_FLOAT_NOFILTER { + let coord = in.texcoord * Vec2(textureDimensions(texture_float).xy); + if tex_filter(coord) == FILTER_NEAREST { + // nearest + sampled_value = textureLoad(texture_float, IVec2(coord + vec2(0.5)), 0); + } else { + // bilinear + let v00 = textureLoad(texture_float, IVec2(coord) + IVec2(0, 0), 0); + let v01 = textureLoad(texture_float, IVec2(coord) + IVec2(0, 1), 0); + let v10 = textureLoad(texture_float, IVec2(coord) + IVec2(1, 0), 0); + let v11 = textureLoad(texture_float, IVec2(coord) + IVec2(1, 1), 0); + let top = mix(v00, v10, fract(coord.x)); + let bottom = mix(v01, v11, fract(coord.x)); + sampled_value = mix(top, bottom, fract(coord.y)); + } + } else if rect_info.sample_type == SAMPLE_TYPE_SINT_NOFILTER { + let coord = in.texcoord * Vec2(textureDimensions(texture_sint).xy); + if tex_filter(coord) == FILTER_NEAREST { + // nearest + sampled_value = Vec4(textureLoad(texture_sint, IVec2(coord + vec2(0.5)), 0)); + } else { + // bilinear + let v00 = 
Vec4(textureLoad(texture_sint, IVec2(coord) + IVec2(0, 0), 0)); + let v01 = Vec4(textureLoad(texture_sint, IVec2(coord) + IVec2(0, 1), 0)); + let v10 = Vec4(textureLoad(texture_sint, IVec2(coord) + IVec2(1, 0), 0)); + let v11 = Vec4(textureLoad(texture_sint, IVec2(coord) + IVec2(1, 1), 0)); + let top = mix(v00, v10, fract(coord.x)); + let bottom = mix(v01, v11, fract(coord.x)); + sampled_value = mix(top, bottom, fract(coord.y)); + } + } else if rect_info.sample_type == SAMPLE_TYPE_UINT_NOFILTER { + let coord = in.texcoord * Vec2(textureDimensions(texture_uint).xy); + if tex_filter(coord) == FILTER_NEAREST { + // nearest + sampled_value = Vec4(textureLoad(texture_uint, IVec2(coord + vec2(0.5)), 0)); + } else { + // bilinear + let v00 = Vec4(textureLoad(texture_uint, IVec2(coord) + IVec2(0, 0), 0)); + let v01 = Vec4(textureLoad(texture_uint, IVec2(coord) + IVec2(0, 1), 0)); + let v10 = Vec4(textureLoad(texture_uint, IVec2(coord) + IVec2(1, 0), 0)); + let v11 = Vec4(textureLoad(texture_uint, IVec2(coord) + IVec2(1, 1), 0)); + let top = mix(v00, v10, fract(coord.x)); + let bottom = mix(v01, v11, fract(coord.x)); + sampled_value = mix(top, bottom, fract(coord.y)); + } + } else { + return ERROR_RGBA; // unknown sample type + } + + // Normalize the sample: + let range = rect_info.range_min_max; + var normalized_value: Vec4 = (sampled_value - range.x) / (range.y - range.x); + + // Apply gamma: + normalized_value = vec4(pow(normalized_value.rgb, vec3(rect_info.gamma)), normalized_value.a); // TODO(emilk): handle premultiplied alpha + + // Apply colormap, if any: + var texture_color: Vec4; + if rect_info.color_mapper == COLOR_MAPPER_OFF { + texture_color = normalized_value; + } else if rect_info.color_mapper == COLOR_MAPPER_FUNCTION { + let rgb = colormap_linear(rect_info.colormap_function, normalized_value.r); + texture_color = Vec4(rgb, 1.0); + } else if rect_info.color_mapper == COLOR_MAPPER_TEXTURE { + let colormap_size = textureDimensions(colormap_texture).xy; + let color_index = normalized_value.r * f32(colormap_size.x * colormap_size.y); + // TODO(emilk): interpolate between neighboring colors for non-integral color indices + let color_index_i32 = i32(color_index); + let x = color_index_i32 % colormap_size.x; + let y = color_index_i32 / colormap_size.x; + texture_color = textureLoad(colormap_texture, IVec2(x, y), 0); + } else { + return ERROR_RGBA; // unknown color mapper + } + + return texture_color * rect_info.multiplicative_tint; +} + +@fragment +fn fs_main_picking_layer(in: VertexOut) -> @location(0) UVec4 { + return UVec4(0u, 0u, 0u, 0u); // TODO(andreas): Implement picking layer id pass-through. 
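+    // Note: an all-zero id effectively reads as "no hit" to the picking readback, so rectangles
+    // remain unpickable until the pass-through mentioned in the TODO above is implemented.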
+} + +@fragment +fn fs_main_outline_mask(in: VertexOut) -> @location(0) UVec2 { + return rect_info.outline_mask; +} diff --git a/crates/re_renderer/shader/rectangle_vs.wgsl b/crates/re_renderer/shader/rectangle_vs.wgsl new file mode 100644 index 000000000000..e0758c17c23a --- /dev/null +++ b/crates/re_renderer/shader/rectangle_vs.wgsl @@ -0,0 +1,14 @@ +#import <./rectangle.wgsl> + +@vertex +fn vs_main(@builtin(vertex_index) v_idx: u32) -> VertexOut { + let texcoord = Vec2(f32(v_idx / 2u), f32(v_idx % 2u)); + let pos = texcoord.x * rect_info.extent_u + texcoord.y * rect_info.extent_v + + rect_info.top_left_corner_position; + + var out: VertexOut; + out.position = apply_depth_offset(frame.projection_from_world * Vec4(pos, 1.0), rect_info.depth_offset); + out.texcoord = texcoord; + + return out; +} diff --git a/crates/re_renderer/shader/types.wgsl b/crates/re_renderer/shader/types.wgsl index 71552d38e7dd..3323c7a6cd1f 100644 --- a/crates/re_renderer/shader/types.wgsl +++ b/crates/re_renderer/shader/types.wgsl @@ -48,3 +48,7 @@ const ONE = Vec4(1.0, 1.0, 1.0, 1.0); // fn inf() -> f32 { // return 1.0 / 0.0; // } + + +/// The color to use when we encounter an error. +const ERROR_RGBA = Vec4(1.0, 0.0, 1.0, 1.0); diff --git a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs index db9f14b0a8fa..db1a1d085a90 100644 --- a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs +++ b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs @@ -1,6 +1,6 @@ use std::{num::NonZeroU32, sync::mpsc}; -use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, TextureRowDataInfo}; +use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, Texture2DBufferInfo}; /// A sub-allocated staging buffer that can be written to. /// @@ -65,11 +65,15 @@ where /// Pushes several elements into the buffer. /// /// Panics if there are more elements than there is space in the buffer. + /// + /// Returns number of elements pushed. #[inline] - pub fn extend(&mut self, elements: impl Iterator) { + pub fn extend(&mut self, elements: impl Iterator) -> usize { + let num_written_before = self.num_written(); for element in elements { self.push(element); } + self.num_written() - num_written_before } /// Pushes a single element into the buffer and advances the write pointer. @@ -100,15 +104,13 @@ where destination: wgpu::ImageCopyTexture<'_>, copy_extent: glam::UVec2, ) { - let bytes_per_row = TextureRowDataInfo::new(destination.texture.format(), copy_extent.x) - .bytes_per_row_padded; + let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_extent); // Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check). // We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed! // (and if you did you probably have regrets anyways!) 
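        // (`bytes_per_row_padded` honors wgpu's `COPY_BYTES_PER_ROW_ALIGNMENT` of 256 bytes,
        // so the padded buffer size is generally larger than `width * height * texel_size`.)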
-        let required_buffer_size = bytes_per_row * copy_extent.y;
         debug_assert_eq!(
-            required_buffer_size as usize,
+            buffer_info.buffer_size_padded as usize,
             self.num_written() * std::mem::size_of::<T>()
         );
 
@@ -117,7 +119,7 @@ where
                 buffer: &self.chunk_buffer,
                 layout: wgpu::ImageDataLayout {
                     offset: self.byte_offset_in_chunk_buffer,
-                    bytes_per_row: NonZeroU32::new(bytes_per_row),
+                    bytes_per_row: NonZeroU32::new(buffer_info.bytes_per_row_padded),
                     rows_per_image: None,
                 },
             },
diff --git a/crates/re_renderer/src/allocator/gpu_readback_belt.rs b/crates/re_renderer/src/allocator/gpu_readback_belt.rs
index 09fcdd981054..8e5f413743e9 100644
--- a/crates/re_renderer/src/allocator/gpu_readback_belt.rs
+++ b/crates/re_renderer/src/allocator/gpu_readback_belt.rs
@@ -1,6 +1,6 @@
 use std::{num::NonZeroU32, ops::Range, sync::mpsc};
 
-use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, TextureRowDataInfo};
+use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, Texture2DBufferInfo};
 
 /// Identifier used to identify a buffer upon retrieval of the data.
 ///
@@ -61,13 +61,11 @@ impl GpuReadbackBuffer {
             source.texture.format().describe().block_size as u64,
         );
 
-        let bytes_per_row = TextureRowDataInfo::new(source.texture.format(), copy_extents.x)
-            .bytes_per_row_padded;
-        let num_bytes = bytes_per_row * copy_extents.y;
+        let buffer_info = Texture2DBufferInfo::new(source.texture.format(), *copy_extents);
 
         // Validate that we stay within the slice (wgpu can't fully know our intention here, so we have to check).
         debug_assert!(
-            (num_bytes as u64) <= self.range_in_chunk.end - start_offset,
+            buffer_info.buffer_size_padded <= self.range_in_chunk.end - start_offset,
             "Texture data is too large to fit into the readback buffer!"
         );
 
@@ -77,7 +75,7 @@ impl GpuReadbackBuffer {
                 buffer: &self.chunk_buffer,
                 layout: wgpu::ImageDataLayout {
                     offset: start_offset,
-                    bytes_per_row: NonZeroU32::new(bytes_per_row),
+                    bytes_per_row: NonZeroU32::new(buffer_info.bytes_per_row_padded),
                     rows_per_image: None,
                 },
             },
@@ -88,7 +86,8 @@ impl GpuReadbackBuffer {
             },
         );
 
-        self.range_in_chunk = start_offset..self.range_in_chunk.end;
+        self.range_in_chunk =
+            (start_offset + buffer_info.buffer_size_padded)..self.range_in_chunk.end;
     }
 }
diff --git a/crates/re_renderer/src/colormap.rs b/crates/re_renderer/src/colormap.rs
index 4625b4939d62..15cd98d5dc14 100644
--- a/crates/re_renderer/src/colormap.rs
+++ b/crates/re_renderer/src/colormap.rs
@@ -5,25 +5,53 @@ use glam::{Vec2, Vec3A, Vec4, Vec4Swizzles};
 // ---
 
 // NOTE: Keep in sync with `colormap.wgsl`!
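Why the discriminants below matter: with `#[repr(u32)]` and 0 reserved for "disabled", the enum value can be written into a uniform buffer as-is and compared against the WGSL-side constants. A minimal, self-contained sketch of that contract (not the crate's actual plumbing; `colormap_to_uniform` is a hypothetical helper):

// Sketch: a #[repr(u32)] enum whose discriminants double as shader-side constants.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(u32)]
enum Colormap {
    // 0 stays reserved so the shader can use it for "colormapping disabled".
    #[default]
    Grayscale = 1,
    Inferno = 2,
}

// Hypothetical helper: the value that would be written into the uniform buffer.
fn colormap_to_uniform(colormap: Option<Colormap>) -> u32 {
    colormap.map_or(0, |c| c as u32)
}

fn main() {
    assert_eq!(colormap_to_uniform(None), 0); // pairs with a WGSL "off" constant
    assert_eq!(colormap_to_uniform(Some(Colormap::Inferno)), 2);
}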
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
 #[repr(u32)]
-pub enum ColorMap {
-    Grayscale = 0,
-    ColorMapTurbo = 1,
-    ColorMapViridis = 2,
-    ColorMapPlasma = 3,
-    ColorMapMagma = 4,
-    ColorMapInferno = 5,
+pub enum Colormap {
+    // Reserve 0 for "disabled"
+    /// Perceptually even
+    #[default]
+    Grayscale = 1,
+    Inferno = 2,
+    Magma = 3,
+    Plasma = 4,
+    Turbo = 5,
+    Viridis = 6,
 }
 
-pub fn colormap_srgb(which: ColorMap, t: f32) -> [u8; 4] {
+impl Colormap {
+    pub const ALL: [Self; 6] = [
+        Self::Grayscale,
+        Self::Inferno,
+        Self::Magma,
+        Self::Plasma,
+        Self::Turbo,
+        Self::Viridis,
+    ];
+}
+
+impl std::fmt::Display for Colormap {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Colormap::Grayscale => write!(f, "Grayscale"),
+            Colormap::Inferno => write!(f, "Inferno"),
+            Colormap::Magma => write!(f, "Magma"),
+            Colormap::Plasma => write!(f, "Plasma"),
+            Colormap::Turbo => write!(f, "Turbo"),
+            Colormap::Viridis => write!(f, "Viridis"),
+        }
+    }
+}
+
+pub fn colormap_srgb(which: Colormap, t: f32) -> [u8; 4] {
     match which {
-        ColorMap::Grayscale => grayscale_srgb(t),
-        ColorMap::ColorMapTurbo => colormap_turbo_srgb(t),
-        ColorMap::ColorMapViridis => colormap_viridis_srgb(t),
-        ColorMap::ColorMapPlasma => colormap_plasma_srgb(t),
-        ColorMap::ColorMapMagma => colormap_magma_srgb(t),
-        ColorMap::ColorMapInferno => colormap_inferno_srgb(t),
+        Colormap::Grayscale => grayscale_srgb(t),
+        Colormap::Turbo => colormap_turbo_srgb(t),
+        Colormap::Viridis => colormap_viridis_srgb(t),
+        Colormap::Plasma => colormap_plasma_srgb(t),
+        Colormap::Magma => colormap_magma_srgb(t),
+        Colormap::Inferno => colormap_inferno_srgb(t),
     }
 }
diff --git a/crates/re_renderer/src/config.rs b/crates/re_renderer/src/config.rs
index d168ab274373..91e0315d401d 100644
--- a/crates/re_renderer/src/config.rs
+++ b/crates/re_renderer/src/config.rs
@@ -2,13 +2,16 @@
 ///
 /// To reduce complexity, we don't do fine-grained feature checks,
 /// but instead support a set of features, each a superset of the next.
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum HardwareTier {
-    /// For WebGL and native OpenGL. Maintains strict WebGL capability.
-    Basic,
+    /// Limited feature support as provided by WebGL and native GLES2/OpenGL3(ish).
+    Gles,
 
-    /// Run natively with Vulkan/Metal but don't demand anything that isn't widely available.
-    Native,
+    /// Full support of the WebGPU spec without additional feature requirements.
+    ///
+    /// Expected to run in a stable WebGPU implementation,
+    /// i.e. either natively with Vulkan/Metal or in a browser with WebGPU support.
+    FullWebGpuSupport,
     // Run natively with Vulkan/Metal and require additional features.
     //HighEnd
 }
@@ -17,8 +20,16 @@ impl HardwareTier {
     /// Whether the current hardware tier supports sampling from textures with a sample count higher than 1.
     pub fn support_sampling_msaa_texture(&self) -> bool {
         match self {
-            HardwareTier::Basic => false,
-            HardwareTier::Native => true,
+            HardwareTier::Gles => false,
+            HardwareTier::FullWebGpuSupport => true,
+        }
+    }
+
+    /// Whether the current hardware tier supports reading back depth textures.
+ pub fn support_depth_readback(&self) -> bool { + match self { + HardwareTier::Gles => false, + HardwareTier::FullWebGpuSupport => true, } } } @@ -27,9 +38,9 @@ impl Default for HardwareTier { fn default() -> Self { // Use "Basic" tier for actual web but also if someone forces the GL backend! if supported_backends() == wgpu::Backends::GL { - HardwareTier::Basic + HardwareTier::Gles } else { - HardwareTier::Native + HardwareTier::FullWebGpuSupport } } } @@ -63,7 +74,11 @@ impl HardwareTier { /// Downlevel features required by the given tier. pub fn required_downlevel_capabilities(self) -> wgpu::DownlevelCapabilities { wgpu::DownlevelCapabilities { - flags: wgpu::DownlevelFlags::empty(), + flags: match self { + HardwareTier::Gles => wgpu::DownlevelFlags::empty(), + // Require fully WebGPU compliance for the native tier. + HardwareTier::FullWebGpuSupport => wgpu::DownlevelFlags::all(), + }, limits: Default::default(), // unused so far both here and in wgpu shader_model: wgpu::ShaderModel::Sm4, } diff --git a/crates/re_renderer/src/context.rs b/crates/re_renderer/src/context.rs index e25aa5fc9b6a..701d1800d561 100644 --- a/crates/re_renderer/src/context.rs +++ b/crates/re_renderer/src/context.rs @@ -5,7 +5,7 @@ use type_map::concurrent::{self, TypeMap}; use crate::{ allocator::{CpuWriteGpuReadBelt, GpuReadbackBelt}, - config::RenderContextConfig, + config::{HardwareTier, RenderContextConfig}, global_bindings::GlobalBindings, renderer::Renderer, resource_managers::{MeshManager, TextureManager2D}, @@ -78,14 +78,11 @@ impl Renderers { impl RenderContext { /// Chunk size for our cpu->gpu buffer manager. /// - /// For native: 32MiB chunk size (as big as a for instance a 2048x1024 float4 texture) - /// For web (memory constraint!): 8MiB - #[cfg(not(target_arch = "wasm32"))] + /// 32MiB chunk size (as big as a for instance a 2048x1024 float4 texture) + /// (it's tempting to use something smaller on Web, but this may just cause more + /// buffers to be allocated the moment we want to upload a bigger chunk) const CPU_WRITE_GPU_READ_BELT_DEFAULT_CHUNK_SIZE: Option = wgpu::BufferSize::new(1024 * 1024 * 32); - #[cfg(target_arch = "wasm32")] - const CPU_WRITE_GPU_READ_BELT_DEFAULT_CHUNK_SIZE: Option = - wgpu::BufferSize::new(1024 * 1024 * 8); /// Chunk size for our gpu->cpu buffer manager. /// @@ -210,14 +207,26 @@ impl RenderContext { fn poll_device(&mut self) { crate::profile_function!(); - // Browsers don't let us wait for GPU work via `poll`. - // * WebGPU: `poll` is a no-op as the spec doesn't specify it at all. + // Browsers don't let us wait for GPU work via `poll`: + // + // * WebGPU: `poll` is a no-op as the spec doesn't specify it at all. Calling it doesn't hurt though. + // // * WebGL: Internal timeout can't go above a browser specific value. // Since wgpu ran into issues in the past with some browsers returning errors, // it uses a timeout of zero and ignores errors there. - // TODO(andreas): That's not the only thing that's weird with `maintain` in general. - // See https://github.com/gfx-rs/wgpu/issues/3601 - if cfg!(target_arch = "wasm32") { + // + // This causes unused buffers to be freed immediately, which is wrong but also doesn't hurt + // since WebGL doesn't care about freeing buffers/textures that are still in use. + // Meaning, that from our POV we're actually freeing cpu memory that we wanted to free anyways. 
+ // *More importantly this means that we get buffers from the staging belts back earlier!* + // Therefore, we just always "block" instead on WebGL to free as early as possible, + // knowing that we're not _actually_ blocking. + // + // For more details check https://github.com/gfx-rs/wgpu/issues/3601 + if cfg!(target_arch = "wasm32") + && self.shared_renderer_data.config.hardware_tier == HardwareTier::Gles + { + self.device.poll(wgpu::Maintain::Wait); return; } diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs index 8a52147077ba..dc5cf38f033f 100644 --- a/crates/re_renderer/src/draw_phases/picking_layer.rs +++ b/crates/re_renderer/src/draw_phases/picking_layer.rs @@ -12,11 +12,18 @@ use crate::{ allocator::create_and_fill_uniform_buffer, global_bindings::FrameUniformBuffer, + include_shader_module, view_builder::ViewBuilder, - wgpu_resources::{GpuBindGroup, GpuTexture, TextureDesc, TextureRowDataInfo}, + wgpu_resources::{ + BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuRenderPipelineHandle, + GpuTexture, GpuTextureHandle, PipelineLayoutDesc, PoolError, RenderPipelineDesc, + Texture2DBufferInfo, TextureDesc, WgpuResourcePools, + }, DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, IntRect, RenderContext, }; +use smallvec::smallvec; + /// GPU retrieved & processed picking data result. pub struct PickingResult { /// User data supplied on picking request. @@ -26,17 +33,58 @@ pub struct PickingResult { /// Describes the area of the picking layer that was read back. pub rect: IntRect, - /// Picking data for the requested rectangle. + /// Picking id data for the requested rectangle. + /// + /// GPU internal row padding has already been removed from this buffer. + /// Pixel data is stored in the normal fashion - row wise, left to right, top to bottom. + pub picking_id_data: Vec, + + /// Picking depth data for the requested rectangle. + /// + /// Use [`PickingResult::picked_world_position`] for easy interpretation of the data. + /// + /// GPU internal row padding has already been removed from this buffer. + /// Pixel data is stored in the normal fashion - row wise, left to right, top to bottom. + pub picking_depth_data: Vec, + + /// Transforms a NDC position on the picking rect to a world position. + world_from_cropped_projection: glam::Mat4, +} + +impl PickingResult { + /// Returns the picked world position. + /// + /// Panics if the position is outside of the picking rect. /// - /// GPU internal row padding has already been removed. - /// Data is stored row wise, left to right, top to bottom. - pub picking_data: Vec, + /// Keep in mind that the picked position may be (negative) infinity if nothing was picked. + #[inline] + pub fn picked_world_position(&self, pos_on_picking_rect: glam::UVec2) -> glam::Vec3 { + let raw_depth = self.picking_depth_data + [(pos_on_picking_rect.y * self.rect.width() + pos_on_picking_rect.x) as usize]; + + self.world_from_cropped_projection.project_point3( + pixel_coord_to_ndc(pos_on_picking_rect.as_vec2(), self.rect.extent.as_vec2()) + .extend(raw_depth), + ) + } + + /// Returns the picked picking id. + /// + /// Panics if the position is outside of the picking rect. + #[inline] + pub fn picked_id(&self, pos_on_picking_rect: glam::UVec2) -> PickingLayerId { + self.picking_id_data + [(pos_on_picking_rect.y * self.rect.width() + pos_on_picking_rect.x) as usize] + } } /// Type used as user data on the gpu readback belt. 
struct ReadbackBeltMetadata { picking_rect: IntRect, + world_from_cropped_projection: glam::Mat4, user_data: T, + + depth_readback_workaround_in_use: bool, } /// The first 64bit of the picking layer. @@ -76,22 +124,32 @@ impl From for [u32; 4] { } } +/// Converts a pixel coordinate to normalized device coordinates. +pub fn pixel_coord_to_ndc(coord: glam::Vec2, target_resolution: glam::Vec2) -> glam::Vec2 { + glam::vec2( + coord.x / target_resolution.x * 2.0 - 1.0, + 1.0 - coord.y / target_resolution.y * 2.0, + ) +} + /// Manages the rendering of the picking layer pass, its render targets & readback buffer. /// /// The view builder creates this for every frame that requests a picking result. pub struct PickingLayerProcessor { pub picking_target: GpuTexture, - picking_depth: GpuTexture, + picking_depth_target: GpuTexture, readback_buffer: GpuReadbackBuffer, bind_group_0: GpuBindGroup, + + depth_readback_workaround: Option, } impl PickingLayerProcessor { /// The texture format used for the picking layer. pub const PICKING_LAYER_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Uint; - pub const PICKING_LAYER_DEPTH_FORMAT: wgpu::TextureFormat = - ViewBuilder::MAIN_TARGET_DEPTH_FORMAT; + /// The depth format used for the picking layer - f32 makes it easiest to deal with retrieved depth and is guaranteed to be copyable. + pub const PICKING_LAYER_DEPTH_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Depth32Float; pub const PICKING_LAYER_MSAA_STATE: wgpu::MultisampleState = wgpu::MultisampleState { count: 1, @@ -122,19 +180,6 @@ impl PickingLayerProcessor { readback_identifier: GpuReadbackIdentifier, readback_user_data: T, ) -> Self { - let row_info = TextureRowDataInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.width()); - let buffer_size = row_info.bytes_per_row_padded * picking_rect.height(); - let readback_buffer = ctx.gpu_readback_belt.lock().allocate( - &ctx.device, - &ctx.gpu_resources.buffers, - buffer_size as u64, - readback_identifier, - Box::new(ReadbackBeltMetadata { - picking_rect, - user_data: readback_user_data, - }), - ); - let mut picking_target_usage = wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC; picking_target_usage.set( @@ -154,44 +199,57 @@ impl PickingLayerProcessor { usage: picking_target_usage, }, ); - let picking_depth = ctx.gpu_resources.textures.alloc( + + let direct_depth_readback = ctx + .shared_renderer_data + .config + .hardware_tier + .support_depth_readback(); + + let picking_depth_target = ctx.gpu_resources.textures.alloc( &ctx.device, &TextureDesc { - label: format!("{view_name} - picking_layer depth").into(), + label: format!("{view_name} - picking_layer depth target").into(), format: Self::PICKING_LAYER_DEPTH_FORMAT, - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + usage: if direct_depth_readback { + wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC + } else { + wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING + }, ..picking_target.creation_desc }, ); - let rect_min = picking_rect.top_left_corner.as_vec2(); + let depth_readback_workaround = (!direct_depth_readback).then(|| { + DepthReadbackWorkaround::new(ctx, picking_rect.extent, picking_depth_target.handle) + }); + + let rect_min = picking_rect.left_top.as_vec2(); let rect_max = rect_min + picking_rect.extent.as_vec2(); let screen_resolution = screen_resolution.as_vec2(); - let rect_min_ndc = glam::vec2( - rect_min.x / screen_resolution.x * 2.0 - 1.0, - 1.0 - rect_max.y / screen_resolution.y * 2.0, - ); - let 
rect_max_ndc = glam::vec2( - rect_max.x / screen_resolution.x * 2.0 - 1.0, - 1.0 - rect_min.y / screen_resolution.y * 2.0, - ); - let rect_center_ndc = (rect_min_ndc + rect_max_ndc) * 0.5; - let cropped_projection_from_projection = - glam::Mat4::from_scale(2.0 / (rect_max_ndc - rect_min_ndc).extend(1.0)) - * glam::Mat4::from_translation(-rect_center_ndc.extend(0.0)); + // y axis is flipped in NDC, therefore we need to flip the y axis of the rect. + let rect_min_ndc = + pixel_coord_to_ndc(glam::vec2(rect_min.x, rect_max.y), screen_resolution); + let rect_max_ndc = + pixel_coord_to_ndc(glam::vec2(rect_max.x, rect_min.y), screen_resolution); + let scale = 2.0 / (rect_max_ndc - rect_min_ndc); + let translation = -0.5 * (rect_min_ndc + rect_max_ndc); + let cropped_projection_from_projection = glam::Mat4::from_scale(scale.extend(1.0)) + * glam::Mat4::from_translation(translation.extend(0.0)); // Setup frame uniform buffer let previous_projection_from_world: glam::Mat4 = frame_uniform_buffer_content.projection_from_world.into(); + let cropped_projection_from_world = + cropped_projection_from_projection * previous_projection_from_world; let previous_projection_from_view: glam::Mat4 = frame_uniform_buffer_content.projection_from_view.into(); + let cropped_projection_from_view = + cropped_projection_from_projection * previous_projection_from_view; + let frame_uniform_buffer_content = FrameUniformBuffer { - projection_from_world: (cropped_projection_from_projection - * previous_projection_from_world) - .into(), - projection_from_view: (cropped_projection_from_projection - * previous_projection_from_view) - .into(), + projection_from_world: cropped_projection_from_world.into(), + projection_from_view: cropped_projection_from_view.into(), ..*frame_uniform_buffer_content }; @@ -207,11 +265,44 @@ impl PickingLayerProcessor { frame_uniform_buffer, ); + let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent); + let row_info_depth = Texture2DBufferInfo::new( + if direct_depth_readback { + Self::PICKING_LAYER_DEPTH_FORMAT + } else { + DepthReadbackWorkaround::READBACK_FORMAT + }, + picking_rect.extent, + ); + + // Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel. + // This is "trivially true" if the size of the depth format is a multiple of the size of the id format. + debug_assert!( + Self::PICKING_LAYER_FORMAT.describe().block_size + % Self::PICKING_LAYER_DEPTH_FORMAT.describe().block_size + == 0 + ); + let buffer_size = row_info_id.buffer_size_padded + row_info_depth.buffer_size_padded; + + let readback_buffer = ctx.gpu_readback_belt.lock().allocate( + &ctx.device, + &ctx.gpu_resources.buffers, + buffer_size, + readback_identifier, + Box::new(ReadbackBeltMetadata { + picking_rect, + user_data: readback_user_data, + world_from_cropped_projection: cropped_projection_from_world.inverse(), + depth_readback_workaround_in_use: depth_readback_workaround.is_some(), + }), + ); + PickingLayerProcessor { bind_group_0, picking_target, - picking_depth, + picking_depth_target, readback_buffer, + depth_readback_workaround, } } @@ -233,10 +324,10 @@ impl PickingLayerProcessor { }, })], depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { - view: &self.picking_depth.default_view, + view: &self.picking_depth_target.default_view, depth_ops: Some(wgpu::Operations { load: ViewBuilder::DEFAULT_DEPTH_CLEAR, - store: false, + store: true, // Store for readback! 
             }),
             stencil_ops: None,
         }),
@@ -247,20 +338,49 @@ impl PickingLayerProcessor {
         pass
     }
 
-    pub fn end_render_pass(self, encoder: &mut wgpu::CommandEncoder) {
-        self.readback_buffer.read_texture2d(
+    pub fn end_render_pass(
+        self,
+        encoder: &mut wgpu::CommandEncoder,
+        pools: &WgpuResourcePools,
+    ) -> Result<(), PoolError> {
+        let extent = glam::uvec2(
+            self.picking_target.texture.width(),
+            self.picking_target.texture.height(),
+        );
+
+        let readable_depth_texture = if let Some(depth_copy_workaround) =
+            self.depth_readback_workaround.as_ref()
+        {
+            depth_copy_workaround.copy_to_readable_texture(encoder, pools, &self.bind_group_0)?
+        } else {
+            &self.picking_depth_target
+        };
+
+        self.readback_buffer.read_multiple_texture2d(
             encoder,
-            wgpu::ImageCopyTexture {
-                texture: &self.picking_target.texture,
-                mip_level: 0,
-                origin: wgpu::Origin3d::ZERO,
-                aspect: wgpu::TextureAspect::All,
-            },
-            glam::uvec2(
-                self.picking_target.texture.width(),
-                self.picking_target.texture.height(),
-            ),
+            &[
+                (
+                    wgpu::ImageCopyTexture {
+                        texture: &self.picking_target.texture,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d::ZERO,
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    extent,
+                ),
+                (
+                    wgpu::ImageCopyTexture {
+                        texture: &readable_depth_texture.texture,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d::ZERO,
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    extent,
+                ),
+            ],
         );
+
+        Ok(())
     }
 
     /// Returns the oldest received picking results for a given identifier and user data type.
@@ -279,38 +399,202 @@ impl PickingLayerProcessor {
         ctx.gpu_readback_belt
             .lock()
            .readback_data::<ReadbackBeltMetadata<T>>(identifier, |data, metadata| {
-                // Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned,
-                // so much, that we can't interpret it just as `PickingLayerId`.
-                // Therefore, we have to do a copy of the data regardless.
-                let row_info = TextureRowDataInfo::new(
+                // Assert that our texture data reinterpretation works out from a pixel size point of view.
+                debug_assert_eq!(
+                    Self::PICKING_LAYER_DEPTH_FORMAT.describe().block_size as usize,
+                    std::mem::size_of::<f32>()
+                );
+                debug_assert_eq!(
+                    Self::PICKING_LAYER_FORMAT.describe().block_size as usize,
+                    std::mem::size_of::<PickingLayerId>()
+                );
+
+                let buffer_info_id = Texture2DBufferInfo::new(
                     Self::PICKING_LAYER_FORMAT,
-                    metadata.picking_rect.extent.x,
+                    metadata.picking_rect.extent,
                 );
+                let buffer_info_depth = Texture2DBufferInfo::new(
+                    if metadata.depth_readback_workaround_in_use {
+                        DepthReadbackWorkaround::READBACK_FORMAT
+                    } else {
+                        Self::PICKING_LAYER_DEPTH_FORMAT
+                    },
+                    metadata.picking_rect.extent,
+                );
+
+                let picking_id_data = buffer_info_id
+                    .remove_padding_and_convert(&data[..buffer_info_id.buffer_size_padded as _]);
+                let mut picking_depth_data = buffer_info_depth
+                    .remove_padding_and_convert(&data[buffer_info_id.buffer_size_padded as _..]);
 
-                // Copies need to use [u8] because of aforementioned alignment issues.
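For orientation before the removed legacy copy code below: on the caller side, the richer result is consumed through the two accessors defined on `PickingResult` earlier in this file. A sketch of that usage (the handler function is hypothetical; the accessor names and panics are per this diff):

// Sketch: interpreting a returned PickingResult (hypothetical handler).
fn handle_result<T: 'static + Send + Sync>(
    result: &PickingResult<T>,
    cursor_on_rect: glam::UVec2,
) -> (PickingLayerId, glam::Vec3) {
    // Both accessors panic if the position lies outside the readback rect.
    let id = result.picked_id(cursor_on_rect);
    // May be (negative) infinity where nothing was rendered.
    let world_pos = result.picked_world_position(cursor_on_rect);
    (id, world_pos)
}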
-                let mut picking_data = vec![
-                    PickingLayerId::default();
-                    (metadata.picking_rect.extent.x * metadata.picking_rect.extent.y)
-                        as usize
-                ];
-                let picking_data_as_u8 = bytemuck::cast_slice_mut(&mut picking_data);
-                for row in 0..metadata.picking_rect.extent.y {
-                    let offset_padded = (row_info.bytes_per_row_padded * row) as usize;
-                    let offset_unpadded = (row_info.bytes_per_row_unpadded * row) as usize;
-                    picking_data_as_u8[offset_unpadded
-                        ..(offset_unpadded + row_info.bytes_per_row_unpadded as usize)]
-                        .copy_from_slice(
-                            &data[offset_padded
-                                ..(offset_padded + row_info.bytes_per_row_unpadded as usize)],
-                        );
+                if metadata.depth_readback_workaround_in_use {
+                    // Can't read back depth textures & can't read back R32Float textures either!
+                    // See https://github.com/gfx-rs/wgpu/issues/3644
+                    debug_assert_eq!(
+                        DepthReadbackWorkaround::READBACK_FORMAT
+                            .describe()
+                            .block_size as usize,
+                        std::mem::size_of::<f32>() * 4
+                    );
+                    picking_depth_data = picking_depth_data.into_iter().step_by(4).collect();
                 }
 
                 result = Some(PickingResult {
-                    picking_data,
+                    picking_id_data,
+                    picking_depth_data,
                     user_data: metadata.user_data,
                     rect: metadata.picking_rect,
+                    world_from_cropped_projection: metadata.world_from_cropped_projection,
                 });
             });
         result
     }
 }
+
+/// Utility for copying a depth texture when it can't be read back directly into a readable [`wgpu::TextureFormat::R32Float`] texture.
+///
+/// Implementation note:
+/// This is a plain & simple "sample in shader and write to texture" utility.
+/// It might be worth abstracting this further into a general purpose operator.
+/// There is not much in here that is specific to the depth use case!
+struct DepthReadbackWorkaround {
+    render_pipeline: GpuRenderPipelineHandle,
+    bind_group: GpuBindGroup,
+    readable_texture: GpuTexture,
+}
+
+impl DepthReadbackWorkaround {
+    /// There are two layers of workarounds here:
+    /// * WebGL (via spec) not being able to read back depth textures
+    /// * unclear behavior for any readback that isn't RGBA
+    ///
+    /// Furthermore, integer textures also seemed to be problematic,
+    /// but [`wgpu::TextureFormat::Rgba32Uint`], which we use for our picking ID, works fine.
+    /// For details see [wgpu#3644](https://github.com/gfx-rs/wgpu/issues/3644).
+    const READBACK_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba32Float;
+
+    fn new(
+        ctx: &mut RenderContext,
+        extent: glam::UVec2,
+        depth_target_handle: GpuTextureHandle,
+    ) -> DepthReadbackWorkaround {
+        let readable_texture = ctx.gpu_resources.textures.alloc(
+            &ctx.device,
+            &TextureDesc {
+                label: "DepthCopyWorkaround::readable_texture".into(),
+                format: Self::READBACK_FORMAT,
+                usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT,
+                size: wgpu::Extent3d {
+                    width: extent.x,
+                    height: extent.y,
+                    depth_or_array_layers: 1,
+                },
+                mip_level_count: 1,
+                sample_count: 1,
+                dimension: wgpu::TextureDimension::D2,
+            },
+        );
+
+        let bind_group_layout = ctx.gpu_resources.bind_group_layouts.get_or_create(
+            &ctx.device,
+            &BindGroupLayoutDesc {
+                label: "DepthCopyWorkaround::bind_group_layout".into(),
+                entries: vec![wgpu::BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: wgpu::ShaderStages::FRAGMENT,
+                    ty: wgpu::BindingType::Texture {
+                        sample_type: wgpu::TextureSampleType::Float { filterable: false },
+                        view_dimension: wgpu::TextureViewDimension::D2,
+                        multisampled: false,
+                    },
+                    count: None,
+                }],
+            },
+        );
+
+        let bind_group = ctx.gpu_resources.bind_groups.alloc(
+            &ctx.device,
+            &ctx.gpu_resources,
+            &BindGroupDesc {
+                label:
"DepthCopyWorkaround::bind_group".into(), + entries: smallvec![BindGroupEntry::DefaultTextureView(depth_target_handle)], + layout: bind_group_layout, + }, + ); + + let render_pipeline = ctx.gpu_resources.render_pipelines.get_or_create( + &ctx.device, + &RenderPipelineDesc { + label: "DepthCopyWorkaround::render_pipeline".into(), + pipeline_layout: ctx.gpu_resources.pipeline_layouts.get_or_create( + &ctx.device, + &PipelineLayoutDesc { + label: "DepthCopyWorkaround::render_pipeline".into(), + entries: vec![ + ctx.shared_renderer_data.global_bindings.layout, + bind_group_layout, + ], + }, + &ctx.gpu_resources.bind_group_layouts, + ), + vertex_entrypoint: "main".into(), + vertex_handle: ctx.gpu_resources.shader_modules.get_or_create( + &ctx.device, + &mut ctx.resolver, + &include_shader_module!("../../shader/screen_triangle.wgsl"), + ), + fragment_entrypoint: "main".into(), + fragment_handle: ctx.gpu_resources.shader_modules.get_or_create( + &ctx.device, + &mut ctx.resolver, + &include_shader_module!("../../shader/copy_texture.wgsl"), + ), + vertex_buffers: smallvec![], + render_targets: smallvec![Some(readable_texture.texture.format().into())], + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleStrip, + cull_mode: None, + ..Default::default() + }, + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + }, + &ctx.gpu_resources.pipeline_layouts, + &ctx.gpu_resources.shader_modules, + ); + + Self { + render_pipeline, + bind_group, + readable_texture, + } + } + + fn copy_to_readable_texture( + &self, + encoder: &mut wgpu::CommandEncoder, + pools: &WgpuResourcePools, + global_binding_bind_group: &GpuBindGroup, + ) -> Result<&GpuTexture, PoolError> { + // Copy depth texture to a readable (color) texture with a screen filling triangle. + let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: DebugLabel::from("Depth copy workaround").get(), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &self.readable_texture.default_view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT), + store: true, // Store for readback! + }, + })], + depth_stencil_attachment: None, + }); + + let pipeline = pools.render_pipelines.get_resource(self.render_pipeline)?; + pass.set_pipeline(pipeline); + pass.set_bind_group(0, global_binding_bind_group, &[]); + pass.set_bind_group(1, &self.bind_group, &[]); + pass.draw(0..3, 0..1); + + Ok(&self.readable_texture) + } +} diff --git a/crates/re_renderer/src/draw_phases/screenshot.rs b/crates/re_renderer/src/draw_phases/screenshot.rs index 875284986c90..68c05b3b545c 100644 --- a/crates/re_renderer/src/draw_phases/screenshot.rs +++ b/crates/re_renderer/src/draw_phases/screenshot.rs @@ -11,7 +11,7 @@ //! Or alternatively try to render the images in several tiles πŸ€”. In any case this would greatly improve quality! 
use crate::{ - wgpu_resources::{GpuTexture, TextureDesc, TextureRowDataInfo}, + wgpu_resources::{GpuTexture, Texture2DBufferInfo, TextureDesc}, DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, RenderContext, }; @@ -37,12 +37,11 @@ impl ScreenshotProcessor { readback_identifier: GpuReadbackIdentifier, readback_user_data: T, ) -> Self { - let row_info = TextureRowDataInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution.x); - let buffer_size = row_info.bytes_per_row_padded * resolution.y; + let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution); let screenshot_readback_buffer = ctx.gpu_readback_belt.lock().allocate( &ctx.device, &ctx.gpu_resources.buffers, - buffer_size as u64, + buffer_info.buffer_size_padded, readback_identifier, Box::new(ReadbackBeltMetadata { extent: resolution, @@ -130,9 +129,9 @@ impl ScreenshotProcessor { .lock() .readback_data::>(identifier, |data: &[u8], metadata| { screenshot_was_available = Some(()); - let texture_row_info = - TextureRowDataInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent.x); - let texture_data = texture_row_info.remove_padding(data); + let buffer_info = + Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent); + let texture_data = buffer_info.remove_padding(data); on_screenshot(&texture_data, metadata.extent, metadata.user_data); }); screenshot_was_available diff --git a/crates/re_renderer/src/importer/gltf.rs b/crates/re_renderer/src/importer/gltf.rs index fff88b006cb9..9a129640b43a 100644 --- a/crates/re_renderer/src/importer/gltf.rs +++ b/crates/re_renderer/src/importer/gltf.rs @@ -62,7 +62,7 @@ pub fn load_gltf_from_buffer( format!("gltf image used by {texture_names} in {mesh_name}") } .into(), - data: &data, + data: data.into(), format, width: image.width, height: image.height, diff --git a/crates/re_renderer/src/lib.rs b/crates/re_renderer/src/lib.rs index 7f4f23db8db0..770c0589f7fe 100644 --- a/crates/re_renderer/src/lib.rs +++ b/crates/re_renderer/src/lib.rs @@ -33,7 +33,7 @@ pub use allocator::GpuReadbackIdentifier; pub use color::Rgba32Unmul; pub use colormap::{ colormap_inferno_srgb, colormap_magma_srgb, colormap_plasma_srgb, colormap_srgb, - colormap_turbo_srgb, colormap_viridis_srgb, grayscale_srgb, ColorMap, + colormap_turbo_srgb, colormap_viridis_srgb, grayscale_srgb, Colormap, }; pub use context::RenderContext; pub use debug_label::DebugLabel; @@ -41,7 +41,7 @@ pub use depth_offset::DepthOffset; pub use line_strip_builder::{LineStripBuilder, LineStripSeriesBuilder}; pub use point_cloud_builder::{PointCloudBatchBuilder, PointCloudBuilder}; pub use size::Size; -pub use view_builder::AutoSizeConfig; +pub use view_builder::{AutoSizeConfig, ViewBuilder}; pub use wgpu_resources::WgpuResourcePoolStatistics; mod draw_phases; diff --git a/crates/re_renderer/src/line_strip_builder.rs b/crates/re_renderer/src/line_strip_builder.rs index 2c273491d563..9fee0afdb964 100644 --- a/crates/re_renderer/src/line_strip_builder.rs +++ b/crates/re_renderer/src/line_strip_builder.rs @@ -1,8 +1,12 @@ use std::ops::Range; use crate::{ - renderer::{LineBatchInfo, LineDrawData, LineStripFlags, LineStripInfo, LineVertex}, - Color32, DebugLabel, OutlineMaskPreference, RenderContext, Size, + allocator::CpuWriteGpuReadBuffer, + renderer::{ + LineBatchInfo, LineDrawData, LineDrawDataError, LineStripFlags, LineStripInfo, LineVertex, + }, + Color32, DebugLabel, OutlineMaskPreference, PickingLayerInstanceId, PickingLayerObjectId, + RenderContext, Size, }; /// Builder for a vector of line strips, making it 
easy to create [`crate::renderer::LineDrawData`]. @@ -11,31 +15,39 @@ use crate::{ /// of writing to a GPU readable memory location. /// This will require some ahead of time size limit, but should be feasible. /// But before that we first need to sort out cpu->gpu transfers better by providing staging buffers. -pub struct LineStripSeriesBuilder { +pub struct LineStripSeriesBuilder { pub vertices: Vec, - // Number of elements in strips and strip_user_data should be equal at all times. + pub batches: Vec, + pub strips: Vec, - pub strip_user_data: Vec, - pub batches: Vec, + /// Buffer for picking instance id - every strip gets its own instance id. + /// Therefore, there need to be always as many picking instance ids as there are strips. + pub(crate) picking_instance_ids_buffer: CpuWriteGpuReadBuffer, pub(crate) radius_boost_in_ui_points_for_outlines: f32, } -impl LineStripSeriesBuilder -where - PerStripUserData: Default + Copy, -{ - // TODO(andreas): ctx not yet needed since we don't write to GPU yet, but soon needed. - pub fn new(_ctx: &RenderContext) -> Self { +impl LineStripSeriesBuilder { + pub fn new(ctx: &RenderContext) -> Self { const RESERVE_SIZE: usize = 512; + // TODO(andreas): Be more resourceful about the size allocated here. Typically we know in advance! + let picking_instance_ids_buffer = ctx + .cpu_write_gpu_read_belt + .lock() + .allocate::( + &ctx.device, + &ctx.gpu_resources.buffers, + LineDrawData::MAX_NUM_STRIPS, + ); + Self { vertices: Vec::with_capacity(RESERVE_SIZE * 2), strips: Vec::with_capacity(RESERVE_SIZE), - strip_user_data: Vec::with_capacity(RESERVE_SIZE), batches: Vec::with_capacity(16), + picking_instance_ids_buffer, radius_boost_in_ui_points_for_outlines: 0.0, } } @@ -50,16 +62,14 @@ where } /// Start of a new batch. - pub fn batch( - &mut self, - label: impl Into, - ) -> LineBatchBuilder<'_, PerStripUserData> { + pub fn batch(&mut self, label: impl Into) -> LineBatchBuilder<'_> { self.batches.push(LineBatchInfo { label: label.into(), world_from_obj: glam::Mat4::IDENTITY, line_vertex_count: 0, overall_outline_mask_ids: OutlineMaskPreference::NONE, additional_outline_mask_ids_vertex_ranges: Vec::new(), + picking_object_id: PickingLayerObjectId::default(), }); LineBatchBuilder(self) @@ -84,44 +94,11 @@ where } /// Finalizes the builder and returns a line draw data with all the lines added so far. - pub fn to_draw_data(&self, ctx: &mut crate::context::RenderContext) -> LineDrawData { - LineDrawData::new( - ctx, - &self.vertices, - &self.strips, - &self.batches, - self.radius_boost_in_ui_points_for_outlines, - ) - .unwrap() - } - - /// Iterates over all line strips batches together with their strips and their respective vertices. 
- pub fn iter_strips_with_vertices( - &self, - ) -> impl Iterator< - Item = ( - (&LineStripInfo, &PerStripUserData), - impl Iterator, - ), - > { - let mut cumulative_offset = 0; - self.strips - .iter() - .zip(self.strip_user_data.iter()) - .enumerate() - .map(move |(strip_index, strip)| { - (strip, { - let offset = cumulative_offset; - let strip_index = strip_index as u32; - let vertex_iterator = self - .vertices - .iter() - .skip(offset) - .take_while(move |v| v.strip_index == strip_index); - cumulative_offset += vertex_iterator.clone().count(); - vertex_iterator - }) - }) + pub fn to_draw_data( + self, + ctx: &mut crate::context::RenderContext, + ) -> Result { + LineDrawData::new(ctx, self) } pub fn is_empty(&self) -> bool { @@ -129,9 +106,9 @@ where } } -pub struct LineBatchBuilder<'a, PerStripUserData>(&'a mut LineStripSeriesBuilder); +pub struct LineBatchBuilder<'a>(&'a mut LineStripSeriesBuilder); -impl<'a, PerStripUserData> Drop for LineBatchBuilder<'a, PerStripUserData> { +impl<'a> Drop for LineBatchBuilder<'a> { fn drop(&mut self) { // Remove batch again if it wasn't actually used. if self.0.batches.last().unwrap().line_vertex_count == 0 { @@ -140,10 +117,7 @@ impl<'a, PerStripUserData> Drop for LineBatchBuilder<'a, PerStripUserData> { } } -impl<'a, PerStripUserData> LineBatchBuilder<'a, PerStripUserData> -where - PerStripUserData: Default + Copy, -{ +impl<'a> LineBatchBuilder<'a> { #[inline] fn batch_mut(&mut self) -> &mut LineBatchInfo { self.0 @@ -176,11 +150,14 @@ where self } + /// Sets the picking object id for every element in the batch. + pub fn picking_object_id(mut self, picking_object_id: PickingLayerObjectId) -> Self { + self.batch_mut().picking_object_id = picking_object_id; + self + } + /// Adds a 3D series of line connected points. - pub fn add_strip( - &mut self, - points: impl Iterator, - ) -> LineStripBuilder<'_, PerStripUserData> { + pub fn add_strip(&mut self, points: impl Iterator) -> LineStripBuilder<'_> { let old_strip_count = self.0.strips.len(); let old_vertex_count = self.0.vertices.len(); let strip_index = old_strip_count as _; @@ -188,14 +165,13 @@ where self.add_vertices(points, strip_index); let new_vertex_count = self.0.vertices.len(); - debug_assert_eq!(self.0.strips.len(), self.0.strip_user_data.len()); self.0.strips.push(LineStripInfo::default()); - self.0.strip_user_data.push(PerStripUserData::default()); let new_strip_count = self.0.strips.len(); LineStripBuilder { builder: self.0, outline_mask_ids: OutlineMaskPreference::NONE, + picking_instance_id: PickingLayerInstanceId::default(), vertex_range: old_vertex_count..new_vertex_count, strip_range: old_strip_count..new_strip_count, } @@ -203,11 +179,7 @@ where /// Adds a single 3D line segment connecting two points. 
#[inline] - pub fn add_segment( - &mut self, - a: glam::Vec3, - b: glam::Vec3, - ) -> LineStripBuilder<'_, PerStripUserData> { + pub fn add_segment(&mut self, a: glam::Vec3, b: glam::Vec3) -> LineStripBuilder<'_> { self.add_strip([a, b].into_iter()) } @@ -215,7 +187,12 @@ where pub fn add_segments( &mut self, segments: impl Iterator, - ) -> LineStripBuilder<'_, PerStripUserData> { + ) -> LineStripBuilder<'_> { + debug_assert_eq!( + self.0.strips.len(), + self.0.picking_instance_ids_buffer.num_written() + ); + let old_strip_count = self.0.strips.len(); let old_vertex_count = self.0.vertices.len(); let mut strip_index = old_strip_count as u32; @@ -230,18 +207,15 @@ where let new_vertex_count = self.0.vertices.len(); let num_strips_added = strip_index as usize - old_strip_count; - debug_assert_eq!(self.0.strips.len(), self.0.strip_user_data.len()); self.0 .strips .extend(std::iter::repeat(LineStripInfo::default()).take(num_strips_added)); - self.0 - .strip_user_data - .extend(std::iter::repeat(PerStripUserData::default()).take(num_strips_added)); let new_strip_count = self.0.strips.len(); LineStripBuilder { builder: self.0, outline_mask_ids: OutlineMaskPreference::NONE, + picking_instance_id: PickingLayerInstanceId::default(), vertex_range: old_vertex_count..new_vertex_count, strip_range: old_strip_count..new_strip_count, } @@ -252,10 +226,7 @@ where /// Internally adds 12 line segments with rounded line heads. /// Disables color gradient since we don't support gradients in this setup yet (i.e. enabling them does not look good) #[inline] - pub fn add_box_outline( - &mut self, - transform: glam::Affine3A, - ) -> LineStripBuilder<'_, PerStripUserData> { + pub fn add_box_outline(&mut self, transform: glam::Affine3A) -> LineStripBuilder<'_> { let corners = [ transform.transform_point3(glam::vec3(-0.5, -0.5, -0.5)), transform.transform_point3(glam::vec3(-0.5, -0.5, 0.5)), @@ -305,7 +276,7 @@ where top_left_corner: glam::Vec3, extent_u: glam::Vec3, extent_v: glam::Vec3, - ) -> LineStripBuilder<'_, PerStripUserData> { + ) -> LineStripBuilder<'_> { self.add_segments( [ (top_left_corner, top_left_corner + extent_u), @@ -337,17 +308,13 @@ where pub fn add_strip_2d( &mut self, points: impl Iterator, - ) -> LineStripBuilder<'_, PerStripUserData> { + ) -> LineStripBuilder<'_> { self.add_strip(points.map(|p| p.extend(0.0))) } /// Adds a single 2D line segment connecting two points. Uses autogenerated depth value. 
#[inline] - pub fn add_segment_2d( - &mut self, - a: glam::Vec2, - b: glam::Vec2, - ) -> LineStripBuilder<'_, PerStripUserData> { + pub fn add_segment_2d(&mut self, a: glam::Vec2, b: glam::Vec2) -> LineStripBuilder<'_> { self.add_strip_2d([a, b].into_iter()) } @@ -358,7 +325,7 @@ where pub fn add_segments_2d( &mut self, segments: impl Iterator, - ) -> LineStripBuilder<'_, PerStripUserData> { + ) -> LineStripBuilder<'_> { self.add_segments(segments.map(|(a, b)| (a.extend(0.0), b.extend(0.0)))) } @@ -372,7 +339,7 @@ where top_left_corner: glam::Vec2, extent_u: glam::Vec2, extent_v: glam::Vec2, - ) -> LineStripBuilder<'_, PerStripUserData> { + ) -> LineStripBuilder<'_> { self.add_rectangle_outline( top_left_corner.extend(0.0), extent_u.extend(0.0), @@ -389,7 +356,7 @@ where &mut self, min: glam::Vec2, max: glam::Vec2, - ) -> LineStripBuilder<'_, PerStripUserData> { + ) -> LineStripBuilder<'_> { self.add_rectangle_outline( min.extend(0.0), glam::Vec3::X * (max.x - min.x), @@ -398,17 +365,15 @@ where } } -pub struct LineStripBuilder<'a, PerStripUserData> { - builder: &'a mut LineStripSeriesBuilder, +pub struct LineStripBuilder<'a> { + builder: &'a mut LineStripSeriesBuilder, outline_mask_ids: OutlineMaskPreference, + picking_instance_id: PickingLayerInstanceId, vertex_range: Range, strip_range: Range, } -impl<'a, PerStripUserData> LineStripBuilder<'a, PerStripUserData> -where - PerStripUserData: Clone, -{ +impl<'a> LineStripBuilder<'a> { #[inline] pub fn radius(self, radius: Size) -> Self { for strip in self.builder.strips[self.strip_range.clone()].iter_mut() { @@ -433,6 +398,11 @@ where self } + pub fn picking_instance_id(mut self, instance_id: PickingLayerInstanceId) -> Self { + self.picking_instance_id = instance_id; + self + } + /// Sets an individual outline mask ids. /// Note that this has a relatively high performance impact. #[inline] @@ -440,20 +410,9 @@ where self.outline_mask_ids = outline_mask_ids; self } - - /// Adds user data for every strip this builder adds. - /// - /// User data is currently not available on the GPU. 
- #[inline] - pub fn user_data(self, user_data: PerStripUserData) -> Self { - for d in self.builder.strip_user_data[self.strip_range.clone()].iter_mut() { - *d = user_data.clone(); - } - self - } } -impl<'a, PerStripUserData> Drop for LineStripBuilder<'a, PerStripUserData> { +impl<'a> Drop for LineStripBuilder<'a> { fn drop(&mut self) { if self.outline_mask_ids.is_some() { self.builder @@ -466,5 +425,8 @@ impl<'a, PerStripUserData> Drop for LineStripBuilder<'a, PerStripUserData> { self.outline_mask_ids, )); } + self.builder + .picking_instance_ids_buffer + .extend(std::iter::repeat(self.picking_instance_id).take(self.strip_range.len())); } } diff --git a/crates/re_renderer/src/mesh.rs b/crates/re_renderer/src/mesh.rs index ad3f52ea0fab..8f971064ca37 100644 --- a/crates/re_renderer/src/mesh.rs +++ b/crates/re_renderer/src/mesh.rs @@ -286,7 +286,7 @@ impl GpuMesh { .iter() .zip(uniform_buffer_bindings.into_iter()) { - let texture = ctx.texture_manager_2d.get(&material.albedo)?; + let texture = ctx.texture_manager_2d.get(&material.albedo); let bind_group = pools.bind_groups.alloc( device, pools, diff --git a/crates/re_renderer/src/point_cloud_builder.rs b/crates/re_renderer/src/point_cloud_builder.rs index 84ef0e0187b9..63278d5c7e69 100644 --- a/crates/re_renderer/src/point_cloud_builder.rs +++ b/crates/re_renderer/src/point_cloud_builder.rs @@ -9,23 +9,19 @@ use crate::{ }; /// Builder for point clouds, making it easy to create [`crate::renderer::PointCloudDrawData`]. -pub struct PointCloudBuilder { - // Size of `point`/color`/`per_point_user_data` must be equal. +pub struct PointCloudBuilder { + // Size of `point`/color` must be equal. pub vertices: Vec, pub(crate) color_buffer: CpuWriteGpuReadBuffer, pub(crate) picking_instance_ids_buffer: CpuWriteGpuReadBuffer, - pub user_data: Vec, pub(crate) batches: Vec, pub(crate) radius_boost_in_ui_points_for_outlines: f32, } -impl PointCloudBuilder -where - PerPointUserData: Default + Copy, -{ +impl PointCloudBuilder { pub fn new(ctx: &RenderContext) -> Self { const RESERVE_SIZE: usize = 512; @@ -48,7 +44,6 @@ where vertices: Vec::with_capacity(RESERVE_SIZE), color_buffer, picking_instance_ids_buffer, - user_data: Vec::with_capacity(RESERVE_SIZE), batches: Vec::with_capacity(16), radius_boost_in_ui_points_for_outlines: 0.0, } @@ -65,10 +60,7 @@ where /// Start of a new batch. #[inline] - pub fn batch( - &mut self, - label: impl Into, - ) -> PointCloudBatchBuilder<'_, PerPointUserData> { + pub fn batch(&mut self, label: impl Into) -> PointCloudBatchBuilder<'_> { self.batches.push(PointCloudBatchInfo { label: label.into(), world_from_obj: glam::Mat4::IDENTITY, @@ -105,30 +97,6 @@ where }) } - // Iterate over all batches, yielding the batch info and a point vertex iterator zipped with its user data. - pub fn iter_vertices_and_userdata_by_batch( - &self, - ) -> impl Iterator< - Item = ( - &PointCloudBatchInfo, - impl Iterator, - ), - > { - let mut vertex_offset = 0; - self.batches.iter().map(move |batch| { - let out = ( - batch, - self.vertices - .iter() - .zip(self.user_data.iter()) - .skip(vertex_offset) - .take(batch.point_count as usize), - ); - vertex_offset += batch.point_count as usize; - out - }) - } - /// Finalizes the builder and returns a point cloud draw data with all the points added so far. 
    pub fn to_draw_data(
        self,
@@ -138,29 +106,18 @@ where
    }
 }
 
-pub struct PointCloudBatchBuilder<'a, PerPointUserData>(
-    &'a mut PointCloudBuilder<PerPointUserData>,
-)
-where
-    PerPointUserData: Default + Copy;
+pub struct PointCloudBatchBuilder<'a>(&'a mut PointCloudBuilder);
 
-impl<'a, PerPointUserData> Drop for PointCloudBatchBuilder<'a, PerPointUserData>
-where
-    PerPointUserData: Default + Copy,
-{
+impl<'a> Drop for PointCloudBatchBuilder<'a> {
     fn drop(&mut self) {
         // Remove batch again if it wasn't actually used.
         if self.0.batches.last().unwrap().point_count == 0 {
             self.0.batches.pop();
         }
-        self.extend_defaults();
     }
 }
 
-impl<'a, PerPointUserData> PointCloudBatchBuilder<'a, PerPointUserData>
-where
-    PerPointUserData: Default + Copy,
-{
+impl<'a> PointCloudBatchBuilder<'a> {
     #[inline]
     fn batch_mut(&mut self) -> &mut PointCloudBatchInfo {
         self.0
@@ -183,139 +140,110 @@ where
         self
     }
 
-    /// Each time we `add_points`, or upon builder drop, make sure that we
-    /// fill in any additional colors and user-data to have matched vectors.
-    fn extend_defaults(&mut self) {
-        if self.0.color_buffer.num_written() < self.0.vertices.len() {
-            self.0.color_buffer.extend(
-                std::iter::repeat(Color32::WHITE)
-                    .take(self.0.vertices.len() - self.0.color_buffer.num_written()),
-            );
-        }
-
-        if self.0.picking_instance_ids_buffer.num_written() < self.0.vertices.len() {
-            self.0
-                .picking_instance_ids_buffer
-                .extend(std::iter::repeat(Default::default()).take(
-                    self.0.vertices.len() - self.0.picking_instance_ids_buffer.num_written(),
-                ));
-        }
-
-        if self.0.user_data.len() < self.0.vertices.len() {
-            self.0.user_data.extend(
-                std::iter::repeat(PerPointUserData::default())
-                    .take(self.0.vertices.len() - self.0.user_data.len()),
-            );
-        }
-    }
-
-    #[inline]
     /// Add several 3D points
     ///
     /// Returns a `PointBuilder` which can be used to set the colors, radii, and user-data for the points.
     ///
-    /// Params:
-    ///  - `size_hint`: The `PointBuilder` will pre-allocate buffers to accommodate up to this number of points.
-    ///    The resulting point batch, will still be determined by the length of the iterator.
-    ///  - `positions`: An iterable of the positions of the collection of points
+    /// Will *always* add `num_points`, no matter how many elements are in the iterators.
+    /// Missing elements will be filled up with defaults (in the case of positions, that's the origin).
+    ///
+    /// TODO(#957): Clamps number of points to the allowed per-builder maximum.
+    #[inline]
    pub fn add_points(
-        &mut self,
-        size_hint: usize,
+        mut self,
+        mut num_points: usize,
        positions: impl Iterator<Item = glam::Vec3>,
-    ) -> PointsBuilder<'_, PerPointUserData> {
+        radii: impl Iterator<Item = Size>,
+        colors: impl Iterator<Item = Color32>,
+        picking_instance_ids: impl Iterator<Item = PickingLayerInstanceId>,
+    ) -> Self {
        // TODO(jleibs): Figure out if we can plumb-through proper support for `Iterator::size_hints()`
        // or potentially make `FixedSizedIterator` work correctly. This should be possible since the
        // underlying arrow structures are of known-size, but carries some complexity with the amount of
        // chaining, joining, filtering, etc. that happens along the way.
crate::profile_function!(); - self.extend_defaults(); - debug_assert_eq!(self.0.vertices.len(), self.0.color_buffer.num_written()); - debug_assert_eq!(self.0.vertices.len(), self.0.user_data.len()); - - let old_size = self.0.vertices.len(); - - self.0.vertices.reserve(size_hint); - self.0.vertices.extend(positions.map(|p| PointCloudVertex { - position: p, - radius: Size::AUTO, - })); + debug_assert_eq!( + self.0.vertices.len(), + self.0.picking_instance_ids_buffer.num_written() + ); - let num_points = self.0.vertices.len() - old_size; + if num_points + self.0.vertices.len() > PointCloudDrawData::MAX_NUM_POINTS { + re_log::error_once!( + "Reached maximum number of supported points of {}. + See also https://github.com/rerun-io/rerun/issues/957", + PointCloudDrawData::MAX_NUM_POINTS + ); + num_points = PointCloudDrawData::MAX_NUM_POINTS - self.0.vertices.len(); + } + if num_points == 0 { + return self; + } self.batch_mut().point_count += num_points as u32; - self.0.user_data.reserve(num_points); - - let new_range = old_size..self.0.vertices.len(); - - let max_points = self.0.vertices.len(); - - PointsBuilder { - vertices: &mut self.0.vertices[new_range], - max_points, - colors: &mut self.0.color_buffer, - picking_instance_ids: &mut self.0.picking_instance_ids_buffer, - user_data: &mut self.0.user_data, - additional_outline_mask_ids: &mut self - .0 - .batches - .last_mut() - .unwrap() - .additional_outline_mask_ids_vertex_ranges, - start_vertex_index: old_size as _, + { + crate::profile_scope!("positions"); + let num_before = self.0.vertices.len(); + self.0.vertices.extend( + positions + .take(num_points) + .zip(radii.take(num_points)) + .map(|(position, radius)| PointCloudVertex { position, radius }), + ); + // Fill up with defaults. Doing this in a separate step is faster than chaining the iterator. + let num_default = num_points - (self.0.vertices.len() - num_before); + self.0.vertices.extend( + std::iter::repeat(PointCloudVertex { + position: glam::Vec3::ZERO, + radius: Size::AUTO, + }) + .take(num_default), + ); } - } - - #[inline] - pub fn add_point(&mut self, position: glam::Vec3) -> PointBuilder<'_, PerPointUserData> { - self.extend_defaults(); - - debug_assert_eq!(self.0.vertices.len(), self.0.color_buffer.num_written()); - debug_assert_eq!(self.0.vertices.len(), self.0.user_data.len()); - - let vertex_index = self.0.vertices.len() as u32; - self.0.vertices.push(PointCloudVertex { - position, - radius: Size::AUTO, - }); - self.0.user_data.push(Default::default()); - self.batch_mut().point_count += 1; - - PointBuilder { - vertex: self.0.vertices.last_mut().unwrap(), - color: &mut self.0.color_buffer, - user_data: self.0.user_data.last_mut().unwrap(), - vertex_index, - additional_outline_mask_ids: &mut self + { + crate::profile_scope!("colors"); + let num_written = self.0.color_buffer.extend(colors.take(num_points)); + // Fill up with defaults. Doing this in a separate step is faster than chaining the iterator. + self.0 + .color_buffer + .extend(std::iter::repeat(Color32::TRANSPARENT).take(num_points - num_written)); + } + { + crate::profile_scope!("picking_instance_ids"); + let num_written = self .0 - .batches - .last_mut() - .unwrap() - .additional_outline_mask_ids_vertex_ranges, - outline_mask_id: OutlineMaskPreference::NONE, + .picking_instance_ids_buffer + .extend(picking_instance_ids.take(num_points)); + // Fill up with defaults. Doing this in a separate step is faster than chaining the iterator. 
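The "fill up with defaults" comments above describe a two-step pattern: first drain the caller's iterator capped at `num_points`, then pad the remainder with a repeated default. A standalone sketch of the pattern (illustrative names, not the re_renderer API):

```rust
/// Extend `out` with up to `n` items from `src`, padding with `default`
/// so that exactly `n` items are always appended.
fn extend_with_default<T: Clone>(
    out: &mut Vec<T>,
    src: impl Iterator<Item = T>,
    n: usize,
    default: T,
) {
    let before = out.len();
    out.extend(src.take(n)); // step 1: take what the caller provided
    let missing = n - (out.len() - before);
    out.extend(std::iter::repeat(default).take(missing)); // step 2: pad
}

fn main() {
    let mut radii = Vec::new();
    extend_with_default(&mut radii, [1.0f32, 2.0].into_iter(), 4, 0.0);
    assert_eq!(radii, vec![1.0, 2.0, 0.0, 0.0]);
}
```

Doing the padding as a second `extend` keeps the hot loop free of per-element branching, which is presumably why the comments call it faster than chaining the iterators.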
+ self.0.picking_instance_ids_buffer.extend( + std::iter::repeat(PickingLayerInstanceId::default()).take(num_points - num_written), + ); } + + self } /// Adds several 2D points. Uses an autogenerated depth value, the same for all points passed. /// - /// Params: - /// - `size_hint`: The `PointBuilder` will pre-allocate buffers to accommodate up to this number of points. - /// The resulting point batch, will be the size of the length of the `positions` iterator. - /// - `positions`: An iterable of the positions of the collection of points + /// Will *always* add `num_points`, no matter how many elements are in the iterators. + /// Missing elements will be filled up with defaults (in case of positions that's the origin) #[inline] pub fn add_points_2d( - &mut self, - size_hint: usize, + self, + num_points: usize, positions: impl Iterator, - ) -> PointsBuilder<'_, PerPointUserData> { - self.add_points(size_hint, positions.map(|p| p.extend(0.0))) - } - - /// Adds a single 2D point. Uses an autogenerated depth value. - #[inline] - pub fn add_point_2d(&mut self, position: glam::Vec2) -> PointBuilder<'_, PerPointUserData> { - self.add_point(position.extend(0.0)) + radii: impl Iterator, + colors: impl Iterator, + picking_instance_ids: impl Iterator, + ) -> Self { + self.add_points( + num_points, + positions.map(|p| p.extend(0.0)), + radii, + colors, + picking_instance_ids, + ) } /// Set flags for this batch. @@ -324,149 +252,25 @@ where self } + /// Sets the picking object id for the current batch. pub fn picking_object_id(mut self, picking_object_id: PickingLayerObjectId) -> Self { self.batch_mut().picking_object_id = picking_object_id; self } -} - -// TODO(andreas): Should remove single-point builder, practically this never makes sense as we're almost always dealing with arrays of points. -pub struct PointBuilder<'a, PerPointUserData> { - vertex: &'a mut PointCloudVertex, - color: &'a mut CpuWriteGpuReadBuffer, - user_data: &'a mut PerPointUserData, - vertex_index: u32, - additional_outline_mask_ids: &'a mut Vec<(std::ops::Range, OutlineMaskPreference)>, - outline_mask_id: OutlineMaskPreference, -} - -impl<'a, PerPointUserData> PointBuilder<'a, PerPointUserData> -where - PerPointUserData: Clone, -{ - #[inline] - pub fn radius(self, radius: Size) -> Self { - self.vertex.radius = radius; - self - } - - /// This mustn't call this more than once. - #[inline] - pub fn color(self, color: Color32) -> Self { - self.color.push(color); - self - } - - pub fn user_data(self, data: PerPointUserData) -> Self { - *self.user_data = data; - self - } - - /// Pushes additional outline mask ids for this point - /// - /// Prefer the `overall_outline_mask_ids` setting to set the outline mask ids for the entire batch whenever possible! 
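Since `add_points` now takes `mut self` and returns `Self`, batches are assembled by chaining consuming calls rather than handing out sub-builders. A standalone sketch of that shape, including the clamp to a per-builder maximum (types simplified; not the actual re_renderer API):

```rust
struct Batch {
    positions: Vec<[f32; 3]>,
    max_points: usize,
}

impl Batch {
    fn add_points(mut self, mut n: usize, positions: impl Iterator<Item = [f32; 3]>) -> Self {
        // Clamp instead of panicking when the builder is full.
        if self.positions.len() + n > self.max_points {
            n = self.max_points - self.positions.len();
        }
        let before = self.positions.len();
        self.positions.extend(positions.take(n));
        // Pad missing positions with the origin, mirroring the docs above.
        let missing = n - (self.positions.len() - before);
        self.positions.extend(std::iter::repeat([0.0; 3]).take(missing));
        self
    }
}

fn main() {
    let batch = Batch { positions: Vec::new(), max_points: 4 }
        .add_points(3, [[1.0, 0.0, 0.0]].into_iter());
    assert_eq!(batch.positions.len(), 3);
    assert_eq!(batch.positions[1], [0.0, 0.0, 0.0]); // padded
}
```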
- pub fn outline_mask_id(mut self, outline_mask_id: OutlineMaskPreference) -> Self { - self.outline_mask_id = outline_mask_id; - self - } -} - -impl<'a, PerPointUserData> Drop for PointBuilder<'a, PerPointUserData> { - fn drop(&mut self) { - if self.outline_mask_id.is_some() { - self.additional_outline_mask_ids.push(( - self.vertex_index..self.vertex_index + 1, - self.outline_mask_id, - )); - } - } -} - -pub struct PointsBuilder<'a, PerPointUserData> { - // Vertices is a slice, which radii will update - vertices: &'a mut [PointCloudVertex], - max_points: usize, - colors: &'a mut CpuWriteGpuReadBuffer, - picking_instance_ids: &'a mut CpuWriteGpuReadBuffer, - user_data: &'a mut Vec, - additional_outline_mask_ids: &'a mut Vec<(std::ops::Range, OutlineMaskPreference)>, - start_vertex_index: u32, -} - -impl<'a, PerPointUserData> PointsBuilder<'a, PerPointUserData> -where - PerPointUserData: Clone, -{ - /// Assigns radii to all points. - /// - /// This mustn't call this more than once. - /// - /// If the iterator doesn't cover all points, some will not be assigned. - /// If the iterator provides more values than there are points, the extra values will be ignored. - #[inline] - pub fn radii(self, radii: impl Iterator) -> Self { - // TODO(andreas): This seems like an argument for moving radius - // to a separate storage - crate::profile_function!(); - for (point, radius) in self.vertices.iter_mut().zip(radii) { - point.radius = radius; - } - self - } - - /// Assigns colors to all points. - /// - /// This mustn't call this more than once. - /// - /// If the iterator doesn't cover all points, some will not be assigned. - /// If the iterator provides more values than there are points, the extra values will be ignored. - #[inline] - pub fn colors(self, colors: impl Iterator) -> Self { - crate::profile_function!(); - self.colors - .extend(colors.take(self.max_points - self.colors.num_written())); - self - } - - #[inline] - pub fn picking_instance_ids( - self, - picking_instance_ids: impl Iterator, - ) -> Self { - crate::profile_function!(); - self.picking_instance_ids.extend( - picking_instance_ids.take(self.max_points - self.picking_instance_ids.num_written()), - ); - self - } - - /// Assigns user data for all points in this builder. - /// - /// This mustn't call this more than once. - /// - /// User data is currently not available on the GPU. - #[inline] - pub fn user_data(self, data: impl Iterator) -> Self { - crate::profile_function!(); - self.user_data - .extend(data.take(self.max_points - self.user_data.len())); - self - } /// Pushes additional outline mask ids for a specific range of points. - /// The range is relative to this builder's range, not the entire batch. + /// The range is relative to this batch. /// /// Prefer the `overall_outline_mask_ids` setting to set the outline mask ids for the entire batch whenever possible! #[inline] pub fn push_additional_outline_mask_ids_for_range( - self, + mut self, range: std::ops::Range, ids: OutlineMaskPreference, ) -> Self { - self.additional_outline_mask_ids.push(( - (range.start + self.start_vertex_index)..(range.end + self.start_vertex_index), - ids, - )); + self.batch_mut() + .additional_outline_mask_ids_vertex_ranges + .push((range, ids)); self } } diff --git a/crates/re_renderer/src/rect.rs b/crates/re_renderer/src/rect.rs index 60c48ea82ae4..8b70e81ac357 100644 --- a/crates/re_renderer/src/rect.rs +++ b/crates/re_renderer/src/rect.rs @@ -4,7 +4,7 @@ #[derive(Clone, Copy, Debug)] pub struct IntRect { /// The top left corner of the rectangle. 
- pub top_left_corner: glam::IVec2, + pub left_top: glam::IVec2, /// The size of the rectangle. pub extent: glam::UVec2, @@ -14,23 +14,23 @@ impl IntRect { #[inline] pub fn from_middle_and_extent(middle: glam::IVec2, size: glam::UVec2) -> Self { Self { - top_left_corner: middle - size.as_ivec2() / 2, + left_top: middle - size.as_ivec2() / 2, extent: size, } } #[inline] - pub fn width(&self) -> u32 { + pub fn width(self) -> u32 { self.extent.x } #[inline] - pub fn height(&self) -> u32 { - self.extent.x + pub fn height(self) -> u32 { + self.extent.y } #[inline] - pub fn wgpu_extent(&self) -> wgpu::Extent3d { + pub fn wgpu_extent(self) -> wgpu::Extent3d { wgpu::Extent3d { width: self.extent.x, height: self.extent.y, diff --git a/crates/re_renderer/src/renderer/debug_overlay.rs b/crates/re_renderer/src/renderer/debug_overlay.rs index f7dc4a4dbcec..6e615cd4a710 100644 --- a/crates/re_renderer/src/renderer/debug_overlay.rs +++ b/crates/re_renderer/src/renderer/debug_overlay.rs @@ -93,7 +93,7 @@ impl DebugOverlayDrawData { "DebugOverlayDrawData".into(), gpu_data::DebugOverlayUniformBuffer { screen_resolution: screen_resolution.as_vec2().into(), - position_in_pixel: overlay_rect.top_left_corner.as_vec2().into(), + position_in_pixel: overlay_rect.left_top.as_vec2().into(), extent_in_pixel: overlay_rect.extent.as_vec2().into(), mode: mode as u32, _padding: 0, @@ -189,7 +189,7 @@ impl Renderer for DebugOverlayRenderer { ); let render_pipeline = pools.render_pipelines.get_or_create( device, - &(RenderPipelineDesc { + &RenderPipelineDesc { label: "DebugOverlayDrawData::render_pipeline_regular".into(), pipeline_layout: pools.pipeline_layouts.get_or_create( device, @@ -212,7 +212,7 @@ impl Renderer for DebugOverlayRenderer { }, depth_stencil: None, multisample: wgpu::MultisampleState::default(), - }), + }, &pools.pipeline_layouts, &pools.shader_modules, ); diff --git a/crates/re_renderer/src/renderer/depth_cloud.rs b/crates/re_renderer/src/renderer/depth_cloud.rs index 16e5f1fe5e70..285c0a2f9fd0 100644 --- a/crates/re_renderer/src/renderer/depth_cloud.rs +++ b/crates/re_renderer/src/renderer/depth_cloud.rs @@ -21,10 +21,10 @@ use crate::{ view_builder::ViewBuilder, wgpu_resources::{ BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle, - GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, TextureDesc, - TextureRowDataInfo, + GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, + Texture2DBufferInfo, TextureDesc, }, - ColorMap, OutlineMaskPreference, PickingLayerProcessor, + Colormap, OutlineMaskPreference, PickingLayerObjectId, PickingLayerProcessor, }; use super::{ @@ -35,7 +35,7 @@ use super::{ // --- mod gpu_data { - use crate::wgpu_buffer_types; + use crate::{wgpu_buffer_types, PickingLayerObjectId}; /// Keep in sync with mirror in `depth_cloud.wgsl.` #[repr(C, align(256))] @@ -47,6 +47,7 @@ mod gpu_data { pub depth_camera_intrinsics: wgpu_buffer_types::Mat3, pub outline_mask_id: wgpu_buffer_types::UVec2, + pub picking_layer_object_id: PickingLayerObjectId, /// Multiplier to get world-space depth from whatever is in the texture. pub world_depth_from_texture_value: f32, @@ -57,14 +58,13 @@ mod gpu_data { /// The maximum depth value in world-space, for use with the colormap. pub max_depth_in_world: f32, + /// Which colormap should be used. pub colormap: u32, /// Changes over different draw-phases. 
- pub radius_boost_in_ui_points: f32, + pub radius_boost_in_ui_points: wgpu_buffer_types::F32RowPadded, - pub row_pad: f32, - - pub end_padding: [wgpu_buffer_types::PaddingRow; 16 - 4 - 3 - 1 - 1], + pub end_padding: [wgpu_buffer_types::PaddingRow; 16 - 4 - 3 - 1 - 1 - 1], } impl DepthCloudInfoUBO { @@ -82,6 +82,7 @@ mod gpu_data { depth_data, colormap, outline_mask_id, + picking_object_id, } = depth_cloud; let user_depth_from_texture_value = match depth_data { @@ -99,8 +100,8 @@ mod gpu_data { point_radius_from_world_depth: *point_radius_from_world_depth, max_depth_in_world: *max_depth_in_world, colormap: *colormap as u32, - radius_boost_in_ui_points, - row_pad: Default::default(), + radius_boost_in_ui_points: radius_boost_in_ui_points.into(), + picking_layer_object_id: *picking_object_id, end_padding: Default::default(), } } @@ -160,10 +161,44 @@ pub struct DepthCloud { pub depth_data: DepthCloudDepthData, /// Configures color mapping mode. - pub colormap: ColorMap, + pub colormap: Colormap, /// Option outline mask id preference. pub outline_mask_id: OutlineMaskPreference, + + /// Picking object id that applies for the entire depth cloud. + pub picking_object_id: PickingLayerObjectId, +} + +impl DepthCloud { + /// World-space bounding-box. + pub fn bbox(&self) -> macaw::BoundingBox { + let max_depth = self.max_depth_in_world; + let w = self.depth_dimensions.x as f32; + let h = self.depth_dimensions.y as f32; + let corners = [ + glam::Vec3::ZERO, // camera origin + glam::Vec3::new(0.0, 0.0, max_depth), + glam::Vec3::new(0.0, h, max_depth), + glam::Vec3::new(w, 0.0, max_depth), + glam::Vec3::new(w, h, max_depth), + ]; + + let intrinsics = self.depth_camera_intrinsics; + let focal_length = glam::vec2(intrinsics.col(0).x, intrinsics.col(1).y); + let offset = intrinsics.col(2).truncate(); + + let mut bbox = macaw::BoundingBox::nothing(); + + for corner in corners { + let depth = corner.z; + let pos_in_obj = ((corner.truncate() - offset) * depth / focal_length).extend(depth); + let pos_in_world = self.world_from_obj.project_point3(pos_in_obj); + bbox.extend(pos_in_world); + } + + bbox + } } pub struct DepthClouds { @@ -336,34 +371,33 @@ fn create_and_upload_texture( .textures .alloc(&ctx.device, &depth_texture_desc); - let TextureRowDataInfo { - bytes_per_row_unpadded: bytes_per_row_unaligned, - bytes_per_row_padded, - } = TextureRowDataInfo::new(depth_texture_desc.format, depth_texture_desc.size.width); - // Not supporting compressed formats here. debug_assert!(depth_texture_desc.format.describe().block_dimensions == (1, 1)); + let buffer_info = + Texture2DBufferInfo::new(depth_texture_desc.format, depth_cloud.depth_dimensions); + // TODO(andreas): CpuGpuWriteBelt should make it easier to do this. - let bytes_padding_per_row = (bytes_per_row_padded - bytes_per_row_unaligned) as usize; + let bytes_padding_per_row = + (buffer_info.bytes_per_row_padded - buffer_info.bytes_per_row_unpadded) as usize; // Sanity check the padding size. If this happens something is seriously wrong, as it would imply // that we can't express the required alignment with the block size. debug_assert!( bytes_padding_per_row % std::mem::size_of::() == 0, "Padding is not a multiple of pixel size. 
Can't correctly pad the texture data" ); - let num_pixel_padding_per_row = bytes_padding_per_row / std::mem::size_of::(); let mut depth_texture_staging = ctx.cpu_write_gpu_read_belt.lock().allocate::( &ctx.device, &ctx.gpu_resources.buffers, - data.len() + num_pixel_padding_per_row * depth_texture_desc.size.height as usize, + buffer_info.buffer_size_padded as usize / std::mem::size_of::(), ); // Fill with a single copy if possible, otherwise do multiple, filling in padding. - if num_pixel_padding_per_row == 0 { + if bytes_padding_per_row == 0 { depth_texture_staging.extend_from_slice(data); } else { + let num_pixel_padding_per_row = bytes_padding_per_row / std::mem::size_of::(); for row in data.chunks(depth_texture_desc.size.width as usize) { depth_texture_staging.extend_from_slice(row); depth_texture_staging @@ -552,7 +586,7 @@ impl Renderer for DepthCloudRenderer { let bind_group = match phase { DrawPhase::OutlineMask => &instance.bind_group_outline, - DrawPhase::Opaque | DrawPhase::PickingLayer => &instance.bind_group_opaque, + DrawPhase::PickingLayer | DrawPhase::Opaque => &instance.bind_group_opaque, _ => unreachable!(), }; diff --git a/crates/re_renderer/src/renderer/lines.rs b/crates/re_renderer/src/renderer/lines.rs index 180398633b47..cf1222acfe1b 100644 --- a/crates/re_renderer/src/renderer/lines.rs +++ b/crates/re_renderer/src/renderer/lines.rs @@ -124,7 +124,8 @@ use crate::{ BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle, GpuRenderPipelineHandle, PipelineLayoutDesc, PoolError, RenderPipelineDesc, TextureDesc, }, - Color32, DebugLabel, OutlineMaskPreference, PickingLayerProcessor, + Color32, DebugLabel, LineStripSeriesBuilder, OutlineMaskPreference, PickingLayerObjectId, + PickingLayerProcessor, }; use super::{ @@ -135,7 +136,7 @@ use super::{ pub mod gpu_data { // Don't use `wgsl_buffer_types` since none of this data goes into a buffer, so its alignment rules don't apply. - use crate::{size::SizeHalf, wgpu_buffer_types, Color32}; + use crate::{size::SizeHalf, wgpu_buffer_types, Color32, PickingLayerObjectId}; use super::LineStripFlags; @@ -173,7 +174,8 @@ pub mod gpu_data { #[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)] pub struct BatchUniformBuffer { pub world_from_obj: wgpu_buffer_types::Mat4, - pub outline_mask_ids: wgpu_buffer_types::UVec2RowPadded, + pub outline_mask_ids: wgpu_buffer_types::UVec2, + pub picking_object_id: PickingLayerObjectId, pub end_padding: [wgpu_buffer_types::PaddingRow; 16 - 5], } @@ -259,6 +261,9 @@ pub struct LineBatchInfo { /// This feature is meant for a limited number of "extra selections" /// If an overall mask is defined as well, the per-vertex-range masks is overwriting the overall mask. pub additional_outline_mask_ids_vertex_ranges: Vec<(Range, OutlineMaskPreference)>, + + /// Picking object id that applies for the entire batch. + pub picking_object_id: PickingLayerObjectId, } /// Style information for a line strip. @@ -321,11 +326,7 @@ impl LineDrawData { /// If no batches are passed, all lines are assumed to be in a single batch with identity transform. 
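For context on the `Texture2DBufferInfo` padding above: wgpu requires `bytes_per_row` in buffer-to-texture copies to be a multiple of `COPY_BYTES_PER_ROW_ALIGNMENT` (256), so each staged row is rounded up to that boundary and the gap is filled with dummy texels. A standalone sketch of the round-up:

```rust
/// Bytes per padded row for a buffer->texture copy.
/// 256 is wgpu::COPY_BYTES_PER_ROW_ALIGNMENT.
fn padded_bytes_per_row(width_px: u32, bytes_per_pixel: u32) -> u32 {
    const ALIGN: u32 = 256;
    let unpadded = width_px * bytes_per_pixel;
    (unpadded + ALIGN - 1) / ALIGN * ALIGN
}

fn main() {
    // A 100-px-wide R32Float depth row: 400 unpadded bytes -> 512 padded,
    // i.e. 112 bytes (28 f32 texels) of per-row padding to fill in.
    assert_eq!(padded_bytes_per_row(100, 4), 512);
}
```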
pub fn new( ctx: &mut RenderContext, - // TODO(andreas): Take LineBuilder directly - vertices: &[gpu_data::LineVertex], - strips: &[LineStripInfo], - batches: &[LineBatchInfo], - radius_boost_in_ui_points_for_outlines: f32, + line_builder: LineStripSeriesBuilder, ) -> Result { let mut renderers = ctx.renderers.write(); let line_renderer = renderers.get_or_create::<_, LineRenderer>( @@ -335,7 +336,7 @@ impl LineDrawData { &mut ctx.resolver, ); - if strips.is_empty() { + if line_builder.strips.is_empty() { return Ok(LineDrawData { bind_group_all_lines: None, bind_group_all_lines_outline_mask: None, @@ -343,15 +344,23 @@ impl LineDrawData { }); } - let fallback_batches = [LineBatchInfo { - world_from_obj: glam::Mat4::IDENTITY, - label: "LineDrawData::fallback_batch".into(), - line_vertex_count: vertices.len() as _, - overall_outline_mask_ids: OutlineMaskPreference::NONE, - additional_outline_mask_ids_vertex_ranges: Vec::new(), - }]; + let LineStripSeriesBuilder { + vertices, + batches, + strips, + mut picking_instance_ids_buffer, + radius_boost_in_ui_points_for_outlines, + } = line_builder; + let batches = if batches.is_empty() { - &fallback_batches + vec![LineBatchInfo { + world_from_obj: glam::Mat4::IDENTITY, + label: "LineDrawData::fallback_batch".into(), + line_vertex_count: vertices.len() as _, + overall_outline_mask_ids: OutlineMaskPreference::NONE, + picking_object_id: PickingLayerObjectId::default(), + additional_outline_mask_ids_vertex_ranges: Vec::new(), + }] } else { batches }; @@ -373,7 +382,7 @@ impl LineDrawData { See also https://github.com/rerun-io/rerun/issues/957", Self::MAX_NUM_VERTICES, vertices.len() ); &vertices[..Self::MAX_NUM_VERTICES] } else { - vertices + &vertices[..] }; let strips = if strips.len() > Self::MAX_NUM_STRIPS { re_log::error_once!("Reached maximum number of supported line strips. Clamping down to {}, passed were {}. This may lead to rendering artifacts. @@ -387,7 +396,7 @@ impl LineDrawData { { return Err(LineDrawDataError::InvalidStripIndex); } - strips + &strips[..] }; let num_strips = strips.len() as u32; @@ -396,116 +405,152 @@ impl LineDrawData { // TODO(andreas): We want a "stack allocation" here that lives for one frame. // Note also that this doesn't protect against sharing the same texture with several LineDrawData! 
- let position_data_texture = ctx.gpu_resources.textures.alloc( - &ctx.device, - &TextureDesc { - label: "LineDrawData::position_data_texture".into(), - size: wgpu::Extent3d { - width: POSITION_TEXTURE_SIZE, - height: POSITION_TEXTURE_SIZE, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba32Float, - usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + let position_data_texture_desc = TextureDesc { + label: "LineDrawData::position_data_texture".into(), + size: wgpu::Extent3d { + width: POSITION_TEXTURE_SIZE, + height: POSITION_TEXTURE_SIZE, + depth_or_array_layers: 1, }, - ); - let line_strip_texture = ctx.gpu_resources.textures.alloc( + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba32Float, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + }; + let position_data_texture = ctx + .gpu_resources + .textures + .alloc(&ctx.device, &position_data_texture_desc); + + let line_strip_texture_desc = TextureDesc { + label: "LineDrawData::line_strip_texture".into(), + size: wgpu::Extent3d { + width: LINE_STRIP_TEXTURE_SIZE, + height: LINE_STRIP_TEXTURE_SIZE, + depth_or_array_layers: 1, + }, + format: wgpu::TextureFormat::Rg32Uint, + ..position_data_texture_desc + }; + let line_strip_texture = ctx + .gpu_resources + .textures + .alloc(&ctx.device, &line_strip_texture_desc); + let picking_instance_id_texture = ctx.gpu_resources.textures.alloc( &ctx.device, &TextureDesc { - label: "LineDrawData::line_strip_texture".into(), - size: wgpu::Extent3d { - width: LINE_STRIP_TEXTURE_SIZE, - height: LINE_STRIP_TEXTURE_SIZE, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, + label: "LineDrawData::picking_instance_id_texture".into(), format: wgpu::TextureFormat::Rg32Uint, - usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST, + ..line_strip_texture_desc }, ); - // TODO(andreas): We want a staging-belt(-like) mechanism to upload data instead of the queue. - // These staging buffers would be provided by the belt. - // To make the data upload simpler (and have it be done in one go), we always update full rows of each of our textures - let mut position_data_staging = - Vec::with_capacity(wgpu::util::align_to(num_segments, POSITION_TEXTURE_SIZE) as usize); - // sentinel at the beginning to facilitate caps. - position_data_staging.push(LineVertex { - position: glam::vec3(f32::MAX, f32::MAX, f32::MAX), - strip_index: u32::MAX, - }); - position_data_staging.extend(vertices.iter()); - // placeholder at the end to facilitate caps. 
- position_data_staging.push(LineVertex { - position: glam::vec3(f32::MAX, f32::MAX, f32::MAX), - strip_index: u32::MAX, - }); - position_data_staging.extend(std::iter::repeat(gpu_data::LineVertex::zeroed()).take( - (wgpu::util::align_to(num_segments, POSITION_TEXTURE_SIZE) - num_segments) as usize, - )); - - let mut line_strip_info_staging = - Vec::with_capacity(wgpu::util::align_to(num_strips, LINE_STRIP_TEXTURE_SIZE) as usize); - line_strip_info_staging.extend(strips.iter().map(|line_strip| { - gpu_data::LineStripInfo { - color: line_strip.color, - radius: line_strip.radius.into(), - stippling: 0, //(line_strip.stippling.clamp(0.0, 1.0) * 255.0) as u8, - flags: line_strip.flags, - } - })); - line_strip_info_staging.extend(std::iter::repeat(gpu_data::LineStripInfo::zeroed()).take( - (wgpu::util::align_to(num_strips, LINE_STRIP_TEXTURE_SIZE) - num_strips) as usize, - )); - - // Upload data from staging buffers to gpu. - ctx.queue.write_texture( - wgpu::ImageCopyTexture { - texture: &position_data_texture.texture, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::All, - }, - bytemuck::cast_slice(&position_data_staging), - wgpu::ImageDataLayout { - offset: 0, - bytes_per_row: NonZeroU32::new( - POSITION_TEXTURE_SIZE * std::mem::size_of::() as u32, - ), - rows_per_image: None, - }, - wgpu::Extent3d { - width: POSITION_TEXTURE_SIZE, - height: (num_segments + POSITION_TEXTURE_SIZE - 1) / POSITION_TEXTURE_SIZE, - depth_or_array_layers: 1, - }, - ); - ctx.queue.write_texture( - wgpu::ImageCopyTexture { - texture: &line_strip_texture.texture, - mip_level: 0, - origin: wgpu::Origin3d::ZERO, - aspect: wgpu::TextureAspect::All, - }, - bytemuck::cast_slice(&line_strip_info_staging), - wgpu::ImageDataLayout { - offset: 0, - bytes_per_row: NonZeroU32::new( - LINE_STRIP_TEXTURE_SIZE * std::mem::size_of::() as u32, - ), - rows_per_image: None, - }, - wgpu::Extent3d { + // Upload position data. + { + // To make the data upload simpler (and have it be done in one go), we always update full rows of each of our textures + let mut position_data_staging = Vec::with_capacity(wgpu::util::align_to( + num_segments, + POSITION_TEXTURE_SIZE, + ) as usize); + // sentinel at the beginning to facilitate caps. + position_data_staging.push(LineVertex { + position: glam::vec3(f32::MAX, f32::MAX, f32::MAX), + strip_index: u32::MAX, + }); + position_data_staging.extend(vertices.iter()); + // placeholder at the end to facilitate caps. + position_data_staging.push(LineVertex { + position: glam::vec3(f32::MAX, f32::MAX, f32::MAX), + strip_index: u32::MAX, + }); + position_data_staging.extend(std::iter::repeat(gpu_data::LineVertex::zeroed()).take( + (wgpu::util::align_to(num_segments, POSITION_TEXTURE_SIZE) - num_segments) as usize, + )); + + // TODO(andreas): Use staging belt here. + ctx.queue.write_texture( + wgpu::ImageCopyTexture { + texture: &position_data_texture.texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + bytemuck::cast_slice(&position_data_staging), + wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: NonZeroU32::new( + POSITION_TEXTURE_SIZE * std::mem::size_of::() as u32, + ), + rows_per_image: None, + }, + wgpu::Extent3d { + width: POSITION_TEXTURE_SIZE, + height: (num_segments + POSITION_TEXTURE_SIZE - 1) / POSITION_TEXTURE_SIZE, + depth_or_array_layers: 1, + }, + ); + } + + // Upload strip data. 
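The data textures above are effectively flat arrays wrapped into a fixed-width texture, which is why staging always pads to full rows: a flat element index maps to a texel row-major, and the upload covers a whole number of rows. A standalone sketch of the addressing (the actual texture sizes are crate constants not shown in this diff):

```rust
/// Row-major texel coordinate of flat element `index` in a `size`-wide texture.
fn texel_coord(index: u32, size: u32) -> (u32, u32) {
    (index % size, index / size)
}

/// Number of texture rows the upload must cover (ceiling division),
/// mirroring the `(num + SIZE - 1) / SIZE` extents in `write_texture` above.
fn rows_to_upload(num_elements: u32, size: u32) -> u32 {
    (num_elements + size - 1) / size
}

fn main() {
    assert_eq!(texel_coord(300, 256), (44, 1));
    assert_eq!(rows_to_upload(300, 256), 2);
}
```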
+ { + let mut line_strip_info_staging = Vec::with_capacity(wgpu::util::align_to( + num_strips, + LINE_STRIP_TEXTURE_SIZE, + ) as usize); + line_strip_info_staging.extend(strips.iter().map(|line_strip| { + gpu_data::LineStripInfo { + color: line_strip.color, + radius: line_strip.radius.into(), + stippling: 0, //(line_strip.stippling.clamp(0.0, 1.0) * 255.0) as u8, + flags: line_strip.flags, + } + })); + let num_strips_padding = + (wgpu::util::align_to(num_strips, LINE_STRIP_TEXTURE_SIZE) - num_strips) as usize; + line_strip_info_staging.extend( + std::iter::repeat(gpu_data::LineStripInfo::zeroed()).take(num_strips_padding), + ); + + let strip_texture_extent = wgpu::Extent3d { width: LINE_STRIP_TEXTURE_SIZE, height: (num_strips + LINE_STRIP_TEXTURE_SIZE - 1) / LINE_STRIP_TEXTURE_SIZE, depth_or_array_layers: 1, - }, - ); + }; + + // TODO(andreas): Use staging belt here. + ctx.queue.write_texture( + wgpu::ImageCopyTexture { + texture: &line_strip_texture.texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + bytemuck::cast_slice(&line_strip_info_staging), + wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: NonZeroU32::new( + LINE_STRIP_TEXTURE_SIZE + * std::mem::size_of::() as u32, + ), + rows_per_image: None, + }, + strip_texture_extent, + ); + + picking_instance_ids_buffer + .extend(std::iter::repeat(Default::default()).take(num_strips_padding)); + picking_instance_ids_buffer.copy_to_texture2d( + ctx.active_frame.before_view_builder_encoder.lock().get(), + wgpu::ImageCopyTexture { + texture: &picking_instance_id_texture.texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + glam::uvec2(strip_texture_extent.width, strip_texture_extent.height), + ); + } let draw_data_uniform_buffer_bindings = create_and_fill_uniform_buffer_batch( ctx, @@ -530,6 +575,7 @@ impl LineDrawData { entries: smallvec![ BindGroupEntry::DefaultTextureView(position_data_texture.handle), BindGroupEntry::DefaultTextureView(line_strip_texture.handle), + BindGroupEntry::DefaultTextureView(picking_instance_id_texture.handle), draw_data_uniform_buffer_bindings[0].clone(), ], layout: line_renderer.bind_group_layout_all_lines, @@ -543,6 +589,7 @@ impl LineDrawData { entries: smallvec![ BindGroupEntry::DefaultTextureView(position_data_texture.handle), BindGroupEntry::DefaultTextureView(line_strip_texture.handle), + BindGroupEntry::DefaultTextureView(picking_instance_id_texture.handle), draw_data_uniform_buffer_bindings[1].clone(), ], layout: line_renderer.bind_group_layout_all_lines, @@ -564,6 +611,7 @@ impl LineDrawData { .0 .unwrap_or_default() .into(), + picking_object_id: batch_info.picking_object_id, end_padding: Default::default(), }), ); @@ -583,6 +631,7 @@ impl LineDrawData { .map(|(_, mask)| gpu_data::BatchUniformBuffer { world_from_obj: batch_info.world_from_obj.into(), outline_mask_ids: mask.0.unwrap_or_default().into(), + picking_object_id: batch_info.picking_object_id, end_padding: Default::default(), }) }) @@ -598,7 +647,7 @@ impl LineDrawData { let line_vertex_range_end = (start_vertex_for_next_batch + batch_info.line_vertex_count) .min(Self::MAX_NUM_VERTICES as u32); - let mut active_phases = enum_set![DrawPhase::Opaque]; + let mut active_phases = enum_set![DrawPhase::Opaque | DrawPhase::PickingLayer]; // Does the entire batch participate in the outline mask phase? 
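The `active_phases` change above means every line batch is now drawn in the picking-layer pass as well as the opaque pass, and opts into the outline pass only per batch. A standalone sketch of the set logic, using plain bitflags in place of `enum_set!`:

```rust
#[derive(Clone, Copy)]
struct Phases(u8);

const OPAQUE: u8 = 1 << 0;
const PICKING_LAYER: u8 = 1 << 1;
const OUTLINE_MASK: u8 = 1 << 2;

impl Phases {
    fn insert(&mut self, phase: u8) {
        self.0 |= phase;
    }
    fn contains(self, phase: u8) -> bool {
        self.0 & phase != 0
    }
}

fn main() {
    // Every batch participates in opaque + picking...
    let mut active = Phases(OPAQUE | PICKING_LAYER);
    // ...and joins the outline phase only if it has an outline mask,
    // standing in for `overall_outline_mask_ids.is_some()` above.
    let has_outline_mask = true;
    if has_outline_mask {
        active.insert(OUTLINE_MASK);
    }
    assert!(active.contains(PICKING_LAYER) && active.contains(OUTLINE_MASK));
}
```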
if batch_info.overall_outline_mask_ids.is_some() { active_phases.insert(DrawPhase::OutlineMask); @@ -724,6 +773,16 @@ impl Renderer for LineRenderer { wgpu::BindGroupLayoutEntry { binding: 2, visibility: wgpu::ShaderStages::VERTEX, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Uint, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::VERTEX, ty: wgpu::BindingType::Buffer { ty: wgpu::BufferBindingType::Uniform, has_dynamic_offset: false, diff --git a/crates/re_renderer/src/renderer/mod.rs b/crates/re_renderer/src/renderer/mod.rs index 7eb9714b7a2e..3b4284bc2a6d 100644 --- a/crates/re_renderer/src/renderer/mod.rs +++ b/crates/re_renderer/src/renderer/mod.rs @@ -2,7 +2,10 @@ mod generic_skybox; pub use generic_skybox::GenericSkyboxDrawData; mod lines; -pub use lines::{gpu_data::LineVertex, LineBatchInfo, LineDrawData, LineStripFlags, LineStripInfo}; +pub use lines::{ + gpu_data::LineVertex, LineBatchInfo, LineDrawData, LineDrawDataError, LineStripFlags, + LineStripInfo, +}; mod point_cloud; pub use point_cloud::{ @@ -19,7 +22,10 @@ mod test_triangle; pub use test_triangle::TestTriangleDrawData; mod rectangles; -pub use rectangles::{RectangleDrawData, TextureFilterMag, TextureFilterMin, TexturedRect}; +pub use rectangles::{ + ColorMapper, ColormappedTexture, RectangleDrawData, RectangleOptions, TextureFilterMag, + TextureFilterMin, TexturedRect, +}; mod mesh_renderer; pub(crate) use mesh_renderer::MeshRenderer; diff --git a/crates/re_renderer/src/renderer/point_cloud.rs b/crates/re_renderer/src/renderer/point_cloud.rs index 1e9bfb77a578..639db5a17ceb 100644 --- a/crates/re_renderer/src/renderer/point_cloud.rs +++ b/crates/re_renderer/src/renderer/point_cloud.rs @@ -142,6 +142,7 @@ pub struct PointCloudBatchInfo { } /// Description of a point cloud. +#[derive(Clone)] pub struct PointCloudVertex { /// Connected points. Must be at least 2. pub position: glam::Vec3, @@ -173,9 +174,9 @@ impl PointCloudDrawData { /// Number of vertices and colors has to be equal. /// /// If no batches are passed, all points are assumed to be in a single batch with identity transform. - pub fn new( + pub fn new( ctx: &mut RenderContext, - mut builder: PointCloudBuilder, + mut builder: PointCloudBuilder, ) -> Result { crate::profile_function!(); @@ -225,7 +226,7 @@ impl PointCloudDrawData { 0 ); - let vertices = if vertices.len() >= Self::MAX_NUM_POINTS { + let vertices = if vertices.len() > Self::MAX_NUM_POINTS { re_log::error_once!( "Reached maximum number of supported points. Clamping down to {}, passed were {}. See also https://github.com/rerun-io/rerun/issues/957", @@ -268,7 +269,7 @@ impl PointCloudDrawData { let picking_instance_id_texture = ctx.gpu_resources.textures.alloc( &ctx.device, &TextureDesc { - label: "PointCloudDrawData::picking_layer_instance_id_texture".into(), + label: "PointCloudDrawData::picking_instance_id_texture".into(), format: wgpu::TextureFormat::Rg32Uint, ..position_data_texture_desc }, diff --git a/crates/re_renderer/src/renderer/rectangles.rs b/crates/re_renderer/src/renderer/rectangles.rs index 4cbebb883412..5692c247cc51 100644 --- a/crates/re_renderer/src/renderer/rectangles.rs +++ b/crates/re_renderer/src/renderer/rectangles.rs @@ -10,6 +10,7 @@ //! Since we're not allowed to bind many textures at once (no widespread bindless support!), //! 
we are forced to have individual bind groups per rectangle and thus a draw call per rectangle. +use itertools::{izip, Itertools as _}; use smallvec::smallvec; use crate::{ @@ -23,7 +24,7 @@ use crate::{ BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle, GpuRenderPipelineHandle, PipelineLayoutDesc, RenderPipelineDesc, SamplerDesc, }, - OutlineMaskPreference, PickingLayerProcessor, Rgba, + Colormap, OutlineMaskPreference, PickingLayerProcessor, Rgba, }; use super::{ @@ -31,24 +32,6 @@ use super::{ WgpuResourcePools, }; -mod gpu_data { - use crate::wgpu_buffer_types; - - // Keep in sync with mirror in rectangle.wgsl - #[repr(C, align(256))] - #[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)] - pub struct UniformBuffer { - pub top_left_corner_position: wgpu_buffer_types::Vec3RowPadded, - pub extent_u: wgpu_buffer_types::Vec3RowPadded, - pub extent_v: wgpu_buffer_types::Vec3Unpadded, - pub depth_offset: f32, - pub multiplicative_tint: crate::Rgba, - pub outline_mask: wgpu_buffer_types::UVec2RowPadded, - - pub end_padding: [wgpu_buffer_types::PaddingRow; 16 - 5], - } -} - /// Texture filter setting for magnification (a texel covers several pixels). #[derive(Debug)] pub enum TextureFilterMag { @@ -65,6 +48,56 @@ pub enum TextureFilterMin { // TODO(andreas): Offer mipmapping here? } +/// Describes a texture and how to map it to a color. +#[derive(Clone)] +pub struct ColormappedTexture { + pub texture: GpuTexture2DHandle, + + /// Min/max range of the values in the texture. + /// Used to normalize the input values (squash them to the 0-1 range). + pub range: [f32; 2], + + /// Raise the normalized values to this power (before any color mapping). + /// Acts like an inverse brightness. + /// + /// Default: 1.0 + pub gamma: f32, + + /// For any one-component texture, you need to supply a color mapper, + /// which maps the normalized `.r` component to a color. + /// + /// Setting a color mapper for a four-component texture is an error. + /// Failure to set a color mapper for a one-component texture is an error. + pub color_mapper: Option<ColorMapper>, +} + +/// How to map the normalized `.r` component to a color. +#[derive(Clone)] +pub enum ColorMapper { + /// Apply the given function. + Function(Colormap), + + /// Look up the color in this texture. + /// + /// The texture is indexed in a row-major fashion, so that the top left pixel + /// corresponds to the normalized value of 0.0, and the + /// bottom right pixel is 1.0. + /// + /// The texture must have the format [`wgpu::TextureFormat::Rgba8UnormSrgb`]. + Texture(GpuTexture2DHandle), +} + +impl ColormappedTexture { + pub fn from_unorm_srgba(texture: GpuTexture2DHandle) -> Self { + Self { + texture, + range: [0.0, 1.0], + gamma: 1.0, + color_mapper: None, + } + } +} + pub struct TexturedRect { /// Top left corner position in world space.
pub top_left_corner_position: glam::Vec3, @@ -76,8 +109,12 @@ pub struct TexturedRect { pub extent_v: glam::Vec3, /// Texture that fills the rectangle - pub texture: GpuTexture2DHandle, + pub colormapped_texture: ColormappedTexture, + pub options: RectangleOptions, +} + +pub struct RectangleOptions { pub texture_filter_magnification: TextureFilterMag, pub texture_filter_minification: TextureFilterMin, @@ -90,13 +127,9 @@ pub struct TexturedRect { pub outline_mask: OutlineMaskPreference, } -impl Default for TexturedRect { +impl Default for RectangleOptions { fn default() -> Self { Self { - top_left_corner_position: glam::Vec3::ZERO, - extent_u: glam::Vec3::ZERO, - extent_v: glam::Vec3::ZERO, - texture: GpuTexture2DHandle::invalid(), texture_filter_magnification: TextureFilterMag::Nearest, texture_filter_minification: TextureFilterMin::Linear, multiplicative_tint: Rgba::WHITE, @@ -106,6 +139,182 @@ impl Default for TexturedRect { } } +#[derive(thiserror::Error, Debug)] +pub enum RectangleError { + #[error(transparent)] + ResourceManagerError(#[from] ResourceManagerError), + + #[error("Texture requires special features: {0:?}")] + SpecialFeatures(wgpu::Features), + + // There's really no need for users to be able to sample depth textures. + // We don't get filtering of depth textures anyway. + #[error("Depth textures not supported - use float or integer textures instead.")] + DepthTexturesNotSupported, + + #[error("Color mapping is being applied to a four-component RGBA texture")] + ColormappingRgbaTexture, + + #[error("Only 1 and 4 component textures are supported, got {0} components")] + UnsupportedComponentCount(u8), + + #[error("No color mapper was supplied for this 1-component texture")] + MissingColorMapper, + + #[error("Invalid color map texture format: {0:?}")] + UnsupportedColormapTextureFormat(wgpu::TextureFormat), +} + +mod gpu_data { + use crate::wgpu_buffer_types; + + use super::{ColorMapper, RectangleError, TexturedRect}; + + // Keep in sync with mirror in rectangle.wgsl + + // Which texture to read from? + const SAMPLE_TYPE_FLOAT_FILTER: u32 = 1; + const SAMPLE_TYPE_FLOAT_NOFILTER: u32 = 2; + const SAMPLE_TYPE_SINT_NOFILTER: u32 = 3; + const SAMPLE_TYPE_UINT_NOFILTER: u32 = 4; + + // How do we do colormapping? + const COLOR_MAPPER_OFF: u32 = 1; + const COLOR_MAPPER_FUNCTION: u32 = 2; + const COLOR_MAPPER_TEXTURE: u32 = 3; + + const FILTER_NEAREST: u32 = 1; + const FILTER_BILINEAR: u32 = 2; + + #[repr(C, align(256))] + #[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)] + pub struct UniformBuffer { + top_left_corner_position: wgpu_buffer_types::Vec3Unpadded, + colormap_function: u32, + + extent_u: wgpu_buffer_types::Vec3Unpadded, + sample_type: u32, + + extent_v: wgpu_buffer_types::Vec3Unpadded, + depth_offset: f32, + + multiplicative_tint: crate::Rgba, + outline_mask: wgpu_buffer_types::UVec2, + + /// Range of the texture values. + /// Will be mapped to the [0, 1] range before we colormap.
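Putting the `ColormappedTexture` fields together: a raw texture value is first normalized by `range` into [0, 1], then raised to `gamma`, and only then mapped to a color. A standalone sketch of that pipeline with a grayscale stand-in for the real colormaps:

```rust
fn colormap_value(value: f32, range: [f32; 2], gamma: f32) -> [f32; 3] {
    // Normalize into [0, 1] using the user-supplied value range.
    let normalized = ((value - range[0]) / (range[1] - range[0])).clamp(0.0, 1.0);
    // Gamma acts like an inverse brightness on the normalized value.
    let t = normalized.powf(gamma);
    // Stand-in color mapper: linear grayscale instead of a real colormap.
    [t, t, t]
}

fn main() {
    // A value of 2.5 in a [0, 5] range maps to mid-gray at gamma = 1.
    let gray = colormap_value(2.5, [0.0, 5.0], 1.0);
    assert!((gray[0] - 0.5).abs() < 1e-6);
}
```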
+ range_min_max: wgpu_buffer_types::Vec2, + + color_mapper: u32, + gamma: f32, + minification_filter: u32, + magnification_filter: u32, + + _end_padding: [wgpu_buffer_types::PaddingRow; 16 - 6], + } + + impl UniformBuffer { + pub fn from_textured_rect( + rectangle: &super::TexturedRect, + texture_format: &wgpu::TextureFormat, + ) -> Result { + let texture_info = texture_format.describe(); + + let TexturedRect { + top_left_corner_position, + extent_u, + extent_v, + colormapped_texture, + options, + } = rectangle; + + let super::ColormappedTexture { + texture: _, + range, + gamma, + color_mapper, + } = colormapped_texture; + + let super::RectangleOptions { + texture_filter_magnification: _, + texture_filter_minification: _, + multiplicative_tint, + depth_offset, + outline_mask, + } = options; + + let sample_type = match texture_info.sample_type { + wgpu::TextureSampleType::Float { .. } => { + if super::is_float_filterable(texture_format) { + SAMPLE_TYPE_FLOAT_FILTER + } else { + SAMPLE_TYPE_FLOAT_NOFILTER + } + } + wgpu::TextureSampleType::Depth => { + return Err(RectangleError::DepthTexturesNotSupported); + } + wgpu::TextureSampleType::Sint => SAMPLE_TYPE_SINT_NOFILTER, + wgpu::TextureSampleType::Uint => SAMPLE_TYPE_UINT_NOFILTER, + }; + + let mut colormap_function = 0; + let color_mapper_int; + + match texture_info.components { + 1 => match color_mapper { + Some(ColorMapper::Function(colormap)) => { + color_mapper_int = COLOR_MAPPER_FUNCTION; + colormap_function = *colormap as u32; + } + Some(ColorMapper::Texture(_)) => { + color_mapper_int = COLOR_MAPPER_TEXTURE; + } + None => { + return Err(RectangleError::MissingColorMapper); + } + }, + 4 => { + if color_mapper.is_some() { + return Err(RectangleError::ColormappingRgbaTexture); + } else { + color_mapper_int = COLOR_MAPPER_OFF; + } + } + num_components => { + return Err(RectangleError::UnsupportedComponentCount(num_components)) + } + } + + let minification_filter = match rectangle.options.texture_filter_minification { + super::TextureFilterMin::Linear => FILTER_BILINEAR, + super::TextureFilterMin::Nearest => FILTER_NEAREST, + }; + let magnification_filter = match rectangle.options.texture_filter_magnification { + super::TextureFilterMag::Linear => FILTER_BILINEAR, + super::TextureFilterMag::Nearest => FILTER_NEAREST, + }; + + Ok(Self { + top_left_corner_position: (*top_left_corner_position).into(), + colormap_function, + extent_u: (*extent_u).into(), + sample_type, + extent_v: (*extent_v).into(), + depth_offset: *depth_offset as f32, + multiplicative_tint: *multiplicative_tint, + outline_mask: outline_mask.0.unwrap_or_default().into(), + range_min_max: (*range).into(), + color_mapper: color_mapper_int, + gamma: *gamma, + minification_filter, + magnification_filter, + _end_padding: Default::default(), + }) + } + } +} + #[derive(Clone)] struct RectangleInstance { bind_group: GpuBindGroup, @@ -125,7 +334,7 @@ impl RectangleDrawData { pub fn new( ctx: &mut RenderContext, rectangles: &[TexturedRect], - ) -> Result { + ) -> Result { crate::profile_function!(); let mut renderers = ctx.renderers.write(); @@ -142,40 +351,45 @@ impl RectangleDrawData { }); } + let textures: Vec<_> = rectangles + .iter() + .map(|rectangle| { + ctx.texture_manager_2d + .get(&rectangle.colormapped_texture.texture) + }) + .collect(); + + // TODO(emilk): continue on error (skipping just that rectangle)? 
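The component-count match above encodes two invariants from the `ColormappedTexture` docs: one-component textures must have a color mapper, four-component textures must not. A standalone sketch of just that decision table:

```rust
fn validate(components: u8, has_color_mapper: bool) -> Result<(), &'static str> {
    match components {
        1 if !has_color_mapper => Err("no color mapper for 1-component texture"),
        4 if has_color_mapper => Err("color mapping applied to RGBA texture"),
        1 | 4 => Ok(()),
        _ => Err("only 1- and 4-component textures are supported"),
    }
}

fn main() {
    assert!(validate(1, true).is_ok());   // e.g. depth + colormap
    assert!(validate(4, false).is_ok());  // plain RGBA
    assert!(validate(1, false).is_err()); // MissingColorMapper
    assert!(validate(4, true).is_err());  // ColormappingRgbaTexture
}
```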
+ let uniform_buffers: Vec<_> = izip!(rectangles, &textures) + .map(|(rect, texture)| { + gpu_data::UniformBuffer::from_textured_rect(rect, &texture.creation_desc.format) + }) + .try_collect()?; + let uniform_buffer_bindings = create_and_fill_uniform_buffer_batch( ctx, "rectangle uniform buffers".into(), - rectangles.iter().map(|rectangle| gpu_data::UniformBuffer { - top_left_corner_position: rectangle.top_left_corner_position.into(), - extent_u: rectangle.extent_u.into(), - extent_v: rectangle.extent_v.into(), - depth_offset: rectangle.depth_offset as f32, - multiplicative_tint: rectangle.multiplicative_tint, - outline_mask: rectangle.outline_mask.0.unwrap_or_default().into(), - end_padding: Default::default(), - }), + uniform_buffers.into_iter(), ); let mut instances = Vec::with_capacity(rectangles.len()); - for (rectangle, uniform_buffer) in - rectangles.iter().zip(uniform_buffer_bindings.into_iter()) + for (rectangle, uniform_buffer, texture) in + izip!(rectangles, uniform_buffer_bindings, textures) { - let texture = ctx.texture_manager_2d.get(&rectangle.texture)?; - + let options = &rectangle.options; let sampler = ctx.gpu_resources.samplers.get_or_create( &ctx.device, &SamplerDesc { label: format!( "rectangle sampler mag {:?} min {:?}", - rectangle.texture_filter_magnification, - rectangle.texture_filter_minification + options.texture_filter_magnification, options.texture_filter_minification ) .into(), - mag_filter: match rectangle.texture_filter_magnification { + mag_filter: match options.texture_filter_magnification { TextureFilterMag::Linear => wgpu::FilterMode::Linear, TextureFilterMag::Nearest => wgpu::FilterMode::Nearest, }, - min_filter: match rectangle.texture_filter_minification { + min_filter: match options.texture_filter_minification { TextureFilterMin::Linear => wgpu::FilterMode::Linear, TextureFilterMin::Nearest => wgpu::FilterMode::Nearest, }, @@ -184,6 +398,54 @@ impl RectangleDrawData { }, ); + let texture_format = texture.creation_desc.format; + let texture_description = texture_format.describe(); + if texture_description.required_features != Default::default() { + return Err(RectangleError::SpecialFeatures( + texture_description.required_features, + )); + } + + // We set up several texture sources, then instruct the shader to read from at most one of them. + let mut texture_float_filterable = ctx.texture_manager_2d.zeroed_texture_float().handle; + let mut texture_float_nofilter = ctx.texture_manager_2d.zeroed_texture_float().handle; + let mut texture_sint = ctx.texture_manager_2d.zeroed_texture_sint().handle; + let mut texture_uint = ctx.texture_manager_2d.zeroed_texture_uint().handle; + + match texture_description.sample_type { + wgpu::TextureSampleType::Float { .. } => { + if is_float_filterable(&texture_format) { + texture_float_filterable = texture.handle; + } else { + texture_float_nofilter = texture.handle; + } + } + wgpu::TextureSampleType::Depth => { + return Err(RectangleError::DepthTexturesNotSupported); + } + wgpu::TextureSampleType::Sint => { + texture_sint = texture.handle; + } + wgpu::TextureSampleType::Uint => { + texture_uint = texture.handle; + } + } + + // We also set up an optional colormap texture. 
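The zeroed-texture dance above exists because the bind group layout is fixed: every texture slot must be bound, so each slot gets a 1x1 zeroed placeholder and only the slot matching the texture's sample type receives the real handle, while the uniform's `sample_type` tells the shader which slot to read. A standalone sketch:

```rust
#[derive(Clone, Copy)]
enum SampleType {
    FloatFilterable,
    FloatNoFilter,
    Sint,
    Uint,
}

/// Returns [float_filterable, float_nofilter, sint, uint] slot handles.
fn bind_slots(real: u32, zeroed: u32, ty: SampleType) -> [u32; 4] {
    let mut slots = [zeroed; 4]; // every slot starts as a placeholder
    let index = match ty {
        SampleType::FloatFilterable => 0,
        SampleType::FloatNoFilter => 1,
        SampleType::Sint => 2,
        SampleType::Uint => 3,
    };
    slots[index] = real; // only the matching slot gets the real texture
    slots
}

fn main() {
    assert_eq!(bind_slots(42, 0, SampleType::Sint), [0, 0, 42, 0]);
}
```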
+ let colormap_texture = if let Some(ColorMapper::Texture(handle)) = + &rectangle.colormapped_texture.color_mapper + { + let colormap_texture = ctx.texture_manager_2d.get(handle); + if colormap_texture.creation_desc.format != wgpu::TextureFormat::Rgba8UnormSrgb { + return Err(RectangleError::UnsupportedColormapTextureFormat( + colormap_texture.creation_desc.format, + )); + } + colormap_texture.handle + } else { + ctx.texture_manager_2d.zeroed_texture_float().handle + }; + instances.push(RectangleInstance { bind_group: ctx.gpu_resources.bind_groups.alloc( &ctx.device, @@ -192,13 +454,17 @@ impl RectangleDrawData { label: "RectangleInstance::bind_group".into(), entries: smallvec![ uniform_buffer, - BindGroupEntry::DefaultTextureView(texture.handle), - BindGroupEntry::Sampler(sampler) + BindGroupEntry::Sampler(sampler), + BindGroupEntry::DefaultTextureView(texture_float_nofilter), + BindGroupEntry::DefaultTextureView(texture_sint), + BindGroupEntry::DefaultTextureView(texture_uint), + BindGroupEntry::DefaultTextureView(colormap_texture), + BindGroupEntry::DefaultTextureView(texture_float_filterable), ], layout: rectangle_renderer.bind_group_layout, }, ), - draw_outline_mask: rectangle.outline_mask.is_some(), + draw_outline_mask: rectangle.options.outline_mask.is_some(), }); } @@ -244,9 +510,50 @@ impl Renderer for RectangleRenderer { }, count: None, }, + // float sampler: wgpu::BindGroupLayoutEntry { binding: 1, visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + // float textures without filtering (e.g. R32Float): + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // sint texture: + wgpu::BindGroupLayoutEntry { + binding: 3, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Sint, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // uint texture: + wgpu::BindGroupLayoutEntry { + binding: 4, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Uint, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // colormap texture: + wgpu::BindGroupLayoutEntry { + binding: 5, + visibility: wgpu::ShaderStages::FRAGMENT, ty: wgpu::BindingType::Texture { sample_type: wgpu::TextureSampleType::Float { filterable: true }, view_dimension: wgpu::TextureViewDimension::D2, @@ -254,10 +561,15 @@ impl Renderer for RectangleRenderer { }, count: None, }, + // float textures with filtering (e.g. 
Rgba8UnormSrgb): wgpu::BindGroupLayoutEntry { - binding: 2, + binding: 6, visibility: wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, count: None, }, ], @@ -273,19 +585,24 @@ impl Renderer for RectangleRenderer { &pools.bind_group_layouts, ); - let shader_module = pools.shader_modules.get_or_create( + let shader_module_vs = pools.shader_modules.get_or_create( + device, + resolver, + &include_shader_module!("../../shader/rectangle_vs.wgsl"), + ); + let shader_module_fs = pools.shader_modules.get_or_create( device, resolver, - &include_shader_module!("../../shader/rectangle.wgsl"), + &include_shader_module!("../../shader/rectangle_fs.wgsl"), ); let render_pipeline_desc_color = RenderPipelineDesc { label: "RectangleRenderer::render_pipeline_color".into(), pipeline_layout, vertex_entrypoint: "vs_main".into(), - vertex_handle: shader_module, + vertex_handle: shader_module_vs, fragment_entrypoint: "fs_main".into(), - fragment_handle: shader_module, + fragment_handle: shader_module_fs, vertex_buffers: smallvec![], render_targets: smallvec![Some(wgpu::ColorTargetState { format: ViewBuilder::MAIN_TARGET_COLOR_FORMAT, @@ -386,3 +703,11 @@ impl Renderer for RectangleRenderer { ] } } + +fn is_float_filterable(format: &wgpu::TextureFormat) -> bool { + format + .describe() + .guaranteed_format_features + .flags + .contains(wgpu::TextureFormatFeatureFlags::FILTERABLE) +} diff --git a/crates/re_renderer/src/resource_managers/texture_manager.rs b/crates/re_renderer/src/resource_managers/texture_manager.rs index bdfb82755b4f..daed0fbce80f 100644 --- a/crates/re_renderer/src/resource_managers/texture_manager.rs +++ b/crates/re_renderer/src/resource_managers/texture_manager.rs @@ -7,19 +7,28 @@ use crate::{ DebugLabel, }; -use super::ResourceManagerError; - /// Handle to a 2D resource. /// /// Currently, this is solely a more strongly typed regular gpu texture handle. /// Since all textures have "long lived" behavior (no temp allocation, alive until unused), /// there is no difference as with buffer reliant data like meshes or most contents of draw-data. #[derive(Clone)] -pub struct GpuTexture2DHandle(Option); +pub struct GpuTexture2DHandle(GpuTexture); impl GpuTexture2DHandle { - pub fn invalid() -> Self { - Self(None) + /// Width of the texture. + pub fn width(&self) -> u32 { + self.0.texture.width() + } + + /// Height of the texture. + pub fn height(&self) -> u32 { + self.0.texture.height() + } + + /// Width and height of the texture. + pub fn width_height(&self) -> [u32; 2] { + [self.width(), self.height()] } } @@ -32,7 +41,7 @@ pub struct Texture2DCreationDesc<'a> { /// Data for the highest mipmap level. /// Must be padded according to wgpu rules and ready for upload. /// TODO(andreas): This should be a kind of factory function/builder instead which gets target memory passed in. - pub data: &'a [u8], + pub data: std::borrow::Cow<'a, [u8]>, pub format: wgpu::TextureFormat, pub width: u32, pub height: u32, @@ -60,6 +69,9 @@ pub struct TextureManager2D { // optimize for short lived textures as we do with buffer data. 
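On the `Texture2DCreationDesc::data` change from `&'a [u8]` to `Cow<'a, [u8]>` above: the descriptor can now carry either borrowed bytes (zero-copy, as before) or an owned buffer such as freshly converted pixel data, through the same field. A minimal sketch:

```rust
use std::borrow::Cow;

struct CreationDesc<'a> {
    data: Cow<'a, [u8]>,
}

fn main() {
    // Borrowed: no copy, same as the old `&[u8]` field.
    let white = CreationDesc { data: Cow::Borrowed(&[255, 255, 255, 255]) };

    // Owned: e.g. pixels produced by a conversion right before upload.
    let converted: Vec<u8> = (0..4).map(|v| v * 10).collect();
    let owned = CreationDesc { data: converted.into() };

    assert_eq!(white.data.len(), owned.data.len());
}
```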
//manager: ResourceManager, white_texture_unorm: GpuTexture2DHandle, + zeroed_texture_float: GpuTexture2DHandle, + zeroed_texture_depth: GpuTexture2DHandle, + zeroed_texture_sint: GpuTexture2DHandle, zeroed_texture_uint: GpuTexture2DHandle, // For convenience to reduce amount of times we need to pass them around @@ -91,33 +103,27 @@ impl TextureManager2D { texture_pool, &Texture2DCreationDesc { label: "white pixel - unorm".into(), - data: &[255, 255, 255, 255], + data: vec![255, 255, 255, 255].into(), format: wgpu::TextureFormat::Rgba8Unorm, width: 1, height: 1, }, ); - // Wgpu zeros out new textures automatically - let zeroed_texture_uint = GpuTexture2DHandle(Some(texture_pool.alloc( - &device, - &TextureDesc { - label: "zeroed pixel - uint".into(), - format: wgpu::TextureFormat::Rgba8Uint, - size: wgpu::Extent3d { - width: 1, - height: 1, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - usage: wgpu::TextureUsages::TEXTURE_BINDING, - }, - ))); + let zeroed_texture_float = + create_zero_texture(texture_pool, &device, wgpu::TextureFormat::Rgba8Unorm); + let zeroed_texture_depth = + create_zero_texture(texture_pool, &device, wgpu::TextureFormat::Depth16Unorm); + let zeroed_texture_sint = + create_zero_texture(texture_pool, &device, wgpu::TextureFormat::Rgba8Sint); + let zeroed_texture_uint = + create_zero_texture(texture_pool, &device, wgpu::TextureFormat::Rgba8Uint); Self { white_texture_unorm, + zeroed_texture_float, + zeroed_texture_depth, + zeroed_texture_sint, zeroed_texture_uint, device, queue, @@ -158,13 +164,45 @@ impl TextureManager2D { &mut self, key: u64, texture_pool: &mut GpuTexturePool, - creation_desc: &Texture2DCreationDesc<'_>, + texture_desc: Texture2DCreationDesc<'_>, ) -> GpuTexture2DHandle { - let texture_handle = self.texture_cache.entry(key).or_insert_with(|| { - Self::create_and_upload_texture(&self.device, &self.queue, texture_pool, creation_desc) - }); + enum Never {} + match self.get_or_create_with(key, texture_pool, || -> Result<_, Never> { + Ok(texture_desc) + }) { + Ok(tex_handle) => tex_handle, + Err(never) => match never {}, + } + } + + /// Creates a new 2D texture resource and schedules data upload to the GPU if a texture + /// wasn't already created using the same key. + pub fn get_or_create_with<'a, Err>( + &mut self, + key: u64, + texture_pool: &mut GpuTexturePool, + try_create_texture_desc: impl FnOnce() -> Result, Err>, + ) -> Result { + let texture_handle = match self.texture_cache.entry(key) { + std::collections::hash_map::Entry::Occupied(texture_handle) => { + texture_handle.get().clone() // already inserted + } + std::collections::hash_map::Entry::Vacant(entry) => { + // Run potentially expensive texture creation code: + let tex_creation_desc = try_create_texture_desc()?; + entry + .insert(Self::create_and_upload_texture( + &self.device, + &self.queue, + texture_pool, + &tex_creation_desc, + )) + .clone() + } + }; + self.accessed_textures.insert(key); - texture_handle.clone() + Ok(texture_handle) } /// Returns a single pixel white pixel with an rgba8unorm format. @@ -174,12 +212,27 @@ impl TextureManager2D { /// Returns a single pixel white pixel with an rgba8unorm format. pub fn white_texture_unorm(&self) -> &GpuTexture { - self.white_texture_unorm.0.as_ref().unwrap() + &self.white_texture_unorm.0 } - /// Returns a single pixel white pixel with an rgba8unorm format. + /// Returns a single zero pixel with format [`wgpu::TextureFormat::Rgba8Unorm`]. 
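The `enum Never {}` construction in `get_or_create` above is a small trick for reusing a fallible function from an infallible one: picking an uninhabited error type makes the `Err` arm statically impossible, and `match never {}` proves that to the compiler. A standalone sketch:

```rust
enum Never {}

fn get_or_create_with<E>(create: impl FnOnce() -> Result<u32, E>) -> Result<u32, E> {
    create() // stand-in for the caching logic
}

fn get_or_create() -> u32 {
    match get_or_create_with(|| -> Result<_, Never> { Ok(7) }) {
        Ok(value) => value,
        // `Never` has no values, so this arm can never be reached;
        // matching on the empty enum convinces the compiler of that.
        Err(never) => match never {},
    }
}

fn main() {
    assert_eq!(get_or_create(), 7);
}
```

`std::convert::Infallible` serves the same purpose as a local empty enum.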
+ pub fn zeroed_texture_float(&self) -> &GpuTexture { + &self.zeroed_texture_float.0 + } + + /// Returns a single zero pixel with format [`wgpu::TextureFormat::Depth16Unorm`]. + pub fn zeroed_texture_depth(&self) -> &GpuTexture { + &self.zeroed_texture_depth.0 + } + + /// Returns a single zero pixel with format [`wgpu::TextureFormat::Rgba8Sint`]. + pub fn zeroed_texture_sint(&self) -> &GpuTexture { + &self.zeroed_texture_sint.0 + } + + /// Returns a single zero pixel with format [`wgpu::TextureFormat::Rgba8Uint`]. pub fn zeroed_texture_uint(&self) -> &GpuTexture { - self.zeroed_texture_uint.0.as_ref().unwrap() + &self.zeroed_texture_uint.0 } fn create_and_upload_texture( @@ -213,7 +266,7 @@ impl TextureManager2D { // TODO(andreas): Once we have our own temp buffer for uploading, we can do the padding inplace // I.e. the only difference will be if we do one memcopy or one memcopy per row, making row padding a nuisance! - let data = creation_desc.data; + let data: &[u8] = creation_desc.data.as_ref(); // TODO(andreas): temp allocator for staging data? // We don't do any further validation of the buffer here as wgpu does so extensively. @@ -238,20 +291,13 @@ impl TextureManager2D { // TODO(andreas): mipmap generation - GpuTexture2DHandle(Some(texture)) + GpuTexture2DHandle(texture) } /// Retrieves gpu handle. #[allow(clippy::unused_self)] - pub(crate) fn get( - &self, - handle: &GpuTexture2DHandle, - ) -> Result { - handle - .0 - .as_ref() - .ok_or(ResourceManagerError::NullHandle) - .map(|h| h.clone()) + pub fn get(&self, handle: &GpuTexture2DHandle) -> GpuTexture { + handle.0.clone() } pub(crate) fn begin_frame(&mut self, _frame_index: u64) { @@ -261,3 +307,27 @@ impl TextureManager2D { self.accessed_textures.clear(); } } + +fn create_zero_texture( + texture_pool: &mut GpuTexturePool, + device: &Arc, + format: wgpu::TextureFormat, +) -> GpuTexture2DHandle { + // Wgpu zeros out new textures automatically + GpuTexture2DHandle(texture_pool.alloc( + device, + &TextureDesc { + label: format!("zeroed pixel {format:?}").into(), + format, + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + usage: wgpu::TextureUsages::TEXTURE_BINDING, + }, + )) +} diff --git a/crates/re_renderer/src/view_builder.rs b/crates/re_renderer/src/view_builder.rs index 49fde9b8bf09..3d57b21a6f1c 100644 --- a/crates/re_renderer/src/view_builder.rs +++ b/crates/re_renderer/src/view_builder.rs @@ -10,7 +10,7 @@ use crate::{ }, global_bindings::FrameUniformBuffer, renderer::{CompositorDrawData, DebugOverlayDrawData, DrawData, Renderer}, - wgpu_resources::{GpuBindGroup, GpuTexture, TextureDesc}, + wgpu_resources::{GpuBindGroup, GpuTexture, PoolError, TextureDesc}, DebugLabel, IntRect, Rgba, Size, }; @@ -32,9 +32,6 @@ struct QueuedDraw { #[derive(thiserror::Error, Debug)] pub enum ViewBuilderError { - #[error("ViewBuilder::setup_view needs to be called first.")] - ViewNotSetup, - #[error("Screenshot was already scheduled.")] ScreenshotAlreadyScheduled, @@ -43,11 +40,9 @@ pub enum ViewBuilderError { } /// The highest level rendering block in `re_renderer`. -/// Used to build up/collect various resources and then send them off for rendering of a single view. -#[derive(Default)] +/// Used to build up/collect various resources and then send them off for rendering of a single view. pub struct ViewBuilder { - /// Result of [`ViewBuilder::setup_view`] - needs to be `Option` sine some of the fields don't have a default. 
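Returning to `get_or_create_with` above: the cache uses `HashMap`'s `Entry` API, which is what guarantees the potentially expensive `try_create_texture_desc` closure only runs on a cache miss, and that a creation error leaves the cache untouched. The same pattern standalone, with a `String` standing in for the texture handle:

```rust
use std::collections::hash_map::{Entry, HashMap};

/// Only invoke the (possibly expensive, possibly failing) closure on a miss.
fn cached_get_or_create<E>(
    cache: &mut HashMap<u64, String>,
    key: u64,
    create: impl FnOnce() -> Result<String, E>,
) -> Result<String, E> {
    Ok(match cache.entry(key) {
        Entry::Occupied(entry) => entry.get().clone(), // hit: closure never runs
        Entry::Vacant(entry) => entry.insert(create()?).clone(), // miss: create + insert
    })
}

fn main() {
    let mut cache = HashMap::new();
    let first: Result<_, ()> = cached_get_or_create(&mut cache, 1, || Ok("texture".to_owned()));
    assert_eq!(first.unwrap(), "texture");

    // Second lookup hits the cache, so a closure that would fail is never called:
    let second = cached_get_or_create(&mut cache, 1, || Err("unreachable"));
    assert_eq!(second.unwrap(), "texture");
}
```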
- setup: Option, + setup: ViewTargetSetup, queued_draws: Vec, // TODO(andreas): Consider making "render processors" a "thing" by establishing a form of hardcoded/limited-flexibility render-graph @@ -248,11 +243,7 @@ impl ViewBuilder { }, }); - pub fn setup_view( - &mut self, - ctx: &mut RenderContext, - config: TargetConfiguration, - ) -> Result<&mut Self, ViewBuilderError> { + pub fn new(ctx: &mut RenderContext, config: TargetConfiguration) -> Self { crate::profile_function!(); // Can't handle 0 size resolution since this would imply creating zero sized textures. @@ -297,24 +288,6 @@ impl ViewBuilder { }, ); - self.outline_mask_processor = config.outline_config.as_ref().map(|outline_config| { - OutlineMaskProcessor::new( - ctx, - outline_config, - &config.name, - config.resolution_in_pixel, - ) - }); - - self.queue_draw(&CompositorDrawData::new( - ctx, - &main_target_resolved, - self.outline_mask_processor - .as_ref() - .map(|p| p.final_voronoi_texture()), - &config.outline_config, - )); - let aspect_ratio = config.resolution_in_pixel[0] as f32 / config.resolution_in_pixel[1] as f32; @@ -453,7 +426,25 @@ impl ViewBuilder { frame_uniform_buffer, ); - self.setup = Some(ViewTargetSetup { + let outline_mask_processor = config.outline_config.as_ref().map(|outline_config| { + OutlineMaskProcessor::new( + ctx, + outline_config, + &config.name, + config.resolution_in_pixel, + ) + }); + + let composition_draw = queued_draw(&CompositorDrawData::new( + ctx, + &main_target_resolved, + outline_mask_processor + .as_ref() + .map(|p| p.final_voronoi_texture()), + &config.outline_config, + )); + + let setup = ViewTargetSetup { name: config.name, bind_group_0, main_target_msaa: hdr_render_target_msaa, @@ -461,9 +452,15 @@ impl ViewBuilder { depth_buffer, resolution_in_pixel: config.resolution_in_pixel, frame_uniform_buffer_content, - }); + }; - Ok(self) + Self { + setup, + queued_draws: vec![composition_draw], + outline_mask_processor, + screenshot_processor: Default::default(), + picking_processor: Default::default(), + } } fn draw_phase<'a>( @@ -491,21 +488,7 @@ impl ViewBuilder { draw_data: &D, ) -> &mut Self { crate::profile_function!(); - self.queued_draws.push(QueuedDraw { - draw_func: Box::new(move |ctx, phase, pass, draw_data| { - let renderers = ctx.renderers.read(); - let renderer = renderers - .get::() - .context("failed to retrieve renderer")?; - let draw_data = draw_data - .downcast_ref::() - .expect("passed wrong type of draw data"); - renderer.draw(&ctx.gpu_resources, phase, pass, draw_data) - }), - draw_data: Box::new(draw_data.clone()), - renderer_name: std::any::type_name::(), - participated_phases: D::Renderer::participated_phases(), - }); + self.queued_draws.push(queued_draw(draw_data)); self } @@ -515,13 +498,10 @@ impl ViewBuilder { &mut self, ctx: &RenderContext, clear_color: Rgba, - ) -> anyhow::Result { + ) -> Result { crate::profile_function!(); - let setup = self - .setup - .as_ref() - .context("ViewBuilder::setup_view wasn't called yet")?; + let setup = &self.setup; let mut encoder = ctx .device @@ -582,7 +562,7 @@ impl ViewBuilder { //pass.set_bind_group(0, &setup.bind_group_0, &[]); self.draw_phase(ctx, DrawPhase::PickingLayer, &mut pass); } - picking_processor.end_render_pass(&mut encoder); + picking_processor.end_render_pass(&mut encoder, &ctx.gpu_resources)?; } if let Some(outline_mask_processor) = self.outline_mask_processor.take() { @@ -610,7 +590,7 @@ impl ViewBuilder { /// Schedules the taking of a screenshot. 
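The `ViewBuilder` refactor above turns the fallible two-step `setup_view` into a plain constructor, so an un-set-up builder can no longer be represented and the `ViewNotSetup` error disappears. A heavily simplified sketch of the before/after shape; the `String` setup is a stand-in for `ViewTargetSetup`:

```rust
// Before: a default-constructed builder that may not have been set up yet.
#[derive(Default)]
struct OldViewBuilder {
    setup: Option<String>,
}

impl OldViewBuilder {
    fn draw(&self) -> Result<(), &'static str> {
        let _setup = self.setup.as_ref().ok_or("setup_view wasn't called yet")?;
        Ok(())
    }
}

// After: construction *is* setup, so `draw` can't fail this way anymore.
struct NewViewBuilder {
    setup: String,
}

impl NewViewBuilder {
    fn new(config: &str) -> Self {
        Self { setup: config.to_owned() }
    }

    fn draw(&self) {
        let _setup = &self.setup; // always present, no unwrap or error needed
    }
}

fn main() {
    assert!(OldViewBuilder::default().draw().is_err());
    NewViewBuilder::new("main view").draw();
}
```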
/// - /// Needs to be called after [`ViewBuilder::setup_view`] and before [`ViewBuilder::draw`]. + /// Needs to be called before [`ViewBuilder::draw`]. /// Can only be called once per frame per [`ViewBuilder`]. /// /// Data from the screenshot needs to be retrieved via [`crate::ScreenshotProcessor::next_readback_result`]. @@ -640,12 +620,10 @@ impl ViewBuilder { return Err(ViewBuilderError::ScreenshotAlreadyScheduled); }; - let setup = self.setup.as_ref().ok_or(ViewBuilderError::ViewNotSetup)?; - self.screenshot_processor = Some(ScreenshotProcessor::new( ctx, - &setup.name, - setup.resolution_in_pixel.into(), + &self.setup.name, + self.setup.resolution_in_pixel.into(), identifier, user_data, )); @@ -655,7 +633,7 @@ impl ViewBuilder { /// Schedules the readback of a rectangle from the picking layer. /// - /// Needs to be called after [`ViewBuilder::setup_view`] and before [`ViewBuilder::draw`]. + /// Needs to be called before [`ViewBuilder::draw`]. /// Can only be called once per frame per [`ViewBuilder`]. /// /// The result will still be valid if the rectangle is partially or fully outside of bounds. @@ -697,14 +675,12 @@ impl ViewBuilder { return Err(ViewBuilderError::PickingRectAlreadyScheduled); }; - let setup = self.setup.as_ref().ok_or(ViewBuilderError::ViewNotSetup)?; - let picking_processor = PickingLayerProcessor::new( ctx, - &setup.name, - setup.resolution_in_pixel.into(), + &self.setup.name, + self.setup.resolution_in_pixel.into(), picking_rect, - &setup.frame_uniform_buffer_content, + &self.setup.frame_uniform_buffer_content, show_debug_view, readback_identifier, readback_user_data, @@ -714,7 +690,7 @@ impl ViewBuilder { self.queue_draw(&DebugOverlayDrawData::new( ctx, &picking_processor.picking_target, - setup.resolution_in_pixel.into(), + self.setup.resolution_in_pixel.into(), picking_rect, )); } @@ -733,22 +709,37 @@ impl ViewBuilder { ctx: &'a RenderContext, pass: &mut wgpu::RenderPass<'a>, screen_position: glam::Vec2, - ) -> Result<(), ViewBuilderError> { + ) { crate::profile_function!(); - let setup = self.setup.as_ref().ok_or(ViewBuilderError::ViewNotSetup)?; pass.set_viewport( screen_position.x, screen_position.y, - setup.resolution_in_pixel[0] as f32, - setup.resolution_in_pixel[1] as f32, + self.setup.resolution_in_pixel[0] as f32, + self.setup.resolution_in_pixel[1] as f32, 0.0, 1.0, ); - pass.set_bind_group(0, &setup.bind_group_0, &[]); + pass.set_bind_group(0, &self.setup.bind_group_0, &[]); self.draw_phase(ctx, DrawPhase::Compositing, pass); + } +} - Ok(()) +fn queued_draw(draw_data: &D) -> QueuedDraw { + QueuedDraw { + draw_func: Box::new(move |ctx, phase, pass, draw_data| { + let renderers = ctx.renderers.read(); + let renderer = renderers + .get::() + .context("failed to retrieve renderer")?; + let draw_data = draw_data + .downcast_ref::() + .expect("passed wrong type of draw data"); + renderer.draw(&ctx.gpu_resources, phase, pass, draw_data) + }), + draw_data: Box::new(draw_data.clone()), + renderer_name: std::any::type_name::(), + participated_phases: D::Renderer::participated_phases(), } } diff --git a/crates/re_renderer/src/wgpu_buffer_types.rs b/crates/re_renderer/src/wgpu_buffer_types.rs index af60adac00c3..ee7276337b55 100644 --- a/crates/re_renderer/src/wgpu_buffer_types.rs +++ b/crates/re_renderer/src/wgpu_buffer_types.rs @@ -61,6 +61,13 @@ impl From for Vec2 { } } +impl From<[f32; 2]> for Vec2 { + #[inline] + fn from([x, y]: [f32; 2]) -> Self { + Vec2 { x, y } + } +} + #[repr(C, align(16))] #[derive(Clone, Copy, Zeroable, Pod)] pub struct Vec2RowPadded 
{ @@ -98,10 +105,10 @@ impl From for UVec2 { impl From<[u8; 2]> for UVec2 { #[inline] - fn from(v: [u8; 2]) -> Self { + fn from([x, y]: [u8; 2]) -> Self { UVec2 { - x: v[0] as u32, - y: v[1] as u32, + x: x as u32, + y: y as u32, } } } @@ -129,10 +136,10 @@ impl From for UVec2RowPadded { impl From<[u8; 2]> for UVec2RowPadded { #[inline] - fn from(v: [u8; 2]) -> Self { + fn from([x, y]: [u8; 2]) -> Self { UVec2RowPadded { - x: v[0] as u32, - y: v[1] as u32, + x: x as u32, + y: y as u32, padding0: 0, padding1: 0, } diff --git a/crates/re_renderer/src/wgpu_resources/mod.rs b/crates/re_renderer/src/wgpu_resources/mod.rs index ef272248e3dd..06f30b6292d3 100644 --- a/crates/re_renderer/src/wgpu_resources/mod.rs +++ b/crates/re_renderer/src/wgpu_resources/mod.rs @@ -117,9 +117,9 @@ impl WgpuResourcePools { } } -/// Utility for dealing with rows of raw texture data. -#[derive(Clone, Copy)] -pub struct TextureRowDataInfo { +/// Utility for dealing with buffers containing raw 2D texture data. +#[derive(Clone)] +pub struct Texture2DBufferInfo { /// How many bytes per row contain actual data. pub bytes_per_row_unpadded: u32, @@ -127,34 +127,57 @@ pub struct TextureRowDataInfo { /// /// Padding bytes are always at the end of a row. pub bytes_per_row_padded: u32, + + /// Size required for an unpadded buffer. + pub buffer_size_unpadded: wgpu::BufferAddress, + + /// Size required for a padded buffer as it is read/written from/to the GPU. + pub buffer_size_padded: wgpu::BufferAddress, } -impl TextureRowDataInfo { - pub fn new(format: wgpu::TextureFormat, width: u32) -> Self { +impl Texture2DBufferInfo { + #[inline] + pub fn new(format: wgpu::TextureFormat, extent: glam::UVec2) -> Self { let format_info = format.describe(); - let width_blocks = width / format_info.block_dimensions.0 as u32; - let bytes_per_row_unaligned = width_blocks * format_info.block_size as u32; + + let width_blocks = extent.x / format_info.block_dimensions.0 as u32; + let height_blocks = extent.y / format_info.block_dimensions.1 as u32; + + let bytes_per_row_unpadded = width_blocks * format_info.block_size as u32; + let bytes_per_row_padded = + wgpu::util::align_to(bytes_per_row_unpadded, wgpu::COPY_BYTES_PER_ROW_ALIGNMENT); Self { - bytes_per_row_unpadded: bytes_per_row_unaligned, - bytes_per_row_padded: wgpu::util::align_to( - bytes_per_row_unaligned, - wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, - ), + bytes_per_row_unpadded, + bytes_per_row_padded, + buffer_size_unpadded: (bytes_per_row_unpadded * height_blocks) as wgpu::BufferAddress, + buffer_size_padded: (bytes_per_row_padded * height_blocks) as wgpu::BufferAddress, } } + #[inline] + pub fn num_rows(&self) -> u32 { + self.buffer_size_padded as u32 / self.bytes_per_row_padded + } + /// Removes the padding from a buffer containing gpu texture data. + /// + /// The passed in buffer is to be expected to be exactly of size [`Texture2DBufferInfo::buffer_size_padded`]. + /// + /// Note that if you're passing in gpu data, there no alignment guarantees on the returned slice, + /// do NOT convert it using [`bytemuck`]. Use [`Texture2DBufferInfo::remove_padding_and_convert`] instead. 
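`Texture2DBufferInfo` above exists because wgpu buffer/texture copies require each row to be padded to `COPY_BYTES_PER_ROW_ALIGNMENT` (256 bytes), while CPU-side consumers want tightly packed rows. A dependency-free sketch of the size arithmetic above and of the `remove_padding` step that follows; the block size is passed in directly instead of derived from a `wgpu::TextureFormat`:

```rust
const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256; // wgpu's required row alignment

fn align_to(value: u32, alignment: u32) -> u32 {
    (value + alignment - 1) / alignment * alignment
}

struct Texture2DBufferInfo {
    bytes_per_row_unpadded: u32,
    bytes_per_row_padded: u32,
}

impl Texture2DBufferInfo {
    /// `bytes_per_block` stands in for `format.describe().block_size`.
    fn new(bytes_per_block: u32, width_blocks: u32) -> Self {
        let bytes_per_row_unpadded = width_blocks * bytes_per_block;
        Self {
            bytes_per_row_unpadded,
            bytes_per_row_padded: align_to(bytes_per_row_unpadded, COPY_BYTES_PER_ROW_ALIGNMENT),
        }
    }

    /// Keep only the data bytes of each row, dropping the tail padding.
    fn remove_padding(&self, buffer: &[u8]) -> Vec<u8> {
        assert_eq!(buffer.len() as u32 % self.bytes_per_row_padded, 0);
        buffer
            .chunks_exact(self.bytes_per_row_padded as usize)
            .flat_map(|row| &row[..self.bytes_per_row_unpadded as usize])
            .copied()
            .collect()
    }
}

fn main() {
    // A 100-pixel-wide Rgba8 texture: 400 data bytes per row, padded to 512.
    let info = Texture2DBufferInfo::new(4, 100);
    assert_eq!(info.bytes_per_row_padded, 512);

    let padded = vec![7u8; 2 * 512]; // two rows as read back from the GPU
    assert_eq!(info.remove_padding(&padded).len(), 2 * 400);
}
```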
pub fn remove_padding<'a>(&self, buffer: &'a [u8]) -> Cow<'a, [u8]> { + crate::profile_function!(); + + assert_eq!(buffer.len() as wgpu::BufferAddress, self.buffer_size_padded); + if self.bytes_per_row_padded == self.bytes_per_row_unpadded { return Cow::Borrowed(buffer); } - let height = (buffer.len() as u32) / self.bytes_per_row_padded; - let mut unpadded_buffer = - Vec::with_capacity((self.bytes_per_row_unpadded * height) as usize); + let mut unpadded_buffer = Vec::with_capacity(self.buffer_size_unpadded as _); - for row in 0..height { + for row in 0..self.num_rows() { let offset = (self.bytes_per_row_padded * row) as usize; unpadded_buffer.extend_from_slice( &buffer[offset..(offset + self.bytes_per_row_unpadded as usize)], @@ -163,4 +186,42 @@ impl TextureRowDataInfo { unpadded_buffer.into() } + + /// Removes the padding from a buffer containing gpu texture data and remove convert to a given type. + /// + /// The passed in buffer is to be expected to be exactly of size [`Texture2DBufferInfo::buffer_size_padded`]. + /// + /// The unpadded row size is expected to be a multiple of the size of the target type. + /// (Which means that, while uncommon, it technically doesn't need to be as big as a block in the pixel - this can be useful for e.g. packing wide bitfields) + pub fn remove_padding_and_convert(&self, buffer: &[u8]) -> Vec { + crate::profile_function!(); + + assert_eq!(buffer.len() as wgpu::BufferAddress, self.buffer_size_padded); + assert!(self.bytes_per_row_unpadded % std::mem::size_of::() as u32 == 0); + + // Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned, + // so much, that we can't even interpret it as e.g. a u32 slice. + // Therefore, we have to do a copy of the data regardless of whether it's padded or not. + + let mut unpadded_buffer: Vec = vec![ + T::zeroed(); + (self.num_rows() * self.bytes_per_row_unpadded / std::mem::size_of::() as u32) + as usize + ]; // TODO(andreas): Consider using unsafe set_len() instead of vec![] to avoid zeroing the memory. + + // The copy has to happen on a u8 slice, because any other type would assume some alignment that we can't guarantee because of the above. 
+ let unpadded_buffer_u8_view = bytemuck::cast_slice_mut(&mut unpadded_buffer); + + for row in 0..self.num_rows() { + let offset_padded = (self.bytes_per_row_padded * row) as usize; + let offset_unpadded = (self.bytes_per_row_unpadded * row) as usize; + unpadded_buffer_u8_view + [offset_unpadded..(offset_unpadded + self.bytes_per_row_unpadded as usize)] + .copy_from_slice( + &buffer[offset_padded..(offset_padded + self.bytes_per_row_unpadded as usize)], + ); + } + + unpadded_buffer + } } diff --git a/crates/re_renderer/src/workspace_shaders.rs b/crates/re_renderer/src/workspace_shaders.rs index c2f37fe7ae41..7d999da73645 100644 --- a/crates/re_renderer/src/workspace_shaders.rs +++ b/crates/re_renderer/src/workspace_shaders.rs @@ -25,6 +25,12 @@ pub fn init() { fs.create_file(virtpath, content).unwrap(); } + { + let virtpath = Path::new("shader/copy_texture.wgsl"); + let content = include_str!("../shader/copy_texture.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + { let virtpath = Path::new("shader/debug_overlay.wgsl"); let content = include_str!("../shader/debug_overlay.wgsl").into(); @@ -103,6 +109,18 @@ pub fn init() { fs.create_file(virtpath, content).unwrap(); } + { + let virtpath = Path::new("shader/rectangle_fs.wgsl"); + let content = include_str!("../shader/rectangle_fs.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + + { + let virtpath = Path::new("shader/rectangle_vs.wgsl"); + let content = include_str!("../shader/rectangle_vs.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + { let virtpath = Path::new("shader/screen_triangle.wgsl"); let content = include_str!("../shader/screen_triangle.wgsl").into(); diff --git a/crates/re_sdk/Cargo.toml b/crates/re_sdk/Cargo.toml index 1eb913bfcf30..018bc298a43b 100644 --- a/crates/re_sdk/Cargo.toml +++ b/crates/re_sdk/Cargo.toml @@ -33,17 +33,13 @@ image = ["re_log_types/image"] [dependencies] -re_build_info.workspace = true -re_error.workspace = true -re_log_types = { workspace = true, features = ["save"] } +re_log_encoding = { workspace = true, features = ["encoder"] } +re_log_types.workspace = true re_log.workspace = true re_memory.workspace = true re_sdk_comms = { workspace = true, features = ["client"] } -re_smart_channel.workspace = true -anyhow.workspace = true document-features = "0.2" -nohash-hasher = "0.2" parking_lot.workspace = true thiserror.workspace = true diff --git a/crates/re_sdk/src/lib.rs b/crates/re_sdk/src/lib.rs index 03d329a6435e..a7e7fe27607f 100644 --- a/crates/re_sdk/src/lib.rs +++ b/crates/re_sdk/src/lib.rs @@ -9,9 +9,6 @@ // ---------------- // Private modules: -#[cfg(not(target_arch = "wasm32"))] -mod file_sink; - #[cfg(feature = "global_session")] mod global; @@ -34,6 +31,13 @@ pub use re_log_types::{ ApplicationId, Component, ComponentName, EntityPath, RecordingId, SerializableComponent, }; +#[cfg(not(target_arch = "wasm32"))] +impl crate::sink::LogSink for re_log_encoding::FileSink { + fn send(&self, msg: re_log_types::LogMsg) { + re_log_encoding::FileSink::send(self, msg); + } +} + // --------------- // Public modules: @@ -45,15 +49,17 @@ pub mod demo_util; /// This is how you select whether the log stream ends up /// sent over TCP, written to file, etc. 
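One subtlety in the `LogSink` impl for `FileSink` above: `send` is both an inherent method on `FileSink` and the trait method being implemented, and the body calls the inherent one via a fully qualified path; a plain `self.send(msg)` would recurse forever. The disambiguation in miniature:

```rust
struct FileSink;

impl FileSink {
    /// Inherent method, same name as the trait method below.
    fn send(&self, msg: &str) {
        println!("writing to file: {msg}");
    }
}

trait LogSink {
    fn send(&self, msg: &str);
}

impl LogSink for FileSink {
    fn send(&self, msg: &str) {
        // Fully qualified: resolves to the inherent `FileSink::send`,
        // because inherent methods take precedence over trait methods.
        FileSink::send(self, msg);
    }
}

fn main() {
    let sink: &dyn LogSink = &FileSink;
    sink.send("hello"); // trait dispatch -> inherent method -> file write
}
```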
pub mod sink {
-    pub use crate::log_sink::{disabled, BufferedSink, LogSink, TcpSink};
+    pub use crate::log_sink::{
+        disabled, BufferedSink, LogSink, MemorySink, MemorySinkStorage, TcpSink,
+    };

     #[cfg(not(target_arch = "wasm32"))]
-    pub use crate::file_sink::{FileSink, FileSinkError};
+    pub use re_log_encoding::{FileSink, FileSinkError};
 }

 /// Things directly related to logging.
 pub mod log {
-    pub use re_log_types::{DataCell, DataRow, DataTable, LogMsg, MsgId, PathOp};
+    pub use re_log_types::{DataCell, DataRow, DataTable, LogMsg, PathOp, RowId, TableId};
 }

 /// Time-related types.
@@ -71,8 +77,7 @@ pub mod components {
         EncodedMesh3D, InstanceKey, KeypointId, Label, LineStrip2D, LineStrip3D, Mat3x3, Mesh3D,
         MeshFormat, MeshId, Pinhole, Point2D, Point3D, Quaternion, Radius, RawMesh3D, Rect2D,
         Rigid3, Scalar, ScalarPlotProps, Size3D, Tensor, TensorData, TensorDataMeaning,
-        TensorDimension, TensorId, TensorTrait, TextEntry, Transform, Vec2D, Vec3D, Vec4D,
-        ViewCoordinates,
+        TensorDimension, TensorId, TextEntry, Transform, Vec2D, Vec3D, Vec4D, ViewCoordinates,
     };
 }
diff --git a/crates/re_sdk/src/log_sink.rs b/crates/re_sdk/src/log_sink.rs
index 2dd7fa8bf75f..639c682aaf21 100644
--- a/crates/re_sdk/src/log_sink.rs
+++ b/crates/re_sdk/src/log_sink.rs
@@ -76,6 +76,53 @@ impl LogSink for BufferedSink {
     }
 }

+/// Store log messages directly in memory.
+///
+/// Although very similar to `BufferedSink`, this sink is a real endpoint: when creating
+/// a new sink, the logged messages stay with the `MemorySink` (`drain_backlog` does nothing).
+///
+/// Additionally, the raw storage can be accessed and used to create an in-memory RRD.
+/// This is useful for things like the inline rrd-viewer in Jupyter notebooks.
+#[derive(Default)]
+pub struct MemorySink(MemorySinkStorage);
+
+impl MemorySink {
+    /// Access the raw `MemorySinkStorage`.
+    pub fn buffer(&self) -> MemorySinkStorage {
+        self.0.clone()
+    }
+}
+
+impl LogSink for MemorySink {
+    fn send(&self, msg: LogMsg) {
+        self.0.lock().push(msg);
+    }
+
+    fn send_all(&self, mut messages: Vec<LogMsg>) {
+        self.0.lock().append(&mut messages);
+    }
+}
+
+/// The storage used by [`MemorySink`].
+#[derive(Default, Clone)]
+pub struct MemorySinkStorage(std::sync::Arc<parking_lot::Mutex<Vec<LogMsg>>>);
+
+impl MemorySinkStorage {
+    /// Lock the contained buffer.
+    fn lock(&self) -> parking_lot::MutexGuard<'_, Vec<LogMsg>> {
+        self.0.lock()
+    }
+
+    /// Convert the stored messages into an in-memory Rerun log file.
+    pub fn rrd_as_bytes(&self) -> Result<Vec<u8>, re_log_encoding::encoder::EncodeError> {
+        let messages = self.lock();
+        let mut buffer = std::io::Cursor::new(Vec::new());
+        re_log_encoding::encoder::encode(messages.iter(), &mut buffer)?;
+        Ok(buffer.into_inner())
+    }
+}
+
 // ----------------------------------------------------------------------------

 /// Stream log messages to a Rerun TCP server.
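Since `MemorySinkStorage` is just an `Arc` around the message buffer, the handle returned by `MemorySink::buffer()` is a live view: it observes messages logged after it was taken. A sketch of that sharing, using `std::sync::Mutex` in place of `parking_lot` and `String` in place of `LogMsg`:

```rust
use std::sync::{Arc, Mutex, MutexGuard};

/// Cloning clones the `Arc` handle, not the buffered messages.
#[derive(Default, Clone)]
struct MemorySinkStorage(Arc<Mutex<Vec<String>>>);

impl MemorySinkStorage {
    fn lock(&self) -> MutexGuard<'_, Vec<String>> {
        self.0.lock().unwrap()
    }
}

#[derive(Default)]
struct MemorySink(MemorySinkStorage);

impl MemorySink {
    fn send(&self, msg: String) {
        self.0.lock().push(msg);
    }

    fn buffer(&self) -> MemorySinkStorage {
        self.0.clone()
    }
}

fn main() {
    let sink = MemorySink::default();
    let buffer = sink.buffer(); // handle taken *before* anything is logged
    sink.send("hello".to_owned());
    // The handle shares storage with the sink, so it sees the later message:
    assert_eq!(buffer.lock().len(), 1);
}
```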
diff --git a/crates/re_sdk/src/msg_sender.rs b/crates/re_sdk/src/msg_sender.rs index 790a06e71bd4..0bd6832a3f2f 100644 --- a/crates/re_sdk/src/msg_sender.rs +++ b/crates/re_sdk/src/msg_sender.rs @@ -1,11 +1,13 @@ -use re_log_types::{component_types::InstanceKey, DataRow, DataTableError}; +use std::borrow::Borrow; + +use re_log_types::{component_types::InstanceKey, DataRow, DataTableError, RecordingId, RowId}; use crate::{ components::Transform, - log::{DataCell, LogMsg, MsgId}, + log::{DataCell, LogMsg}, sink::LogSink, time::{Time, TimeInt, TimePoint, Timeline}, - Component, EntityPath, SerializableComponent, + Component, EntityPath, SerializableComponent, Session, }; // TODO(#1619): Rust SDK batching @@ -229,13 +231,17 @@ impl MsgSender { /// Consumes, packs, sanity checks and finally sends the message to the currently configured /// target of the SDK. - pub fn send(self, sink: &impl std::borrow::Borrow) -> Result<(), DataTableError> { - self.send_to_sink(sink.borrow()) + pub fn send(self, session: &Session) -> Result<(), DataTableError> { + self.send_to_sink(session.recording_id(), session.borrow()) } /// Consumes, packs, sanity checks and finally sends the message to the currently configured /// target of the SDK. - fn send_to_sink(self, sink: &dyn LogSink) -> Result<(), DataTableError> { + fn send_to_sink( + self, + recording_id: RecordingId, + sink: &dyn LogSink, + ) -> Result<(), DataTableError> { if !sink.is_enabled() { return Ok(()); // silently drop the message } @@ -243,15 +249,24 @@ impl MsgSender { let [row_standard, row_transforms, row_splats] = self.into_rows(); if let Some(row_transforms) = row_transforms { - sink.send(LogMsg::ArrowMsg((&row_transforms.into_table()).try_into()?)); + sink.send(LogMsg::ArrowMsg( + recording_id, + row_transforms.into_table().to_arrow_msg()?, + )); } if let Some(row_splats) = row_splats { - sink.send(LogMsg::ArrowMsg((&row_splats.into_table()).try_into()?)); + sink.send(LogMsg::ArrowMsg( + recording_id, + row_splats.into_table().to_arrow_msg()?, + )); } // Always the primary component last so range-based queries will include the other data. // Since the primary component can't be splatted it must be in msg_standard, see(#1215). 
if let Some(row_standard) = row_standard { - sink.send(LogMsg::ArrowMsg((&row_standard.into_table()).try_into()?)); + sink.send(LogMsg::ArrowMsg( + recording_id, + row_standard.into_table().to_arrow_msg()?, + )); } Ok(()) @@ -305,7 +320,7 @@ impl MsgSender { // Standard rows[0] = (!standard_cells.is_empty()).then(|| { DataRow::from_cells( - MsgId::random(), + RowId::random(), timepoint.clone(), entity_path.clone(), num_instances.unwrap_or(0), @@ -316,7 +331,7 @@ impl MsgSender { // Transforms rows[1] = (!transform_cells.is_empty()).then(|| { DataRow::from_cells( - MsgId::random(), + RowId::random(), timepoint.clone(), entity_path.clone(), num_transform_instances, @@ -325,10 +340,10 @@ impl MsgSender { }); // Splats - // TODO(cmc): unsplit splats once new data cells are in + // TODO(#1629): unsplit splats once new data cells are in rows[2] = (!splatted.is_empty()).then(|| { splatted.push(DataCell::from_native(&[InstanceKey::SPLAT])); - DataRow::from_cells(MsgId::random(), timepoint, entity_path, 1, splatted) + DataRow::from_cells(RowId::random(), timepoint, entity_path, 1, splatted) }); rows diff --git a/crates/re_sdk/src/session.rs b/crates/re_sdk/src/session.rs index ba2eb9a3b1fa..b9fbc1d419d3 100644 --- a/crates/re_sdk/src/session.rs +++ b/crates/re_sdk/src/session.rs @@ -133,7 +133,7 @@ impl SessionBuilder { pub fn save( self, path: impl Into, - ) -> Result { + ) -> Result { let (rerun_enabled, recording_info) = self.finalize(); if rerun_enabled { Ok(Session::new( @@ -189,6 +189,7 @@ impl SessionBuilder { #[must_use] #[derive(Clone)] pub struct Session { + recording_info: RecordingInfo, sink: Arc, // TODO(emilk): add convenience `TimePoint` here so that users can // do things like `session.set_time_sequence("frame", frame_idx);` @@ -221,14 +222,17 @@ impl Session { sink.send( re_log_types::BeginRecordingMsg { - msg_id: re_log_types::MsgId::random(), - info: recording_info, + row_id: re_log_types::RowId::random(), + info: recording_info.clone(), } .into(), ); } - Self { sink: sink.into() } + Self { + recording_info, + sink: sink.into(), + } } /// Construct a new session with a disabled "dummy" sink that drops all logging messages. @@ -236,6 +240,16 @@ impl Session { /// [`Self::is_enabled`] will return `false`. pub fn disabled() -> Self { Self { + recording_info: RecordingInfo { + application_id: ApplicationId::unknown(), + recording_id: Default::default(), + is_official_example: crate::called_from_official_rust_example(), + started: Time::now(), + recording_source: RecordingSource::RustSdk { + rustc_version: env!("RE_BUILD_RUSTC_VERSION").into(), + llvm_version: env!("RE_BUILD_LLVM_VERSION").into(), + }, + }, sink: crate::sink::disabled().into(), } } @@ -272,17 +286,25 @@ impl Session { time_point: &re_log_types::TimePoint, path_op: re_log_types::PathOp, ) { - self.send(LogMsg::EntityPathOpMsg(re_log_types::EntityPathOpMsg { - msg_id: re_log_types::MsgId::random(), - time_point: time_point.clone(), - path_op, - })); + self.send(LogMsg::EntityPathOpMsg( + self.recording_id(), + re_log_types::EntityPathOpMsg { + row_id: re_log_types::RowId::random(), + time_point: time_point.clone(), + path_op, + }, + )); } /// Drain all buffered [`LogMsg`]es and return them. pub fn drain_backlog(&self) -> Vec { self.sink.drain_backlog() } + + /// The current [`RecordingId`]. 
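With `RecordingInfo` stored on the `Session`, every outgoing message can be stamped with its `RecordingId` (see `recording_id()` just below), which is what lets a receiver juggle multiple recordings at once. A sketch of the tagging and routing this enables; all types here are simplified stand-ins:

```rust
use std::collections::HashMap;

type RecordingId = u64;

enum LogMsg {
    Goodbye,
    ArrowMsg(RecordingId, String), // payload stands in for an Arrow table
}

impl LogMsg {
    /// Every message except `Goodbye` carries the recording it belongs to.
    fn recording_id(&self) -> Option<RecordingId> {
        match self {
            LogMsg::Goodbye => None,
            LogMsg::ArrowMsg(id, _) => Some(*id),
        }
    }
}

fn main() {
    let msgs = vec![
        LogMsg::ArrowMsg(1, "points".to_owned()),
        LogMsg::ArrowMsg(2, "tensor".to_owned()),
        LogMsg::Goodbye,
    ];

    // A receiver (like the viewer) routes each message to its recording:
    let mut per_recording: HashMap<RecordingId, Vec<String>> = HashMap::new();
    for msg in msgs {
        let Some(id) = msg.recording_id() else { continue };
        if let LogMsg::ArrowMsg(_, payload) = msg {
            per_recording.entry(id).or_default().push(payload);
        }
    }
    assert_eq!(per_recording[&1].len(), 1);
}
```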
+ pub fn recording_id(&self) -> RecordingId { + self.recording_info.recording_id + } } impl AsRef for Session { diff --git a/crates/re_sdk_comms/src/buffered_client.rs b/crates/re_sdk_comms/src/buffered_client.rs index a50f951e3663..797a4a23b34e 100644 --- a/crates/re_sdk_comms/src/buffered_client.rs +++ b/crates/re_sdk_comms/src/buffered_client.rs @@ -2,7 +2,7 @@ use std::{net::SocketAddr, thread::JoinHandle}; use crossbeam::channel::{select, Receiver, Sender}; -use re_log_types::{LogMsg, MsgId}; +use re_log_types::{LogMsg, RowId}; #[derive(Debug, PartialEq, Eq)] struct FlushedMsg; @@ -146,12 +146,14 @@ impl Drop for Client { /// Wait until everything has been sent. fn drop(&mut self) { re_log::debug!("Shutting down the client connection…"); - self.send(LogMsg::Goodbye(MsgId::random())); + self.send(LogMsg::Goodbye(RowId::random())); self.flush(); + // First shut down the encoder: self.encode_quit_tx.send(QuitMsg).ok(); + self.encode_join.take().map(|j| j.join().ok()); + // Then the other threads: self.send_quit_tx.send(InterruptMsg::Quit).ok(); self.drop_quit_tx.send(QuitMsg).ok(); - self.encode_join.take().map(|j| j.join().ok()); self.send_join.take().map(|j| j.join().ok()); self.drop_join.take().map(|j| j.join().ok()); re_log::debug!("TCP client has shut down."); @@ -196,11 +198,14 @@ fn msg_encode( MsgMsg::Flush => PacketMsg::Flush, }; - packet_tx - .send(packet_msg) - .expect("tcp_sender thread should live longer"); - - msg_drop_tx.send(msg_msg).expect("Main thread should still be alive"); + if packet_tx.send(packet_msg).is_err() { + re_log::error!("Failed to send message to tcp_sender thread. Likely a shutdown race-condition."); + return; + } + if msg_drop_tx.send(msg_msg).is_err() { + re_log::error!("Failed to send message to msg_drop thread. Likely a shutdown race-condition"); + return; + } } else { return; // channel has closed } diff --git a/crates/re_sdk_comms/src/server.rs b/crates/re_sdk_comms/src/server.rs index 71c7c786f763..75f766d40b7c 100644 --- a/crates/re_sdk_comms/src/server.rs +++ b/crates/re_sdk_comms/src/server.rs @@ -158,7 +158,8 @@ async fn run_client( let msg = crate::decode_log_msg(&packet)?; if matches!(msg, LogMsg::Goodbye(_)) { - re_log::debug!("Client sent goodbye message."); + re_log::debug!("Received goodbye message."); + tx.send(msg)?; return Ok(()); } @@ -209,9 +210,11 @@ impl CongestionManager { #[allow(clippy::match_same_arms)] match msg { // we don't want to drop any of these - LogMsg::BeginRecordingMsg(_) | LogMsg::EntityPathOpMsg(_) | LogMsg::Goodbye(_) => true, + LogMsg::BeginRecordingMsg(_) | LogMsg::EntityPathOpMsg(_, _) | LogMsg::Goodbye(_) => { + true + } - LogMsg::ArrowMsg(arrow_msg) => self.should_send_time_point(&arrow_msg.timepoint_max), + LogMsg::ArrowMsg(_, arrow_msg) => self.should_send_time_point(&arrow_msg.timepoint_max), } } diff --git a/crates/re_smart_channel/src/lib.rs b/crates/re_smart_channel/src/lib.rs index 0a278dc9e078..38fcb352dfdf 100644 --- a/crates/re_smart_channel/src/lib.rs +++ b/crates/re_smart_channel/src/lib.rs @@ -18,6 +18,11 @@ pub enum Source { /// Streaming an `.rrd` file over http. RrdHttpStream { url: String }, + /// Loading an `.rrd` file from a `postMessage` js event + /// + /// Only applicable to web browser iframes + RrdWebEventListener, + /// The source is the logging sdk directly, same process. Sdk, @@ -36,7 +41,7 @@ pub enum Source { impl Source { pub fn is_network(&self) -> bool { match self { - Self::File { .. } | Self::Sdk => false, + Self::File { .. 
} | Self::Sdk | Self::RrdWebEventListener => false, Self::RrdHttpStream { .. } | Self::WsClient { .. } | Self::TcpServer { .. } => true, } } diff --git a/crates/re_tensor_ops/src/dimension_mapping.rs b/crates/re_tensor_ops/src/dimension_mapping.rs index d2f9dbab1595..37b7517c8281 100644 --- a/crates/re_tensor_ops/src/dimension_mapping.rs +++ b/crates/re_tensor_ops/src/dimension_mapping.rs @@ -1,6 +1,6 @@ use re_log_types::component_types; -#[derive(Clone, Debug, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Deserialize, serde::Serialize)] pub struct DimensionSelector { pub visible: bool, pub dim_idx: usize, @@ -15,7 +15,7 @@ impl DimensionSelector { } } -#[derive(Clone, Debug, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Deserialize, serde::Serialize)] pub struct DimensionMapping { /// Which dimensions have selectors, and are they visible? pub selectors: Vec, diff --git a/crates/re_tensor_ops/tests/tensor_tests.rs b/crates/re_tensor_ops/tests/tensor_tests.rs index f13893c3f58e..c04cd4ebad98 100644 --- a/crates/re_tensor_ops/tests/tensor_tests.rs +++ b/crates/re_tensor_ops/tests/tensor_tests.rs @@ -1,5 +1,5 @@ use re_log_types::component_types::{ - Tensor, TensorCastError, TensorData, TensorDataMeaning, TensorDimension, TensorId, TensorTrait, + Tensor, TensorCastError, TensorData, TensorDataMeaning, TensorDimension, TensorId, }; #[test] diff --git a/crates/re_tuid/Cargo.toml b/crates/re_tuid/Cargo.toml index 4f8e66c70dd2..6dc45f5c34ff 100644 --- a/crates/re_tuid/Cargo.toml +++ b/crates/re_tuid/Cargo.toml @@ -19,23 +19,24 @@ all-features = true [features] default = [] +## Enable converting Tuid to arrow2 +arrow2_convert = ["dep:arrow2", "dep:arrow2_convert"] + ## Enable (de)serialization using serde. serde = ["dep:serde"] [dependencies] -arrow2_convert.workspace = true -arrow2.workspace = true document-features = "0.2" +getrandom = "0.2" +instant = "0.1" once_cell = "1.16" # Optional dependencies: +arrow2 = { workspace = true, optional = true } # used by arrow2_convert +arrow2_convert = { workspace = true, optional = true } serde = { version = "1", features = ["derive"], optional = true } -# native dependencies: -[target.'cfg(not(target_arch = "wasm32"))'.dependencies] -getrandom = "0.2" - [dev-dependencies] criterion = "0.4" diff --git a/crates/re_tuid/src/lib.rs b/crates/re_tuid/src/lib.rs index 49c0840dad3d..a23a971fbc32 100644 --- a/crates/re_tuid/src/lib.rs +++ b/crates/re_tuid/src/lib.rs @@ -6,10 +6,10 @@ #![doc = document_features::document_features!()] //! 
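For `Tuid` itself (reworked just below), the derived `PartialOrd`/`Ord` compares fields in declaration order, `time_ns` first and `inc` second, which is what makes freshly minted ids roughly time-sortable. A quick sketch of that property on a stand-in struct:

```rust
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Tuid {
    time_ns: u64, // compared first: derived Ord uses field declaration order
    inc: u64,     // tie-breaker within the same nanosecond
}

fn main() {
    let a = Tuid { time_ns: 1, inc: 9 };
    let b = Tuid { time_ns: 2, inc: 0 };
    assert!(a < b); // later time wins regardless of `inc`

    let c = Tuid { time_ns: 2, inc: 1 };
    assert!(b < c); // same time: `inc` breaks the tie
}
```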
-use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize};
-
-#[derive(
-    Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, ArrowField, ArrowSerialize, ArrowDeserialize,
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(
+    feature = "arrow2_convert",
+    derive(arrow2_convert::ArrowSerialize, arrow2_convert::ArrowDeserialize)
 )]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
 pub struct Tuid {
@@ -21,6 +21,29 @@ pub struct Tuid {
     inc: u64,
 }

+#[cfg(feature = "arrow2_convert")]
+arrow2_convert::arrow_enable_vec_for_type!(Tuid);
+
+// TODO(#1774): shouldn't have to write this manually
+#[cfg(feature = "arrow2_convert")]
+impl arrow2_convert::field::ArrowField for Tuid {
+    type Type = Self;
+
+    fn data_type() -> arrow2::datatypes::DataType {
+        let datatype = arrow2::datatypes::DataType::Struct(<[_]>::into_vec(Box::new([
+            <u64 as arrow2_convert::field::ArrowField>::field("time_ns"),
+            <u64 as arrow2_convert::field::ArrowField>::field("inc"),
+        ])));
+        arrow2::datatypes::DataType::Extension("rerun.tuid".into(), Box::new(datatype), None)
+    }
+}
+
+impl std::fmt::Display for Tuid {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:032X}", self.as_u128())
+    }
+}
+
 impl std::fmt::Debug for Tuid {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{:032X}", self.as_u128())
@@ -38,7 +61,6 @@ impl Tuid {
     };

     #[inline]
-    #[cfg(not(target_arch = "wasm32"))] // TODO(emilk): implement for wasm32 (needs ms since epoch).
     pub fn random() -> Self {
         use std::cell::RefCell;
@@ -79,39 +101,55 @@ impl Tuid {
     pub fn nanoseconds_since_epoch(&self) -> u64 {
         self.time_ns
     }
+
+    /// A shortened string representation of the `Tuid`.
+    #[inline]
+    pub fn short_string(&self) -> String {
+        // We still want this to look like a part of the full TUID (i.e. what is printed on
+        // `std::fmt::Display`).
+        // Per-thread randomness plus increment is in the last part, so show only that.
+        // (the first half is time in nanoseconds which for the _most part_ doesn't change that
+        // often)
+        let str = self.to_string();
+        str[(str.len() - 8)..].to_string()
+    }
 }

 /// Returns a high-precision, monotonically increasing count that approximates nanoseconds since unix epoch.
 #[inline]
-#[cfg(not(target_arch = "wasm32"))]
 fn monotonic_nanos_since_epoch() -> u64 {
     // This can maybe be optimized
+    use instant::Instant;
     use once_cell::sync::Lazy;
-    use std::time::Instant;
-
-    fn epoch_offset_and_start() -> (u64, Instant) {
-        if let Ok(duration_since_epoch) = std::time::UNIX_EPOCH.elapsed() {
-            let nanos_since_epoch = duration_since_epoch.as_nanos() as u64;
-            (nanos_since_epoch, Instant::now())
-        } else {
-            // system time is set before 1970. this should be quite rare.
-            (0, Instant::now())
-        }
-    }

-    static START_TIME: Lazy<(u64, Instant)> = Lazy::new(epoch_offset_and_start);
+    static START_TIME: Lazy<(u64, Instant)> = Lazy::new(|| (nanos_since_epoch(), Instant::now()));
     START_TIME.0 + START_TIME.1.elapsed().as_nanos() as u64
 }

+fn nanos_since_epoch() -> u64 {
+    if let Ok(duration_since_epoch) = instant::SystemTime::UNIX_EPOCH.elapsed() {
+        let mut nanos_since_epoch = duration_since_epoch.as_nanos() as u64;
+
+        if cfg!(target_arch = "wasm32") {
+            // Browsers notoriously round to the nearest millisecond (because of Spectre/Meltdown),
+            // so we add a bit of extra randomness here to increase our entropy and reduce the chance of collisions:
+            nanos_since_epoch += random_u64() % 1_000_000;
+        }
+
+        nanos_since_epoch
+    } else {
+        // system time is set before 1970. this should be quite rare.
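The `monotonic_nanos_since_epoch` scheme above samples the wall clock once and then only adds a monotonic delta, so later calls can never go backwards even if the system clock is adjusted. A standalone sketch using only `std` (`OnceLock` in place of `once_cell::Lazy`, `std::time` in place of the `instant` shim):

```rust
use std::sync::OnceLock;
use std::time::{Instant, SystemTime, UNIX_EPOCH};

/// Approximate "nanoseconds since the unix epoch", but monotonic:
/// the wall clock is read once; afterwards only a monotonic timer advances.
fn monotonic_nanos_since_epoch() -> u64 {
    static START: OnceLock<(u64, Instant)> = OnceLock::new();
    let (epoch_offset, start) = *START.get_or_init(|| {
        let offset = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |d| d.as_nanos() as u64); // clock before 1970: very rare
        (offset, Instant::now())
    });
    epoch_offset + start.elapsed().as_nanos() as u64
}

fn main() {
    let a = monotonic_nanos_since_epoch();
    let b = monotonic_nanos_since_epoch();
    assert!(b >= a); // never goes backwards, even if the wall clock jumps
}
```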
+ 0 + } +} + #[inline] -#[cfg(not(target_arch = "wasm32"))] fn random_u64() -> u64 { let mut bytes = [0_u8; 8]; - getrandom::getrandom(&mut bytes).expect("Couldn't get inc"); + getrandom::getrandom(&mut bytes).expect("Couldn't get random bytes"); u64::from_le_bytes(bytes) } -#[cfg(not(target_arch = "wasm32"))] #[test] fn test_tuid() { use std::collections::{BTreeSet, HashSet}; diff --git a/crates/re_ui/src/command.rs b/crates/re_ui/src/command.rs index 24a3821f22fd..f41bf4fa43b0 100644 --- a/crates/re_ui/src/command.rs +++ b/crates/re_ui/src/command.rs @@ -45,6 +45,7 @@ pub enum Command { PlaybackTogglePlayPause, PlaybackStepBack, PlaybackStepForward, + PlaybackRestart, } impl Command { @@ -124,6 +125,7 @@ impl Command { "Step time forward", "Move the time marker to the next point in time with any data", ), + Command::PlaybackRestart => ("Restart", "Restart from beginning of timeline"), } } @@ -183,6 +185,7 @@ impl Command { Command::PlaybackTogglePlayPause => Some(key(Key::Space)), Command::PlaybackStepBack => Some(key(Key::ArrowLeft)), Command::PlaybackStepForward => Some(key(Key::ArrowRight)), + Command::PlaybackRestart => Some(cmd(Key::ArrowLeft)), } } diff --git a/crates/re_ui/src/lib.rs b/crates/re_ui/src/lib.rs index 222473b8ed15..d5d9569cf7b2 100644 --- a/crates/re_ui/src/lib.rs +++ b/crates/re_ui/src/lib.rs @@ -480,11 +480,13 @@ impl ReUi { .inner } - /// Grid to be used in selection view. + /// Two-column grid to be used in selection view. #[allow(clippy::unused_self)] pub fn selection_grid(&self, ui: &mut egui::Ui, id: &str) -> egui::Grid { // Spread rows a bit to make it easier to see the groupings - egui::Grid::new(id).spacing(ui.style().spacing.item_spacing + egui::vec2(0.0, 8.0)) + egui::Grid::new(id) + .num_columns(2) + .spacing(ui.style().spacing.item_spacing + egui::vec2(0.0, 8.0)) } /// Draws a shadow into the given rect with the shadow direction given from dark to light diff --git a/crates/re_viewer/Cargo.toml b/crates/re_viewer/Cargo.toml index aebdbfc3db2d..b242e56f2751 100644 --- a/crates/re_viewer/Cargo.toml +++ b/crates/re_viewer/Cargo.toml @@ -41,19 +41,13 @@ re_build_info.workspace = true re_data_store = { workspace = true, features = ["serde"] } re_error.workspace = true re_format.workspace = true +re_log_encoding = { workspace = true, features = ["decoder", "encoder"] } +re_log_types = { workspace = true, features = ["ecolor", "glam", "image"] } re_log.workspace = true -re_log_types = { workspace = true, features = [ - "ecolor", - "glam", - "image", - "save", - "load", -] } re_memory.workspace = true re_query.workspace = true re_renderer = { workspace = true, features = ["arrow", "serde"] } re_smart_channel.workspace = true -re_string_interner.workspace = true re_tensor_ops.workspace = true re_ui.workspace = true re_ws_comms = { workspace = true, features = ["client"] } @@ -82,7 +76,6 @@ egui = { workspace = true, features = ["extra_debug_asserts", "tracing"] } egui_dock = { workspace = true, features = ["serde"] } egui_extras = { workspace = true, features = ["tracing"] } egui-wgpu.workspace = true -ehttp = "0.2" enumset.workspace = true glam = { workspace = true, features = [ "mint", @@ -104,7 +97,7 @@ rfd = { version = "0.11.3", default_features = false, features = [ ] } serde = { version = "1", features = ["derive"] } slotmap = { version = "1.0.6", features = ["serde"] } -smallvec = { version = "1.10", features = ["serde"] } +smallvec = { workspace = true, features = ["serde"] } thiserror.workspace = true time = { workspace = true, default-features = false, 
features = ["formatting", "wasm-bindgen"] } uuid = { version = "1.1", features = ["serde", "v4", "js"] } @@ -131,13 +124,7 @@ winapi = "0.3.9" [target.'cfg(target_arch = "wasm32")'.dependencies] console_error_panic_hook = "0.1.6" wasm-bindgen-futures = "0.4" -js-sys = "0.3" -wasm-bindgen = "0.2" - -[dependencies.web-sys] -version = "0.3.52" -features = ["Window"] - +web-sys = { version = "0.3.52", features = ["Window"] } [build-dependencies] re_build_build_info.workspace = true diff --git a/crates/re_viewer/src/app.rs b/crates/re_viewer/src/app.rs index 0e0e81b568f3..c51d5e9a3534 100644 --- a/crates/re_viewer/src/app.rs +++ b/crates/re_viewer/src/app.rs @@ -8,7 +8,7 @@ use itertools::Itertools as _; use nohash_hasher::IntMap; use poll_promise::Promise; -use re_arrow_store::DataStoreStats; +use re_arrow_store::{DataStoreConfig, DataStoreStats}; use re_data_store::log_db::LogDb; use re_format::format_number; use re_log_types::{ApplicationId, LogMsg, RecordingId}; @@ -38,6 +38,7 @@ enum TimeControlCommand { TogglePlayPause, StepBack, StepForward, + Restart, } // ---------------------------------------------------------------------------- @@ -46,6 +47,7 @@ enum TimeControlCommand { #[derive(Clone, Copy, Default)] pub struct StartupOptions { pub memory_limit: re_memory::MemoryLimit, + pub persist_state: bool, } // ---------------------------------------------------------------------------- @@ -120,9 +122,13 @@ impl App { ); } - let state: AppState = storage - .and_then(|storage| eframe::get_value(storage, eframe::APP_KEY)) - .unwrap_or_default(); + let state: AppState = if startup_options.persist_state { + storage + .and_then(|storage| eframe::get_value(storage, eframe::APP_KEY)) + .unwrap_or_default() + } else { + AppState::default() + }; let mut analytics = ViewerAnalytics::new(); analytics.on_viewer_started(&build_info, app_env); @@ -339,6 +345,9 @@ impl App { Command::PlaybackStepForward => { self.run_time_control_command(TimeControlCommand::StepForward); } + Command::PlaybackRestart => { + self.run_time_control_command(TimeControlCommand::Restart); + } } } @@ -360,6 +369,9 @@ impl App { TimeControlCommand::StepForward => { time_ctrl.step_time_fwd(times_per_timeline); } + TimeControlCommand::Restart => { + time_ctrl.restart(times_per_timeline); + } } } @@ -387,6 +399,7 @@ impl App { &mut self, ui: &mut egui::Ui, gpu_resource_stats: &WgpuResourcePoolStatistics, + store_config: &DataStoreConfig, store_stats: &DataStoreStats, ) { let frame = egui::Frame { @@ -403,6 +416,7 @@ impl App { ui, &self.startup_options.memory_limit, gpu_resource_stats, + store_config, store_stats, ); }); @@ -421,7 +435,9 @@ impl eframe::App for App { } fn save(&mut self, storage: &mut dyn eframe::Storage) { - eframe::set_value(storage, eframe::APP_KEY, &self.state); + if self.startup_options.persist_state { + eframe::set_value(storage, eframe::APP_KEY, &self.state); + } } fn update(&mut self, egui_ctx: &egui::Context, frame: &mut eframe::Frame) { @@ -475,9 +491,11 @@ impl eframe::App for App { render_ctx.gpu_resources.statistics() }; + let store_config = self.log_db().entity_db.data_store.config().clone(); let store_stats = DataStoreStats::from_store(&self.log_db().entity_db.data_store); - self.memory_panel.update(&gpu_resource_stats, &store_stats); // do first, before doing too many allocations + // do first, before doing too many allocations + self.memory_panel.update(&gpu_resource_stats, &store_stats); self.check_keyboard_shortcuts(egui_ctx); @@ -506,7 +524,7 @@ impl eframe::App for App { top_panel(ui, frame, self, 
&gpu_resource_stats); - self.memory_panel_ui(ui, &gpu_resource_stats, &store_stats); + self.memory_panel_ui(ui, &gpu_resource_stats, &store_config, &store_stats); let log_db = self.log_dbs.entry(self.state.selected_rec_id).or_default(); let selected_app_id = log_db @@ -527,7 +545,7 @@ impl eframe::App for App { log_db, ) .selection_state - .on_frame_start(log_db, blueprint); + .on_frame_start(blueprint); { // TODO(andreas): store the re_renderer somewhere else. @@ -637,6 +655,9 @@ fn wait_screen_ui(ui: &mut egui::Ui, rx: &Receiver) { re_smart_channel::Source::RrdHttpStream { url } => { ui.strong(format!("Loading {url}…")); } + re_smart_channel::Source::RrdWebEventListener => { + ready_and_waiting(ui, "Waiting for logging data…"); + } re_smart_channel::Source::Sdk => { ready_and_waiting(ui, "Waiting for logging data from SDK"); } @@ -685,34 +706,37 @@ impl App { let start = instant::Instant::now(); while let Ok(msg) = self.rx.try_recv() { - let is_new_recording = if let LogMsg::BeginRecordingMsg(msg) = &msg { - re_log::debug!("Opening a new recording: {:?}", msg.info); - self.state.selected_rec_id = msg.info.recording_id; - true - } else { - false - }; + // All messages except [`LogMsg::GoodBye`] should have an associated recording id + if let Some(recording_id) = msg.recording_id() { + let is_new_recording = if let LogMsg::BeginRecordingMsg(msg) = &msg { + re_log::debug!("Opening a new recording: {:?}", msg.info); + self.state.selected_rec_id = msg.info.recording_id; + true + } else { + false + }; - let log_db = self.log_dbs.entry(self.state.selected_rec_id).or_default(); + let log_db = self.log_dbs.entry(*recording_id).or_default(); - if log_db.data_source.is_none() { - log_db.data_source = Some(self.rx.source().clone()); - } + if log_db.data_source.is_none() { + log_db.data_source = Some(self.rx.source().clone()); + } - if let Err(err) = log_db.add(msg) { - re_log::error!("Failed to add incoming msg: {err}"); - }; + if let Err(err) = log_db.add(&msg) { + re_log::error!("Failed to add incoming msg: {err}"); + }; - if is_new_recording { - // Do analytics after ingesting the new message, - // because thats when the `log_db.recording_info` is set, - // which we use in the analytics call. - self.analytics.on_open_recording(log_db); - } + if is_new_recording { + // Do analytics after ingesting the new message, + // because thats when the `log_db.recording_info` is set, + // which we use in the analytics call. 
+ self.analytics.on_open_recording(log_db); + } - if start.elapsed() > instant::Duration::from_millis(10) { - egui_ctx.request_repaint(); // make sure we keep receiving messages asap - break; // don't block the main thread for too long + if start.elapsed() > instant::Duration::from_millis(10) { + egui_ctx.request_repaint(); // make sure we keep receiving messages asap + break; // don't block the main thread for too long + } } } } @@ -910,17 +934,6 @@ fn preview_files_being_dropped(egui_ctx: &egui::Context) { enum PanelSelection { #[default] Viewport, - - EventLog, -} - -impl fmt::Display for PanelSelection { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - PanelSelection::Viewport => write!(f, "Viewport"), - PanelSelection::EventLog => write!(f, "Event log"), - } - } } #[derive(Default, serde::Deserialize, serde::Serialize)] @@ -944,8 +957,6 @@ struct AppState { /// Which view panel is currently being shown panel_selection: PanelSelection, - event_log_view: crate::event_log_view::EventLogView, - selection_panel: crate::selection_panel::SelectionPanel, time_panel: crate::time_panel::TimePanel, @@ -976,7 +987,6 @@ impl AppState { selected_rec_id, recording_configs, panel_selection, - event_log_view, blueprints, selection_panel, time_panel, @@ -1024,7 +1034,6 @@ impl AppState { .entry(selected_app_id) .or_insert_with(|| Blueprint::new(ui.ctx())) .blueprint_panel_and_viewport(&mut ctx, ui), - PanelSelection::EventLog => event_log_view.ui(&mut ctx, ui), }); // move time last, so we get to see the first data first! @@ -1533,7 +1542,13 @@ fn save(app: &mut App, loop_selection: Option<(re_data_store::Timeline, TimeRang .set_title(title) .save_file() { - let f = save_database_to_file(app.log_db(), path, loop_selection); + let f = match save_database_to_file(app.log_db(), path, loop_selection) { + Ok(f) => f, + Err(err) => { + re_log::error!("File saving failed: {err}"); + return; + } + }; if let Err(err) = app.spawn_threaded_promise(FILE_SAVER_PROMISE, f) { // NOTE: Shouldn't even be possible as the "Save" button is already // grayed out at this point... better safe than sorry though. @@ -1556,16 +1571,6 @@ fn main_view_selector_ui(ui: &mut egui::Ui, app: &mut App) { { ui.close_menu(); } - if ui - .selectable_value( - &mut app.state.panel_selection, - PanelSelection::EventLog, - "Event Log", - ) - .clicked() - { - ui.close_menu(); - } }); } } @@ -1770,68 +1775,67 @@ fn save_database_to_file( log_db: &LogDb, path: std::path::PathBuf, time_selection: Option<(re_data_store::Timeline, TimeRangeF)>, -) -> impl FnOnce() -> anyhow::Result { - use re_log_types::{EntityPathOpMsg, TimeInt}; - - let msgs = match time_selection { - // Fast path: no query, just dump everything. - None => log_db - .chronological_log_messages() - .cloned() - .collect::>(), - - // Query path: time to filter! - Some((timeline, range)) => { - use std::ops::RangeInclusive; - let range: RangeInclusive = range.min.floor()..=range.max.ceil(); - log_db - .chronological_log_messages() - .filter(|msg| { - match msg { - LogMsg::BeginRecordingMsg(_) | LogMsg::Goodbye(_) => { - true // timeless - } - LogMsg::EntityPathOpMsg(EntityPathOpMsg { time_point, .. 
}) => { - time_point.is_timeless() || { - let is_within_range = time_point - .get(&timeline) - .map_or(false, |t| range.contains(t)); - is_within_range - } - } - LogMsg::ArrowMsg(_) => { - // TODO(john) - false - } - } - }) - .cloned() - .collect::>() - } - }; +) -> anyhow::Result anyhow::Result> { + use re_arrow_store::TimeRange; + + crate::profile_scope!("dump_messages"); + + let begin_rec_msg = log_db + .recording_msg() + .map(|msg| LogMsg::BeginRecordingMsg(msg.clone())); + + let ent_op_msgs = log_db + .iter_entity_op_msgs() + .map(|msg| LogMsg::EntityPathOpMsg(log_db.recording_id(), msg.clone())) + .collect_vec(); - move || { + let time_filter = time_selection.map(|(timeline, range)| { + ( + timeline, + TimeRange::new(range.min.floor(), range.max.ceil()), + ) + }); + let data_msgs: Result, _> = log_db + .entity_db + .data_store + .to_data_tables(time_filter) + .map(|table| { + table + .to_arrow_msg() + .map(|msg| LogMsg::ArrowMsg(log_db.recording_id(), msg)) + }) + .collect(); + + use anyhow::Context as _; + let data_msgs = data_msgs.with_context(|| "Failed to export to data tables")?; + + let msgs = std::iter::once(begin_rec_msg) + .flatten() // option + .chain(ent_op_msgs) + .chain(data_msgs); + + Ok(move || { crate::profile_scope!("save_to_file"); use anyhow::Context as _; let file = std::fs::File::create(path.as_path()) .with_context(|| format!("Failed to create file at {path:?}"))?; - re_log_types::encoding::encode(msgs.iter(), file) + re_log_encoding::encoder::encode_owned(msgs, file) .map(|_| path) .context("Message encode") - } + }) } #[allow(unused_mut)] fn load_rrd_to_log_db(mut read: impl std::io::Read) -> anyhow::Result { crate::profile_function!(); - let decoder = re_log_types::encoding::Decoder::new(read)?; + let decoder = re_log_encoding::decoder::Decoder::new(read)?; let mut log_db = LogDb::default(); for msg in decoder { - log_db.add(msg?)?; + log_db.add(&msg?)?; } Ok(log_db) } @@ -1906,9 +1910,9 @@ fn new_recording_confg( let play_state = match data_source { // Play files from the start by default - it feels nice and alive./ // RrdHttpStream downloads the whole file before decoding it, so we treat it the same as a file. - re_smart_channel::Source::File { .. } | re_smart_channel::Source::RrdHttpStream { .. } => { - PlayState::Playing - } + re_smart_channel::Source::File { .. } + | re_smart_channel::Source::RrdHttpStream { .. } + | re_smart_channel::Source::RrdWebEventListener => PlayState::Playing, // Live data - follow it! re_smart_channel::Source::Sdk diff --git a/crates/re_viewer/src/gpu_bridge/mod.rs b/crates/re_viewer/src/gpu_bridge/mod.rs new file mode 100644 index 000000000000..42c73acf03ef --- /dev/null +++ b/crates/re_viewer/src/gpu_bridge/mod.rs @@ -0,0 +1,227 @@ +//! Bridge to `re_renderer` + +mod tensor_to_gpu; +pub use tensor_to_gpu::tensor_to_gpu; + +// ---------------------------------------------------------------------------- + +use egui::mutex::Mutex; + +use re_renderer::{ + renderer::{ColormappedTexture, RectangleOptions}, + resource_managers::{GpuTexture2DHandle, Texture2DCreationDesc}, + RenderContext, ViewBuilder, +}; + +// ---------------------------------------------------------------------------- + +/// Errors that can happen when supplying a tensor range to the GPU. +#[derive(thiserror::Error, Debug, PartialEq, Eq)] +pub enum RangeError { + /// This is weird. 
Should only happen with JPEGs, and those should have been decoded already.
+    #[error("Missing a range.")]
+    MissingRange,
+
+    #[error("Non-finite range of values")]
+    NonfiniteRange,
+}
+
+/// Get a valid, finite range for the gpu to use.
+pub fn range(tensor_stats: &crate::misc::caches::TensorStats) -> Result<[f32; 2], RangeError> {
+    let (min, max) = tensor_stats.range.ok_or(RangeError::MissingRange)?;
+
+    let min = min as f32;
+    let max = max as f32;
+
+    if !min.is_finite() || !max.is_finite() {
+        Err(RangeError::NonfiniteRange)
+    } else if min == max {
+        // uniform range. This can explode the colormapping, so let's map all colors to the middle:
+        Ok([min - 1.0, max + 1.0])
+    } else {
+        Ok([min, max])
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+pub fn viewport_resolution_in_pixels(clip_rect: egui::Rect, pixels_from_point: f32) -> [u32; 2] {
+    let min = (clip_rect.min.to_vec2() * pixels_from_point).round();
+    let max = (clip_rect.max.to_vec2() * pixels_from_point).round();
+    let resolution = max - min;
+    [resolution.x as u32, resolution.y as u32]
+}
+
+pub fn try_get_or_create_texture<'a, Err>(
+    render_ctx: &mut RenderContext,
+    texture_key: u64,
+    try_create_texture_desc: impl FnOnce() -> Result<Texture2DCreationDesc<'a>, Err>,
+) -> Result<GpuTexture2DHandle, Err> {
+    render_ctx.texture_manager_2d.get_or_create_with(
+        texture_key,
+        &mut render_ctx.gpu_resources.textures,
+        try_create_texture_desc,
+    )
+}
+
+pub fn get_or_create_texture<'a>(
+    render_ctx: &mut RenderContext,
+    texture_key: u64,
+    create_texture_desc: impl FnOnce() -> Texture2DCreationDesc<'a>,
+) -> GpuTexture2DHandle {
+    enum Never {}
+    let result: Result<GpuTexture2DHandle, Never> = render_ctx
+        .texture_manager_2d
+        .get_or_create_with(texture_key, &mut render_ctx.gpu_resources.textures, || {
+            Ok(create_texture_desc())
+        });
+    match result {
+        Ok(handle) => handle,
+        Err(never) => match never {},
+    }
+}
+
+/// Render a `re_renderer` view using the given clip rectangle.
+pub fn renderer_paint_callback(
+    render_ctx: &mut re_renderer::RenderContext,
+    command_buffer: wgpu::CommandBuffer,
+    view_builder: re_renderer::ViewBuilder,
+    clip_rect: egui::Rect,
+    pixels_from_point: f32,
+) -> egui::PaintCallback {
+    crate::profile_function!();
+
+    slotmap::new_key_type! { pub struct ViewBuilderHandle; }
+
+    type ViewBuilderMap = slotmap::SlotMap<ViewBuilderHandle, ViewBuilder>;
+
+    // egui paint callbacks are copyable / not FnOnce (this in turn is because egui primitives can be callbacks and are copyable)
+    let command_buffer = std::sync::Arc::new(Mutex::new(Some(command_buffer)));
+
+    let composition_view_builder_map = render_ctx
+        .active_frame
+        .per_frame_data_helper
+        .entry::<ViewBuilderMap>()
+        .or_insert_with(Default::default);
+    let view_builder_handle = composition_view_builder_map.insert(view_builder);
+
+    let screen_position = (clip_rect.min.to_vec2() * pixels_from_point).round();
+    let screen_position = glam::vec2(screen_position.x, screen_position.y);
+
+    egui::PaintCallback {
+        rect: clip_rect,
+        callback: std::sync::Arc::new(
+            egui_wgpu::CallbackFn::new()
+                .prepare(
+                    move |_device, _queue, _encoder, _paint_callback_resources| {
+                        let mut command_buffer = command_buffer.lock();
+                        vec![std::mem::replace(&mut *command_buffer, None)
+                            .expect("egui_wgpu prepare callback called more than once")]
+                    },
+                )
+                .paint(move |_info, render_pass, paint_callback_resources| {
+                    crate::profile_scope!("paint");
+                    // TODO(andreas): This should work as well but doesn't work in the 3d view.
+                    // Looks like a bug in egui, but unclear what's going on.
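`range` above has two guards worth noting: non-finite bounds are rejected, and a zero-width range (`min == max`) is widened so the colormap has something to divide by. The logic in isolation, with a `&'static str` standing in for `RangeError`:

```rust
fn colormap_range(min: f32, max: f32) -> Result<[f32; 2], &'static str> {
    if !min.is_finite() || !max.is_finite() {
        Err("non-finite range of values")
    } else if min == max {
        // A zero-width range would explode the colormapping;
        // widen it so the single value maps to the middle:
        Ok([min - 1.0, max + 1.0])
    } else {
        Ok([min, max])
    }
}

fn main() {
    assert_eq!(colormap_range(0.5, 0.5), Ok([-0.5, 1.5]));
    assert_eq!(colormap_range(0.0, 1.0), Ok([0.0, 1.0]));
    assert!(colormap_range(f32::NAN, 1.0).is_err());
}
```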
+ //let clip_rect = info.clip_rect_in_pixels(); + + let ctx = paint_callback_resources.get::().unwrap(); + ctx.active_frame + .per_frame_data_helper + .get::() + .unwrap()[view_builder_handle] + .composite(ctx, render_pass, screen_position); + }), + ), + } +} + +/// Render the given image, respecting the clip rectangle of the given painter. +pub fn render_image( + render_ctx: &mut re_renderer::RenderContext, + painter: &egui::Painter, + image_rect_on_screen: egui::Rect, + colormapped_texture: ColormappedTexture, + texture_options: egui::TextureOptions, + debug_name: &str, +) -> anyhow::Result<()> { + crate::profile_function!(); + + use re_renderer::renderer::{TextureFilterMag, TextureFilterMin}; + + let clip_rect = painter.clip_rect().intersect(image_rect_on_screen); + if !clip_rect.is_positive() { + return Ok(()); + } + + // Where in "world space" to paint the image. + let space_rect = egui::Rect::from_min_size(egui::Pos2::ZERO, image_rect_on_screen.size()); + + let textured_rectangle = re_renderer::renderer::TexturedRect { + top_left_corner_position: glam::vec3(space_rect.min.x, space_rect.min.y, 0.0), + extent_u: glam::Vec3::X * space_rect.width(), + extent_v: glam::Vec3::Y * space_rect.height(), + colormapped_texture, + options: RectangleOptions { + texture_filter_magnification: match texture_options.magnification { + egui::TextureFilter::Nearest => TextureFilterMag::Nearest, + egui::TextureFilter::Linear => TextureFilterMag::Linear, + }, + texture_filter_minification: match texture_options.minification { + egui::TextureFilter::Nearest => TextureFilterMin::Nearest, + egui::TextureFilter::Linear => TextureFilterMin::Linear, + }, + multiplicative_tint: egui::Rgba::WHITE, + ..Default::default() + }, + }; + + // ------------------------------------------------------------------------ + + let pixels_from_points = painter.ctx().pixels_per_point(); + let ui_from_space = egui::emath::RectTransform::from_to(space_rect, image_rect_on_screen); + let space_from_ui = ui_from_space.inverse(); + let space_from_points = space_from_ui.scale().y; + let points_from_pixels = 1.0 / painter.ctx().pixels_per_point(); + let space_from_pixel = space_from_points * points_from_pixels; + + let resolution_in_pixel = + crate::gpu_bridge::viewport_resolution_in_pixels(clip_rect, pixels_from_points); + anyhow::ensure!(resolution_in_pixel[0] > 0 && resolution_in_pixel[1] > 0); + + let camera_position_space = space_from_ui.transform_pos(clip_rect.min); + + let top_left_position = glam::vec2(camera_position_space.x, camera_position_space.y); + let target_config = re_renderer::view_builder::TargetConfiguration { + name: debug_name.into(), + resolution_in_pixel, + view_from_world: macaw::IsoTransform::from_translation(-top_left_position.extend(0.0)), + projection_from_view: re_renderer::view_builder::Projection::Orthographic { + camera_mode: re_renderer::view_builder::OrthographicCameraMode::TopLeftCornerAndExtendZ, + vertical_world_size: space_from_pixel * resolution_in_pixel[1] as f32, + far_plane_distance: 1000.0, + }, + pixels_from_point: pixels_from_points, + auto_size_config: Default::default(), + outline_config: None, + }; + + let mut view_builder = ViewBuilder::new(render_ctx, target_config); + + view_builder.queue_draw(&re_renderer::renderer::RectangleDrawData::new( + render_ctx, + &[textured_rectangle], + )?); + + let command_buffer = view_builder.draw(render_ctx, re_renderer::Rgba::TRANSPARENT)?; + + painter.add(crate::gpu_bridge::renderer_paint_callback( + render_ctx, + command_buffer, + view_builder, + 
clip_rect, + painter.ctx().pixels_per_point(), + )); + + Ok(()) +} diff --git a/crates/re_viewer/src/gpu_bridge/tensor_to_gpu.rs b/crates/re_viewer/src/gpu_bridge/tensor_to_gpu.rs new file mode 100644 index 000000000000..990be4172fed --- /dev/null +++ b/crates/re_viewer/src/gpu_bridge/tensor_to_gpu.rs @@ -0,0 +1,484 @@ +//! Upload [`Tensor`] to [`re_renderer`]. + +use std::borrow::Cow; + +use bytemuck::{allocation::pod_collect_to_vec, cast_slice, Pod}; +use egui::util::hash; +use wgpu::TextureFormat; + +use re_log_types::component_types::{Tensor, TensorData}; +use re_renderer::{ + renderer::{ColorMapper, ColormappedTexture}, + resource_managers::Texture2DCreationDesc, + RenderContext, +}; + +use crate::{gpu_bridge::get_or_create_texture, misc::caches::TensorStats}; + +use super::try_get_or_create_texture; + +// ---------------------------------------------------------------------------- + +/// Set up tensor for rendering on the GPU. +/// +/// This will only upload the tensor if it isn't on the GPU already. +/// +/// `tensor_stats` is used for determining the range of the texture. +// TODO(emilk): allow user to specify the range in ui. +pub fn tensor_to_gpu( + render_ctx: &mut RenderContext, + debug_name: &str, + tensor: &Tensor, + tensor_stats: &TensorStats, + annotations: &crate::ui::Annotations, +) -> anyhow::Result { + crate::profile_function!(format!( + "meaning: {:?}, dtype: {}, shape: {:?}", + tensor.meaning, + tensor.dtype(), + tensor.shape() + )); + + use re_log_types::component_types::TensorDataMeaning; + + match tensor.meaning { + TensorDataMeaning::Unknown => { + color_tensor_to_gpu(render_ctx, debug_name, tensor, tensor_stats) + } + TensorDataMeaning::ClassId => { + class_id_tensor_to_gpu(render_ctx, debug_name, tensor, tensor_stats, annotations) + } + TensorDataMeaning::Depth => { + depth_tensor_to_gpu(render_ctx, debug_name, tensor, tensor_stats) + } + } +} + +// ---------------------------------------------------------------------------- +// Color textures: + +fn color_tensor_to_gpu( + render_ctx: &mut RenderContext, + debug_name: &str, + tensor: &Tensor, + tensor_stats: &TensorStats, +) -> anyhow::Result { + let texture_handle = try_get_or_create_texture(render_ctx, hash(tensor.id()), || { + let [height, width, depth] = height_width_depth(tensor)?; + let (data, format) = match (depth, &tensor.data) { + // Use R8Unorm and R8Snorm to get filtering on the GPU: + (1, TensorData::U8(buf)) => (cast_slice_to_cow(buf.as_slice()), TextureFormat::R8Unorm), + (1, TensorData::I8(buf)) => (cast_slice_to_cow(buf), TextureFormat::R8Snorm), + + // Special handling for sRGB(A) textures: + (3, TensorData::U8(buf)) => ( + pad_and_cast(buf.as_slice(), 255), + TextureFormat::Rgba8UnormSrgb, + ), + (4, TensorData::U8(buf)) => ( + // TODO(emilk): premultiply alpha + cast_slice_to_cow(buf.as_slice()), + TextureFormat::Rgba8UnormSrgb, + ), + + _ => { + // Fallback to general case: + return general_texture_creation_desc_from_tensor(debug_name, tensor); + } + }; + + Ok(Texture2DCreationDesc { + label: debug_name.into(), + data, + format, + width, + height, + }) + })?; + + let gpu_texture = render_ctx.texture_manager_2d.get(&texture_handle); + let texture_format = gpu_texture.creation_desc.format; + + // Special casing for normalized textures used above: + let range = if matches!( + texture_format, + TextureFormat::R8Unorm | TextureFormat::Rgba8UnormSrgb + ) { + [0.0, 1.0] + } else if texture_format == TextureFormat::R8Snorm { + [-1.0, 1.0] + } else { + crate::gpu_bridge::range(tensor_stats)? 
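// A standalone sketch (not part of this diff) of the unit chain `render_image` above
// composes: egui positions are in points, the render target is in physical pixels, and the
// texture lives in "space" units. The zoom value in the example is made up.
fn space_from_pixel(space_from_points: f32, pixels_per_point: f32) -> f32 {
    let points_from_pixels = 1.0 / pixels_per_point;
    space_from_points * points_from_pixels
}
// E.g. on a 2x hidpi screen (pixels_per_point = 2.0) with a view where one point covers
// 0.5 space units, each physical pixel covers 0.5 * (1.0 / 2.0) = 0.25 space units.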
+ }; + + let color_mapper = if texture_format.describe().components == 1 { + // Single-channel images = luminance = grayscale + Some(ColorMapper::Function(re_renderer::Colormap::Grayscale)) + } else { + None + }; + + Ok(ColormappedTexture { + texture: texture_handle, + range, + gamma: 1.0, + color_mapper, + }) +} + +// ---------------------------------------------------------------------------- +// Textures with class_id annotations: + +fn class_id_tensor_to_gpu( + render_ctx: &mut RenderContext, + debug_name: &str, + tensor: &Tensor, + tensor_stats: &TensorStats, + annotations: &crate::ui::Annotations, +) -> anyhow::Result { + let [_height, _width, depth] = height_width_depth(tensor)?; + anyhow::ensure!( + depth == 1, + "Cannot apply annotations to tensor of shape {:?}", + tensor.shape + ); + anyhow::ensure!( + tensor.dtype().is_integer(), + "Only integer tensors can be annotated" + ); + + let (min, max) = tensor_stats + .range + .ok_or_else(|| anyhow::anyhow!("compressed_tensor!?"))?; + anyhow::ensure!(0.0 <= min, "Negative class id"); + + // create a lookup texture for the colors that's 256 wide, + // and as many rows as needed to fit all the classes. + anyhow::ensure!(max <= 65535.0, "Too many class ids"); + + // We pack the colormap into a 2D texture so we don't go over the max texture size. + // We only support u8 and u16 class ids, so 256^2 is the biggest texture we need. + let colormap_width = 256; + let colormap_height = (max as usize + colormap_width - 1) / colormap_width; + + let colormap_texture_handle = + get_or_create_texture(render_ctx, hash(annotations.row_id), || { + let data: Vec = (0..(colormap_width * colormap_height)) + .flat_map(|id| { + let color = annotations + .class_description(Some(re_log_types::component_types::ClassId(id as u16))) + .annotation_info() + .color(None, crate::ui::DefaultColor::TransparentBlack); + color.to_array() // premultiplied! + }) + .collect(); + + Texture2DCreationDesc { + label: "class_id_colormap".into(), + data: data.into(), + format: TextureFormat::Rgba8UnormSrgb, + width: colormap_width as u32, + height: colormap_height as u32, + } + }); + + let main_texture_handle = try_get_or_create_texture(render_ctx, hash(tensor.id()), || { + general_texture_creation_desc_from_tensor(debug_name, tensor) + })?; + + Ok(ColormappedTexture { + texture: main_texture_handle, + range: [0.0, (colormap_width * colormap_height) as f32], + gamma: 1.0, + color_mapper: Some(ColorMapper::Texture(colormap_texture_handle)), + }) +} + +// ---------------------------------------------------------------------------- +// Depth textures: + +fn depth_tensor_to_gpu( + render_ctx: &mut RenderContext, + debug_name: &str, + tensor: &Tensor, + tensor_stats: &TensorStats, +) -> anyhow::Result { + let [_height, _width, depth] = height_width_depth(tensor)?; + anyhow::ensure!( + depth == 1, + "Depth tensor of weird shape: {:?}", + tensor.shape + ); + let (min, max) = depth_tensor_range(tensor, tensor_stats)?; + + let texture = try_get_or_create_texture(render_ctx, hash(tensor.id()), || { + general_texture_creation_desc_from_tensor(debug_name, tensor) + })?; + + Ok(ColormappedTexture { + texture, + range: [min as f32, max as f32], + gamma: 1.0, + color_mapper: Some(ColorMapper::Function(re_renderer::Colormap::Turbo)), + }) +} + +fn depth_tensor_range(tensor: &Tensor, tensor_stats: &TensorStats) -> anyhow::Result<(f64, f64)> { + let range = tensor_stats.range.ok_or(anyhow::anyhow!( + "Tensor has no range!? Was this compressed?" 
+ ))?; + let (mut min, mut max) = range; + + anyhow::ensure!( + min.is_finite() && max.is_finite(), + "Tensor has non-finite values" + ); + + min = min.min(0.0); // Depth usually start at zero. + + if min == max { + // Uniform image. We can't remap it to a 0-1 range, so do whatever: + min = 0.0; + max = if tensor.dtype().is_float() { + 1.0 + } else { + tensor.dtype().max_value() + }; + } + + Ok((min, max)) +} + +// ---------------------------------------------------------------------------- + +/// Uploads the tensor to a texture in a format that closely resembled the input. +/// Uses no `Unorm/Snorm` formats. +fn general_texture_creation_desc_from_tensor<'a>( + debug_name: &str, + tensor: &'a Tensor, +) -> anyhow::Result> { + let [height, width, depth] = height_width_depth(tensor)?; + + let (data, format) = match depth { + 1 => { + match &tensor.data { + TensorData::U8(buf) => (cast_slice_to_cow(buf.as_slice()), TextureFormat::R8Uint), + TensorData::U16(buf) => (cast_slice_to_cow(buf), TextureFormat::R16Uint), + TensorData::U32(buf) => (cast_slice_to_cow(buf), TextureFormat::R32Uint), + TensorData::U64(buf) => (narrow_u64_to_f32s(buf), TextureFormat::R32Float), // narrowing to f32! + + TensorData::I8(buf) => (cast_slice_to_cow(buf), TextureFormat::R8Sint), + TensorData::I16(buf) => (cast_slice_to_cow(buf), TextureFormat::R16Sint), + TensorData::I32(buf) => (cast_slice_to_cow(buf), TextureFormat::R32Sint), + TensorData::I64(buf) => (narrow_i64_to_f32s(buf), TextureFormat::R32Float), // narrowing to f32! + + // TensorData::F16(buf) => (cast_slice_to_cow(buf), TextureFormat::R16Float), TODO(#854) + TensorData::F32(buf) => (cast_slice_to_cow(buf), TextureFormat::R32Float), + TensorData::F64(buf) => (narrow_f64_to_f32s(buf), TextureFormat::R32Float), // narrowing to f32! + + TensorData::JPEG(_) => { + anyhow::bail!("JPEGs should have been decoded at this point") + } + } + } + 2 => { + // NOTE: 2-channel images are not supported by the shader yet, but are included here for completeness: + match &tensor.data { + TensorData::U8(buf) => (cast_slice_to_cow(buf.as_slice()), TextureFormat::Rg8Uint), + TensorData::U16(buf) => (cast_slice_to_cow(buf), TextureFormat::Rg16Uint), + TensorData::U32(buf) => (cast_slice_to_cow(buf), TextureFormat::Rg32Uint), + TensorData::U64(buf) => (narrow_u64_to_f32s(buf), TextureFormat::Rg32Float), // narrowing to f32! + + TensorData::I8(buf) => (cast_slice_to_cow(buf), TextureFormat::Rg8Sint), + TensorData::I16(buf) => (cast_slice_to_cow(buf), TextureFormat::Rg16Sint), + TensorData::I32(buf) => (cast_slice_to_cow(buf), TextureFormat::Rg32Sint), + TensorData::I64(buf) => (narrow_i64_to_f32s(buf), TextureFormat::Rg32Float), // narrowing to f32! + + // TensorData::F16(buf) => (cast_slice_to_cow(buf), TextureFormat::Rg16Float), TODO(#854) + TensorData::F32(buf) => (cast_slice_to_cow(buf), TextureFormat::Rg32Float), + TensorData::F64(buf) => (narrow_f64_to_f32s(buf), TextureFormat::Rg32Float), // narrowing to f32! + + TensorData::JPEG(_) => { + anyhow::bail!("JPEGs should have been decoded at this point") + } + } + } + 3 => { + // There are no 3-channel textures in wgpu, so we need to pad to 4 channels. + // What should we pad with? It depends on whether or not the shader interprets these as alpha. + // To be safe, we pad with the MAX value of integers, and with 1.0 for floats. + // TODO(emilk): tell the shader to ignore the alpha channel instead! 
+ match &tensor.data { + TensorData::U8(buf) => ( + pad_and_cast(buf.as_slice(), u8::MAX), + TextureFormat::Rgba8Uint, + ), + TensorData::U16(buf) => (pad_and_cast(buf, u16::MAX), TextureFormat::Rgba16Uint), + TensorData::U32(buf) => (pad_and_cast(buf, u32::MAX), TextureFormat::Rgba32Uint), + TensorData::U64(buf) => ( + pad_and_narrow_and_cast(buf, 1.0, |x: u64| x as f32), + TextureFormat::Rgba32Float, + ), + + TensorData::I8(buf) => (pad_and_cast(buf, i8::MAX), TextureFormat::Rgba8Sint), + TensorData::I16(buf) => (pad_and_cast(buf, i16::MAX), TextureFormat::Rgba16Sint), + TensorData::I32(buf) => (pad_and_cast(buf, i32::MAX), TextureFormat::Rgba32Sint), + TensorData::I64(buf) => ( + pad_and_narrow_and_cast(buf, 1.0, |x: i64| x as f32), + TextureFormat::Rgba32Float, + ), + + // TensorData::F16(buf) => (pad_and_cast(buf, 1.0), TextureFormat::Rgba16Float), TODO(#854) + TensorData::F32(buf) => (pad_and_cast(buf, 1.0), TextureFormat::Rgba32Float), + TensorData::F64(buf) => ( + pad_and_narrow_and_cast(buf, 1.0, |x: f64| x as f32), + TextureFormat::Rgba32Float, + ), + + TensorData::JPEG(_) => { + anyhow::bail!("JPEGs should have been decoded at this point") + } + } + } + 4 => { + // TODO(emilk): premultiply alpha, or tell the shader to assume unmultiplied alpha + match &tensor.data { + TensorData::U8(buf) => { + (cast_slice_to_cow(buf.as_slice()), TextureFormat::Rgba8Uint) + } + TensorData::U16(buf) => (cast_slice_to_cow(buf), TextureFormat::Rgba16Uint), + TensorData::U32(buf) => (cast_slice_to_cow(buf), TextureFormat::Rgba32Uint), + TensorData::U64(buf) => (narrow_u64_to_f32s(buf), TextureFormat::Rgba32Float), // narrowing to f32! + + TensorData::I8(buf) => (cast_slice_to_cow(buf), TextureFormat::Rgba8Sint), + TensorData::I16(buf) => (cast_slice_to_cow(buf), TextureFormat::Rgba16Sint), + TensorData::I32(buf) => (cast_slice_to_cow(buf), TextureFormat::Rgba32Sint), + TensorData::I64(buf) => (narrow_i64_to_f32s(buf), TextureFormat::Rgba32Float), // narrowing to f32! + + // TensorData::F16(buf) => (cast_slice_to_cow(buf), TextureFormat::Rgba16Float), TODO(#854) + TensorData::F32(buf) => (cast_slice_to_cow(buf), TextureFormat::Rgba32Float), + TensorData::F64(buf) => (narrow_f64_to_f32s(buf), TextureFormat::Rgba32Float), // narrowing to f32! 
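// A standalone sketch (not part of this diff) of the padding the 3-channel arms above rely
// on: wgpu has no 3-channel texture formats, so every RGB triplet gets a constant fourth
// element appended. This mirrors the release-build path of `pad_to_four_elements`,
// defined further below.
fn pad_rgb_to_rgba<T: Copy>(rgb: &[T], pad: T) -> Vec<T> {
    rgb.chunks_exact(3)
        .flat_map(|chunk| [chunk[0], chunk[1], chunk[2], pad])
        .collect()
}
// E.g. pad_rgb_to_rgba(&[1u8, 2, 3, 4, 5, 6], 255) yields [1, 2, 3, 255, 4, 5, 6, 255].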
+ + TensorData::JPEG(_) => { + anyhow::bail!("JPEGs should have been decoded at this point") + } + } + } + depth => { + anyhow::bail!("Cannot create texture from tensor of depth {depth}"); + } + }; + + Ok(Texture2DCreationDesc { + label: debug_name.into(), + data, + format, + width, + height, + }) +} + +fn cast_slice_to_cow(slice: &[From]) -> Cow<'_, [u8]> { + cast_slice(slice).into() +} + +// wgpu doesn't support u64 textures, so we need to narrow to f32: +fn narrow_u64_to_f32s(slice: &[u64]) -> Cow<'static, [u8]> { + crate::profile_function!(); + let bytes: Vec = slice + .iter() + .flat_map(|&f| (f as f32).to_le_bytes()) + .collect(); + bytes.into() +} + +// wgpu doesn't support i64 textures, so we need to narrow to f32: +fn narrow_i64_to_f32s(slice: &[i64]) -> Cow<'static, [u8]> { + crate::profile_function!(); + let bytes: Vec = slice + .iter() + .flat_map(|&f| (f as f32).to_le_bytes()) + .collect(); + bytes.into() +} + +// wgpu doesn't support f64 textures, so we need to narrow to f32: +fn narrow_f64_to_f32s(slice: &[f64]) -> Cow<'static, [u8]> { + crate::profile_function!(); + let bytes: Vec = slice + .iter() + .flat_map(|&f| (f as f32).to_le_bytes()) + .collect(); + bytes.into() +} + +fn pad_to_four_elements(data: &[T], pad: T) -> Vec { + crate::profile_function!(); + if cfg!(debug_assertions) { + // fastest version in debug builds. + // 5x faster in debug builds, but 2x slower in release + let mut padded = vec![pad; data.len() / 3 * 4]; + for i in 0..(data.len() / 3) { + padded[4 * i] = data[3 * i]; + padded[4 * i + 1] = data[3 * i + 1]; + padded[4 * i + 2] = data[3 * i + 2]; + } + padded + } else { + // fastest version in optimized builds + data.chunks_exact(3) + .flat_map(|chunk| [chunk[0], chunk[1], chunk[2], pad]) + .collect() + } +} + +fn pad_and_cast(data: &[T], pad: T) -> Cow<'static, [u8]> { + crate::profile_function!(); + let padded: Vec = pad_to_four_elements(data, pad); + let bytes: Vec = pod_collect_to_vec(&padded); + bytes.into() +} + +fn pad_and_narrow_and_cast( + data: &[T], + pad: f32, + narrow: impl Fn(T) -> f32, +) -> Cow<'static, [u8]> { + crate::profile_function!(); + + let floats: Vec = data + .chunks_exact(3) + .flat_map(|chunk| [narrow(chunk[0]), narrow(chunk[1]), narrow(chunk[2]), pad]) + .collect(); + pod_collect_to_vec(&floats).into() +} + +// ----------------------------------------------------------------------------; + +fn height_width_depth(tensor: &Tensor) -> anyhow::Result<[u32; 3]> { + use anyhow::Context as _; + + let shape = &tensor.shape(); + + anyhow::ensure!( + shape.len() == 2 || shape.len() == 3, + "Expected a 2D or 3D tensor, got {shape:?}", + ); + + let [height, width] = [ + u32::try_from(shape[0].size).context("tensor too large")?, + u32::try_from(shape[1].size).context("tensor too large")?, + ]; + let depth = if shape.len() == 2 { 1 } else { shape[2].size }; + + anyhow::ensure!( + depth == 1 || depth == 3 || depth == 4, + "Expected depth of 1,3,4 (gray, RGB, RGBA), found {depth:?}. 
Tensor shape: {shape:?}" + ); + debug_assert!( + tensor.is_shaped_like_an_image(), + "We should make the same checks above, but with actual error messages" + ); + + Ok([height, width, depth as u32]) +} diff --git a/crates/re_viewer/src/lib.rs b/crates/re_viewer/src/lib.rs index 53d3398cfa35..32ba88c9a2d5 100644 --- a/crates/re_viewer/src/lib.rs +++ b/crates/re_viewer/src/lib.rs @@ -6,16 +6,16 @@ mod app; mod depthai; pub mod env_vars; +pub(crate) mod gpu_bridge; pub mod math; mod misc; mod remote_viewer_app; -pub mod stream_rrd_from_http; mod ui; mod viewer_analytics; pub(crate) use misc::{mesh_loader, Item, TimeControl, TimeView, ViewerContext}; use re_log_types::PythonVersion; -pub(crate) use ui::{event_log_view, memory_panel, selection_panel, time_panel, UiVerbosity}; +pub(crate) use ui::{memory_panel, selection_panel, time_panel, UiVerbosity}; pub use app::{App, StartupOptions}; pub use remote_viewer_app::RemoteViewerApp; diff --git a/crates/re_viewer/src/misc/caches/mod.rs b/crates/re_viewer/src/misc/caches/mod.rs index db61a56109e2..af2b8b989ebc 100644 --- a/crates/re_viewer/src/misc/caches/mod.rs +++ b/crates/re_viewer/src/misc/caches/mod.rs @@ -1,46 +1,38 @@ mod mesh_cache; mod tensor_decode_cache; -mod tensor_image_cache; -use re_log_types::component_types::{self, TensorTrait}; -pub use tensor_image_cache::ColoredTensorView; +use re_log_types::component_types::{self}; /// Does memoization of different things for the immediate mode UI. #[derive(Default)] pub struct Caches { /// For displaying images efficiently in immediate mode. - pub image: tensor_image_cache::ImageCache, pub decode: tensor_decode_cache::DecodeCache, /// For displaying meshes efficiently in immediate mode. pub mesh: mesh_cache::MeshCache, - pub tensor_stats: nohash_hasher::IntMap, + tensor_stats: nohash_hasher::IntMap, } impl Caches { /// Call once per frame to potentially flush the cache(s). pub fn begin_frame(&mut self) { - let max_image_cache_use = 1_000_000_000; - #[cfg(not(target_arch = "wasm32"))] let max_decode_cache_use = 4_000_000_000; #[cfg(target_arch = "wasm32")] let max_decode_cache_use = 1_000_000_000; - self.image.begin_frame(max_image_cache_use); self.decode.begin_frame(max_decode_cache_use); } pub fn purge_memory(&mut self) { let Self { - image, decode, tensor_stats, mesh: _, // TODO(emilk) } = self; - image.purge_memory(); decode.purge_memory(); tensor_stats.clear(); } @@ -52,6 +44,7 @@ impl Caches { } } +#[derive(Clone, Copy, Debug)] pub struct TensorStats { /// This will currently only be `None` for jpeg-encoded tensors. 
pub range: Option<(f64, f64)>, diff --git a/crates/re_viewer/src/misc/caches/tensor_decode_cache.rs b/crates/re_viewer/src/misc/caches/tensor_decode_cache.rs index f555ef2d8a53..e942293e5f66 100644 --- a/crates/re_viewer/src/misc/caches/tensor_decode_cache.rs +++ b/crates/re_viewer/src/misc/caches/tensor_decode_cache.rs @@ -1,4 +1,4 @@ -use re_log_types::component_types::{Tensor, TensorDimension, TensorId, TensorTrait}; +use re_log_types::component_types::{Tensor, TensorDimension, TensorId}; #[derive(thiserror::Error, Clone, Debug)] pub enum TensorDecodeError { diff --git a/crates/re_viewer/src/misc/caches/tensor_image_cache.rs b/crates/re_viewer/src/misc/caches/tensor_image_cache.rs deleted file mode 100644 index b346dc779d85..000000000000 --- a/crates/re_viewer/src/misc/caches/tensor_image_cache.rs +++ /dev/null @@ -1,530 +0,0 @@ -use std::{hash::Hash, sync::Arc}; - -use egui::{Color32, ColorImage}; -use egui_extras::RetainedImage; -use image::DynamicImage; -use re_log_types::{ - component_types::{self, ClassId, Tensor, TensorData, TensorDataMeaning, TensorTrait}, - MsgId, -}; -use re_renderer::{ - resource_managers::{GpuTexture2DHandle, Texture2DCreationDesc}, - RenderContext, -}; - -use crate::{ - misc::caches::TensorStats, - ui::{Annotations, DefaultColor, MISSING_ANNOTATIONS}, -}; - -// --- - -/// The [`ColoredTensorView`] is a wrapper on top of [`Tensor`] -/// -/// It consolidates the common operations of going from the raw tensor storage -/// into an object that can be more natively displayed as an Image. -/// -/// In the case of images that leverage a `ColorMapping` this includes conversion from -/// the native Tensor type A -> Color32. -pub struct ColoredTensorView<'store, 'cache> { - /// Key used to retrieve this cached view - key: ImageCacheKey, - - /// Borrowed tensor from the data store - pub tensor: &'store Tensor, - - /// Annotations used to create the view - pub annotations: &'store Arc, - - /// Image with annotations applied and converted to Srgb - pub colored_image: Option<&'cache ColorImage>, - - // For egui - // TODO(jleibs): This should go away. See [#506](https://github.com/rerun-io/rerun/issues/506) - pub retained_image: Option<&'cache RetainedImage>, -} - -impl<'store, 'cache> ColoredTensorView<'store, 'cache> { - /// Try to get a [`GpuTexture2DHandle`] for the cached [`Tensor`]. - /// - /// Will return None if a valid [`ColorImage`] could not be derived from the [`Tensor`]. - pub fn texture_handle(&self, render_ctx: &mut RenderContext) -> Option { - crate::profile_function!(); - self.colored_image.map(|i| { - let texture_key = self.key.hash64(); - - let debug_name = format!("tensor {:?}", self.tensor.shape()); - // TODO(andreas): The renderer should ingest images with less conversion (e.g. keep luma as 8bit texture, don't flip bits on bgra etc.) - render_ctx.texture_manager_2d.get_or_create( - texture_key, - &mut render_ctx.gpu_resources.textures, - &Texture2DCreationDesc { - label: debug_name.into(), - data: bytemuck::cast_slice(&i.pixels), - format: wgpu::TextureFormat::Rgba8UnormSrgb, - width: i.width() as u32, - height: i.height() as u32, - }, - ) - }) - } - - /// Try to get a [`DynamicImage`] for the the cached [`Tensor`]. - /// - /// Note: this is a `DynamicImage` created from the cached [`ColorImage`], not from the - /// raw [`Tensor`], as such it will always be a [`DynamicImage::ImageRgba8`]. - /// - /// Will return None if a valid [`ColorImage`] could not be derived from the [`Tensor`]. 
- pub fn dynamic_img(&self) -> Option { - crate::profile_function!(); - self.colored_image.and_then(|i| { - let bytes: &[u8] = bytemuck::cast_slice(&i.pixels); - image::RgbaImage::from_raw(i.width() as _, i.height() as _, bytes.into()) - .map(DynamicImage::ImageRgba8) - }) - } -} - -// Use this for the cache index so that we don't cache across -// changes to the annotations -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -struct ImageCacheKey { - tensor_id: component_types::TensorId, - annotation_msg_id: MsgId, -} - -impl ImageCacheKey { - fn hash64(&self) -> u64 { - let msg_hash = self.tensor_id.0.as_u128() as u64; - let annotation_hash = (self.annotation_msg_id.as_u128() >> 1) as u64; - msg_hash ^ annotation_hash - } -} - -impl nohash_hasher::IsEnabled for ImageCacheKey {} - -// required for [`nohash_hasher`]. -#[allow(clippy::derive_hash_xor_eq)] -impl Hash for ImageCacheKey { - #[inline] - fn hash(&self, state: &mut H) { - state.write_u64(self.hash64()); - } -} - -#[derive(Default)] -pub struct ImageCache { - images: nohash_hasher::IntMap, - memory_used: u64, - generation: u64, -} - -impl ImageCache { - pub(crate) fn get_colormapped_view<'store, 'cache>( - &'cache mut self, - tensor: &'store Tensor, - annotations: &'store Arc, - ) -> ColoredTensorView<'store, 'cache> { - let key = ImageCacheKey { - tensor_id: tensor.id(), - annotation_msg_id: annotations.msg_id, - }; - let ci = self.images.entry(key).or_insert_with(|| { - let debug_name = format!("tensor {:?}", tensor.shape()); - let ci = CachedImage::from_tensor(&debug_name, tensor, annotations); - self.memory_used += ci.memory_used; - ci - }); - ci.last_use_generation = self.generation; - - ColoredTensorView::<'store, '_> { - key, - tensor, - annotations, - colored_image: ci.colored_image.as_ref(), - retained_image: ci.retained_image.as_ref(), - } - } - - pub(crate) fn get_view<'store, 'cache>( - &'cache mut self, - tensor: &'store Tensor, - ) -> ColoredTensorView<'store, 'cache> { - self.get_colormapped_view(tensor, &MISSING_ANNOTATIONS) - } - - /// Call once per frame to (potentially) flush the cache. - pub fn begin_frame(&mut self, max_memory_use: u64) { - if self.memory_used > max_memory_use { - self.purge_memory(); - } - - self.generation += 1; - } - - /// Attempt to free up memory. - pub fn purge_memory(&mut self) { - crate::profile_function!(); - - // Very aggressively flush everything not used in this frame - - let before = self.memory_used; - - self.images.retain(|_, ci| { - let retain = ci.last_use_generation == self.generation; - if !retain { - self.memory_used -= ci.memory_used; - } - retain - }); - - re_log::debug!( - "Flushed image cache. Before: {:.2} GB. After: {:.2} GB", - before as f64 / 1e9, - self.memory_used as f64 / 1e9, - ); - } -} - -struct CachedImage { - /// For uploading to GPU - colored_image: Option, - - // For egui - // TODO(jleibs): This should go away. See [#506](https://github.com/rerun-io/rerun/issues/506) - retained_image: Option, - - /// Total memory used by this image. - memory_used: u64, - - /// When [`ImageCache::generation`] was we last used? 
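// A standalone sketch (not part of this diff) of the generation scheme the deleted
// `ImageCache` above used: each frame bumps a counter, entries remember the generation
// they were last touched in, and a purge retains only entries used this frame.
struct GenerationCache<V> {
    entries: std::collections::HashMap<u64, (V, u64)>, // value plus last-used generation
    generation: u64,
}

impl<V> GenerationCache<V> {
    fn begin_frame(&mut self) {
        self.generation += 1;
    }

    fn purge(&mut self) {
        let current = self.generation;
        // Aggressively drop everything not used in the current frame:
        self.entries.retain(|_, (_, last_used)| *last_used == current);
    }
}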
- last_use_generation: u64, -} - -impl CachedImage { - fn from_tensor(debug_name: &str, tensor: &Tensor, annotations: &Arc) -> Self { - crate::profile_function!(); - - match apply_color_map(tensor, annotations) { - Ok(colored_image) => { - let memory_used = colored_image.pixels.len() * std::mem::size_of::(); - - let retained_image = { - crate::profile_scope!("retained_image"); - let options = egui::TextureOptions { - // This is best for low-res depth-images and the like - magnification: egui::TextureFilter::Nearest, - minification: egui::TextureFilter::Linear, - }; - RetainedImage::from_color_image(debug_name, colored_image.clone()) - .with_options(options) - }; - - Self { - colored_image: Some(colored_image), - retained_image: Some(retained_image), - memory_used: memory_used as u64, - last_use_generation: 0, - } - } - Err(err) => { - re_log::warn!("Bad image {debug_name:?}: {}", re_error::format(&err)); - - Self { - colored_image: None, - retained_image: None, - memory_used: 0, - last_use_generation: 0, - } - } - } - } -} - -fn apply_color_map(tensor: &Tensor, annotations: &Arc) -> anyhow::Result { - match tensor.meaning { - TensorDataMeaning::Unknown => color_tensor_as_color_image(tensor), - TensorDataMeaning::ClassId => class_id_tensor_as_color_image(tensor, annotations), - TensorDataMeaning::Depth => depth_tensor_as_color_image(tensor), - } -} - -fn height_width_depth(tensor: &Tensor) -> anyhow::Result<[u32; 3]> { - use anyhow::Context as _; - - let shape = &tensor.shape(); - - anyhow::ensure!( - shape.len() == 2 || shape.len() == 3, - "Expected a 2D or 3D tensor, got {shape:?}", - ); - - let [height, width] = [ - u32::try_from(shape[0].size).context("tensor too large")?, - u32::try_from(shape[1].size).context("tensor too large")?, - ]; - let depth = if shape.len() == 2 { 1 } else { shape[2].size }; - - anyhow::ensure!( - depth == 1 || depth == 3 || depth == 4, - "Expected depth of 1,3,4 (gray, RGB, RGBA), found {depth:?}. Tensor shape: {shape:?}" - ); - debug_assert!( - tensor.is_shaped_like_an_image(), - "We should make the same checks above, but with actual error messages" - ); - - Ok([height, width, depth as u32]) -} - -fn color_tensor_as_color_image(tensor: &Tensor) -> anyhow::Result { - crate::profile_function!(format!( - "dtype: {}, shape: {:?}", - tensor.dtype(), - tensor.shape() - )); - - let [height, width, depth] = height_width_depth(tensor)?; - - use egui::epaint::ecolor::{gamma_u8_from_linear_f32, linear_u8_from_linear_f32}; - - let size = [width as _, height as _]; - - match (depth, &tensor.data) { - (1, TensorData::U8(buf)) => { - // TODO(emilk): we should read some meta-data to check if this is luminance or alpha. - let pixels = buf - .0 - .iter() - .map(|pixel| Color32::from_gray(*pixel)) - .collect(); - Ok(ColorImage { size, pixels }) - } - (1, TensorData::U16(buf)) => { - // TODO(emilk): we should read some meta-data to check if this is luminance or alpha. 
- let pixels = buf - .iter() - .map(|pixel| Color32::from_gray((*pixel / 256) as u8)) - .collect(); - - Ok(ColorImage { size, pixels }) - } - (1, TensorData::F32(buf)) => { - let pixels = buf - .iter() - .map(|pixel| Color32::from_gray(linear_u8_from_linear_f32(*pixel))) - .collect(); - - Ok(ColorImage { size, pixels }) - } - (3, TensorData::U8(buf)) => Ok(ColorImage::from_rgb(size, buf.0.as_slice())), - (3, TensorData::U16(buf)) => { - let u8_buf: Vec = buf.iter().map(|pixel| (*pixel / 256) as u8).collect(); - - Ok(ColorImage::from_rgb(size, &u8_buf)) - } - (3, TensorData::F32(buf)) => { - let rgb: &[[f32; 3]] = bytemuck::cast_slice(buf.as_slice()); - let pixels: Vec = rgb - .iter() - .map(|&[r, g, b]| { - let r = gamma_u8_from_linear_f32(r); - let g = gamma_u8_from_linear_f32(g); - let b = gamma_u8_from_linear_f32(b); - Color32::from_rgb(r, g, b) - }) - .collect(); - - Ok(ColorImage { size, pixels }) - } - - (4, TensorData::U8(buf)) => Ok(ColorImage::from_rgba_unmultiplied(size, buf.0.as_slice())), - (4, TensorData::U16(buf)) => { - let u8_buf: Vec = buf.iter().map(|pixel| (*pixel / 256) as u8).collect(); - - Ok(ColorImage::from_rgba_unmultiplied(size, &u8_buf)) - } - (4, TensorData::F32(buf)) => { - let rgba: &[[f32; 4]] = bytemuck::cast_slice(buf.as_slice()); - let pixels: Vec = rgba - .iter() - .map(|&[r, g, b, a]| { - let r = gamma_u8_from_linear_f32(r); - let g = gamma_u8_from_linear_f32(g); - let b = gamma_u8_from_linear_f32(b); - let a = linear_u8_from_linear_f32(a); - Color32::from_rgba_unmultiplied(r, g, b, a) - }) - .collect(); - - Ok(ColorImage { size, pixels }) - } - - (_depth, dtype) => { - anyhow::bail!("Don't know how to turn a tensor of shape={:?} and dtype={dtype:?} into a color image", tensor.shape) - } - } -} - -fn class_id_tensor_as_color_image( - tensor: &Tensor, - annotations: &Annotations, -) -> anyhow::Result { - crate::profile_function!(format!( - "dtype: {}, shape: {:?}", - tensor.dtype(), - tensor.shape() - )); - - let [height, width, depth] = height_width_depth(tensor)?; - anyhow::ensure!( - depth == 1, - "Cannot apply annotations to tensor of shape {:?}", - tensor.shape - ); - let size = [width as _, height as _]; - - match &tensor.data { - TensorData::U8(buf) => { - // Apply annotation mapping to raw bytes interpreted as u8 - let color_lookup: Vec = (0..256) - .map(|id| { - annotations - .class_description(Some(ClassId(id))) - .annotation_info() - .color(None, DefaultColor::TransparentBlack) - }) - .collect(); - let pixels: Vec = buf - .0 - .iter() - .map(|p: &u8| color_lookup[*p as usize]) - .collect(); - crate::profile_scope!("from_raw"); - Ok(ColorImage { size, pixels }) - } - TensorData::U16(buf) => { - // Apply annotations mapping to bytes interpreted as u16 - let mut color_lookup: ahash::HashMap = Default::default(); - let pixels = buf - .iter() - .map(|id: &u16| { - *color_lookup.entry(*id).or_insert_with(|| { - annotations - .class_description(Some(ClassId(*id))) - .annotation_info() - .color(None, DefaultColor::TransparentBlack) - }) - }) - .collect(); - crate::profile_scope!("from_raw"); - Ok(ColorImage { size, pixels }) - } - _ => { - anyhow::bail!( - "Cannot apply annotations to tensor of dtype {}", - tensor.dtype() - ) - } - } -} - -fn depth_tensor_as_color_image(tensor: &Tensor) -> anyhow::Result { - if tensor.data.is_empty() { - return Ok(ColorImage::default()); - } - - // This function applies color mapping to a depth image. 
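// A standalone sketch (not part of this diff) of the two CPU lookup strategies the deleted
// code above used for class-id colors: a dense 256-entry table for u8 ids, and a lazily
// memoized map for u16 ids (where a dense 65536-entry table would usually be wasted work).
fn colorize_u8_ids(pixels: &[u8], color_for_id: impl Fn(u8) -> [u8; 4]) -> Vec<[u8; 4]> {
    let lut: Vec<[u8; 4]> = (0..=255).map(&color_for_id).collect();
    pixels.iter().map(|&id| lut[id as usize]).collect()
}

fn colorize_u16_ids(pixels: &[u16], color_for_id: impl Fn(u16) -> [u8; 4]) -> Vec<[u8; 4]> {
    let mut memo = std::collections::HashMap::new();
    pixels
        .iter()
        .map(|&id| *memo.entry(id).or_insert_with(|| color_for_id(id)))
        .collect()
}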
- // We are planning on moving this to the GPU: https://github.com/rerun-io/rerun/issues/1612 - // One big downside of the approach below is that if we have two depth images - // in the same range, they cannot be visually compared with each other, - // because their individual max-depths will be scaled to 65535. - - crate::profile_function!(format!( - "dtype: {}, shape: {:?}", - tensor.dtype(), - tensor.shape() - )); - - let [height, width, depth] = height_width_depth(tensor)?; - anyhow::ensure!(depth == 1, "Depth tensor of shape {:?}", tensor.shape); - let size = [width as _, height as _]; - - let range = TensorStats::new(tensor).range.ok_or(anyhow::anyhow!( - "Depth image had no range!? Was this compressed?" - ))?; - - let (mut min, mut max) = range; - - anyhow::ensure!( - min.is_finite() && max.is_finite(), - "Depth image had non-finite values" - ); - - min = min.min(0.0); // Depth usually start at zero. - - if min == max { - // Uniform image. We can't remap it to a 0-1 range, so do whatever: - min = 0.0; - max = if tensor.dtype().is_float() { - 1.0 - } else { - tensor.dtype().max_value() - }; - } - - let colormap = |value: f64| { - let t = egui::remap(value, min..=max, 0.0..=1.0) as f32; - let [r, g, b, _] = re_renderer::colormap_turbo_srgb(t); - egui::Color32::from_rgb(r, g, b) - }; - - match &tensor.data { - TensorData::U8(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - TensorData::U16(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - TensorData::U32(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - TensorData::U64(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - - TensorData::I8(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - TensorData::I16(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - TensorData::I32(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - TensorData::I64(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - - TensorData::F32(buf) => { - let pixels = buf.iter().map(|&value| colormap(value as _)).collect(); - Ok(ColorImage { size, pixels }) - } - TensorData::F64(buf) => { - let pixels = buf.iter().map(|&value| colormap(value)).collect(); - Ok(ColorImage { size, pixels }) - } - - TensorData::JPEG(_) => { - anyhow::bail!("Cannot apply colormap to JPEG image") - } - } -} diff --git a/crates/re_viewer/src/misc/item.rs b/crates/re_viewer/src/misc/item.rs index 358e293a4d09..a73b3c583f0d 100644 --- a/crates/re_viewer/src/misc/item.rs +++ b/crates/re_viewer/src/misc/item.rs @@ -1,6 +1,6 @@ use itertools::Itertools; -use re_data_store::{InstancePath, LogDb}; -use re_log_types::{ComponentPath, MsgId}; +use re_data_store::InstancePath; +use re_log_types::ComponentPath; use crate::ui::SpaceViewId; @@ -11,7 +11,6 @@ use crate::ui::SpaceViewId; /// A set of these is a an [`ItemCollection`]. 
#[derive(Clone, PartialEq, Eq, Hash, serde::Deserialize, serde::Serialize)] pub enum Item { - MsgId(MsgId), ComponentPath(ComponentPath), SpaceView(SpaceViewId), InstancePath(Option, InstancePath), @@ -21,7 +20,6 @@ pub enum Item { impl std::fmt::Debug for Item { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Item::MsgId(s) => s.fmt(f), Item::ComponentPath(s) => s.fmt(f), Item::SpaceView(s) => write!(f, "{s:?}"), Item::InstancePath(sid, path) => write!(f, "({sid:?}, {path})"), @@ -32,13 +30,12 @@ impl std::fmt::Debug for Item { impl Item { /// If `false`, the selection is referring to data that is no longer present. - pub(crate) fn is_valid(&self, log_db: &LogDb, blueprint: &crate::ui::Blueprint) -> bool { + pub(crate) fn is_valid(&self, blueprint: &crate::ui::Blueprint) -> bool { match self { Item::ComponentPath(_) => true, Item::InstancePath(space_view_id, _) => space_view_id .map(|space_view_id| blueprint.viewport.space_view(&space_view_id).is_some()) .unwrap_or(true), - Item::MsgId(msg_id) => log_db.get_log_msg(msg_id).is_some(), Item::SpaceView(space_view_id) => { blueprint.viewport.space_view(space_view_id).is_some() } @@ -57,7 +54,6 @@ impl Item { pub fn kind(self: &Item) -> &'static str { match self { - Item::MsgId(_) => "Message", Item::InstancePath(space_view_id, instance_path) => { match ( instance_path.instance_key.is_specific(), @@ -138,8 +134,7 @@ impl ItemCollection { } /// Remove all invalid selections. - pub fn purge_invalid(&mut self, log_db: &LogDb, blueprint: &crate::ui::Blueprint) { - self.items - .retain(|selection| selection.is_valid(log_db, blueprint)); + pub fn purge_invalid(&mut self, blueprint: &crate::ui::Blueprint) { + self.items.retain(|selection| selection.is_valid(blueprint)); } } diff --git a/crates/re_viewer/src/misc/selection_state.rs b/crates/re_viewer/src/misc/selection_state.rs index a57c5a40cae2..6adaacafd0a2 100644 --- a/crates/re_viewer/src/misc/selection_state.rs +++ b/crates/re_viewer/src/misc/selection_state.rs @@ -3,11 +3,11 @@ use egui::NumExt; use lazy_static::lazy_static; use nohash_hasher::IntMap; -use re_data_store::{EntityPath, LogDb}; +use re_data_store::{EntityPath, InstancePath, InstancePathHash}; use re_log_types::{component_types::InstanceKey, EntityPathHash}; use re_renderer::OutlineMaskPreference; -use crate::ui::{Blueprint, HistoricalSelection, SelectionHistory, SpaceView, SpaceViewId}; +use crate::ui::{Blueprint, SelectionHistory, SpaceView, SpaceViewId}; use super::{Item, ItemCollection}; @@ -29,8 +29,15 @@ pub enum HoveredSpace { /// The 3D space with the camera(s) space_3d: EntityPath, - /// 2D spaces and pixel coordinates (with Z=depth) - target_spaces: Vec<(EntityPath, Option)>, + /// The point in 3D space that is hovered, if any. + pos: Option, + + /// Path of a space camera, this 3D space is viewed through. 
+ /// (None for a free floating Eye) + tracked_space_camera: Option, + + /// Corresponding 2D spaces and pixel coordinates (with Z=depth) + point_in_space_cameras: Vec<(InstancePathHash, Option)>, }, } @@ -187,10 +194,10 @@ pub struct SelectionState { impl SelectionState { /// Called at the start of each frame - pub fn on_frame_start(&mut self, log_db: &LogDb, blueprint: &Blueprint) { + pub fn on_frame_start(&mut self, blueprint: &Blueprint) { crate::profile_function!(); - self.history.on_frame_start(log_db, blueprint); + self.history.on_frame_start(blueprint); self.hovered_space_previous_frame = std::mem::replace(&mut self.hovered_space_this_frame, HoveredSpace::None); @@ -198,13 +205,17 @@ impl SelectionState { } /// Selects the previous element in the history if any. - pub fn select_previous(&mut self) -> Option { - self.history.select_previous() + pub fn select_previous(&mut self) { + if let Some(selection) = self.history.select_previous() { + self.selection = selection; + } } /// Selections the next element in the history if any. - pub fn select_next(&mut self) -> Option { - self.history.select_next() + pub fn select_next(&mut self) { + if let Some(selection) = self.history.select_next() { + self.selection = selection; + } } /// Clears the current selection out. @@ -287,10 +298,9 @@ impl SelectionState { .hovered_previous_frame .iter() .any(|current| match current { - Item::MsgId(_) - | Item::ComponentPath(_) - | Item::SpaceView(_) - | Item::DataBlueprintGroup(_, _) => current == test, + Item::ComponentPath(_) | Item::SpaceView(_) | Item::DataBlueprintGroup(_, _) => { + current == test + } Item::InstancePath(current_space_view_id, current_instance_path) => { if let Item::InstancePath(test_space_view_id, test_instance_path) = test { @@ -345,7 +355,7 @@ impl SelectionState { for current_selection in self.selection.iter() { match current_selection { - Item::MsgId(_) | Item::ComponentPath(_) | Item::SpaceView(_) => {} + Item::ComponentPath(_) | Item::SpaceView(_) => {} Item::DataBlueprintGroup(group_space_view_id, group_handle) => { if *group_space_view_id == space_view_id { @@ -421,7 +431,7 @@ impl SelectionState { for current_hover in self.hovered_previous_frame.iter() { match current_hover { - Item::MsgId(_) | Item::ComponentPath(_) | Item::SpaceView(_) => {} + Item::ComponentPath(_) | Item::SpaceView(_) => {} Item::DataBlueprintGroup(group_space_view_id, group_handle) => { // Unlike for selected objects/data we are more picky for data blueprints with our hover highlights diff --git a/crates/re_viewer/src/misc/time_control.rs b/crates/re_viewer/src/misc/time_control.rs index 8b8eeb7da001..e9839cefec45 100644 --- a/crates/re_viewer/src/misc/time_control.rs +++ b/crates/re_viewer/src/misc/time_control.rs @@ -293,6 +293,14 @@ impl TimeControl { } } + pub fn restart(&mut self, times_per_timeline: &TimesPerTimeline) { + if let Some(time_points) = times_per_timeline.get(&self.timeline) { + if let Some(state) = self.states.get_mut(&self.timeline) { + state.time = min(time_points).into(); + } + } + } + pub fn toggle_play_pause(&mut self, times_per_timeline: &TimesPerTimeline) { #[allow(clippy::collapsible_else_if)] if self.playing { @@ -501,11 +509,11 @@ impl TimeControl { } fn min(values: &BTreeSet) -> TimeInt { - *values.iter().next().unwrap() + *values.iter().next().unwrap_or(&TimeInt::BEGINNING) } fn max(values: &BTreeSet) -> TimeInt { - *values.iter().rev().next().unwrap() + *values.iter().rev().next().unwrap_or(&TimeInt::BEGINNING) } fn range(values: &BTreeSet) -> TimeRange { diff --git 
a/crates/re_viewer/src/misc/viewer_context.rs b/crates/re_viewer/src/misc/viewer_context.rs index 6974db7c1167..93584b9c254a 100644 --- a/crates/re_viewer/src/misc/viewer_context.rs +++ b/crates/re_viewer/src/misc/viewer_context.rs @@ -1,5 +1,5 @@ use re_data_store::{log_db::LogDb, InstancePath}; -use re_log_types::{ComponentPath, EntityPath, MsgId, TimeInt, Timeline}; +use re_log_types::{ComponentPath, EntityPath, TimeInt, Timeline}; use crate::ui::{ data_ui::{ComponentUiRegistry, DataUi}, @@ -38,19 +38,6 @@ pub struct ViewerContext<'a> { } impl<'a> ViewerContext<'a> { - /// Show an [`MsgId`] and make it selectable - pub fn msg_id_button(&mut self, ui: &mut egui::Ui, msg_id: MsgId) -> egui::Response { - let item = Item::MsgId(msg_id); - let response = ui - .selectable_label(self.selection().contains(&item), msg_id.short_string()) - .on_hover_ui(|ui| { - ui.label(format!("Message ID: {msg_id}")); - ui.separator(); - msg_id.data_ui(self, ui, UiVerbosity::Small, &self.current_query()); - }); - self.cursor_interact_with_selectable(response, item) - } - /// Show an entity path and make it selectable. pub fn entity_path_button( &mut self, diff --git a/crates/re_viewer/src/remote_viewer_app.rs b/crates/re_viewer/src/remote_viewer_app.rs index bcd6f37e609d..2012512f4f2f 100644 --- a/crates/re_viewer/src/remote_viewer_app.rs +++ b/crates/re_viewer/src/remote_viewer_app.rs @@ -56,7 +56,7 @@ impl RemoteViewerApp { } } Err(err) => { - re_log::error!("Failed to parse message: {}", re_error::format(&err)); + re_log::error!("Failed to parse message: {err}"); std::ops::ControlFlow::Break(()) } } diff --git a/crates/re_viewer/src/ui/annotations.rs b/crates/re_viewer/src/ui/annotations.rs index 2e5ab34c8394..39a00b7d9d26 100644 --- a/crates/re_viewer/src/ui/annotations.rs +++ b/crates/re_viewer/src/ui/annotations.rs @@ -8,7 +8,7 @@ use re_data_store::EntityPath; use re_log_types::{ component_types::{ClassId, KeypointId}, context::{AnnotationInfo, ClassDescription}, - AnnotationContext, Component, MsgId, + AnnotationContext, RowId, }; use re_query::query_entity_with_primary; @@ -16,44 +16,59 @@ use crate::{misc::ViewerContext, ui::scene::SceneQuery}; #[derive(Clone, Debug)] pub struct Annotations { - pub msg_id: MsgId, + pub row_id: RowId, pub context: AnnotationContext, } impl Annotations { + #[inline] pub fn class_description(&self, class_id: Option) -> ResolvedClassDescription<'_> { - ResolvedClassDescription( - class_id.and_then(|class_id| self.context.class_map.get(&class_id)), - ) + ResolvedClassDescription { + class_id, + class_description: class_id.and_then(|class_id| self.context.class_map.get(&class_id)), + } } } -pub struct ResolvedClassDescription<'a>(pub Option<&'a ClassDescription>); +pub struct ResolvedClassDescription<'a> { + pub class_id: Option, + pub class_description: Option<&'a ClassDescription>, +} impl<'a> ResolvedClassDescription<'a> { + #[inline] pub fn annotation_info(&self) -> ResolvedAnnotationInfo { - ResolvedAnnotationInfo(self.0.map(|desc| desc.info.clone())) + ResolvedAnnotationInfo { + class_id: self.class_id, + annotation_info: self.class_description.map(|desc| desc.info.clone()), + } } /// Merges class annotation info with keypoint annotation info (if existing respectively). 
pub fn annotation_info_with_keypoint(&self, keypoint_id: KeypointId) -> ResolvedAnnotationInfo { - if let Some(desc) = self.0 { + if let Some(desc) = self.class_description { // Assuming that keypoint annotation is the rarer case, merging the entire annotation ahead of time // is cheaper than doing it lazily (which would cause more branches down the line for callsites without keypoints) if let Some(keypoint_annotation_info) = desc.keypoint_map.get(&keypoint_id) { - ResolvedAnnotationInfo(Some(AnnotationInfo { - id: keypoint_id.0, - label: keypoint_annotation_info - .label - .clone() - .or_else(|| desc.info.label.clone()), - color: keypoint_annotation_info.color.or(desc.info.color), - })) + ResolvedAnnotationInfo { + class_id: self.class_id, + annotation_info: Some(AnnotationInfo { + id: keypoint_id.0, + label: keypoint_annotation_info + .label + .clone() + .or_else(|| desc.info.label.clone()), + color: keypoint_annotation_info.color.or(desc.info.color), + }), + } } else { self.annotation_info() } } else { - ResolvedAnnotationInfo(None) + ResolvedAnnotationInfo { + class_id: self.class_id, + annotation_info: None, + } } } } @@ -66,7 +81,10 @@ pub enum DefaultColor<'a> { } #[derive(Clone)] -pub struct ResolvedAnnotationInfo(pub Option); +pub struct ResolvedAnnotationInfo { + pub class_id: Option, + pub annotation_info: Option, +} impl ResolvedAnnotationInfo { pub fn color( @@ -76,17 +94,18 @@ impl ResolvedAnnotationInfo { ) -> re_renderer::Color32 { if let Some([r, g, b, a]) = color { re_renderer::Color32::from_rgba_premultiplied(*r, *g, *b, *a) - } else if let Some(color) = self.0.as_ref().and_then(|info| { + } else if let Some(color) = self.annotation_info.as_ref().and_then(|info| { info.color .map(|c| c.into()) .or_else(|| Some(auto_color(info.id))) }) { color } else { - match default_color { - DefaultColor::TransparentBlack => re_renderer::Color32::TRANSPARENT, - DefaultColor::OpaqueWhite => re_renderer::Color32::WHITE, - DefaultColor::EntityPath(entity_path) => { + match (self.class_id, default_color) { + (Some(class_id), _) if class_id.0 != 0 => auto_color(class_id.0), + (_, DefaultColor::TransparentBlack) => re_renderer::Color32::TRANSPARENT, + (_, DefaultColor::OpaqueWhite) => re_renderer::Color32::WHITE, + (_, DefaultColor::EntityPath(entity_path)) => { auto_color((entity_path.hash64() % std::u16::MAX as u64) as u16) } } @@ -97,7 +116,7 @@ impl ResolvedAnnotationInfo { if let Some(label) = label { Some(label.clone()) } else { - self.0 + self.annotation_info .as_ref() .and_then(|info| info.label.as_ref().map(|label| label.0.clone())) } @@ -145,18 +164,16 @@ impl AnnotationMap { data_store, &latest_at_query, &parent, - &[MsgId::name()], + &[], ) .ok() .and_then(|entity| { - if let (Some(context), Some(msg_id)) = ( - entity.iter_primary().ok()?.next()?, - entity.iter_component::().ok()?.next()?, - ) { - Some(entry.insert(Arc::new(Annotations { msg_id, context }))) - } else { - None - } + entity.iter_primary().ok()?.next()?.map(|context| { + entry.insert(Arc::new(Annotations { + row_id: entity.row_id(), + context, + })) + }) }) .is_some() { @@ -172,15 +189,15 @@ impl AnnotationMap { } // Search through the all prefixes of this entity path until we find a - // matching annotation. If we find nothing return the default `MISSING_ANNOTATIONS`. - pub fn find<'a>(&self, entity_path: impl Into<&'a EntityPath>) -> Arc { - let mut next_parent = Some(entity_path.into().clone()); + // matching annotation. If we find nothing return the default [`MISSING_ANNOTATIONS`]. 
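// A standalone sketch (not part of this diff) of the hierarchical lookup in
// `AnnotationMap::find` above: walk from the entity path up through its parents and take
// the nearest match; the caller substitutes the shared MISSING_ANNOTATIONS default when
// nothing is found. Paths are simplified to string-component vectors here.
fn find_nearest<'a, V>(
    map: &'a std::collections::HashMap<Vec<&'static str>, V>,
    path: &[&'static str],
) -> Option<&'a V> {
    let mut path = path.to_vec();
    loop {
        if let Some(found) = map.get(&path) {
            return Some(found);
        }
        if path.pop().is_none() {
            return None; // exhausted the root as well
        }
    }
}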
+ pub fn find(&self, entity_path: &EntityPath) -> Arc { + let mut next_parent = Some(entity_path.clone()); while let Some(parent) = next_parent { if let Some(legend) = self.0.get(&parent) { return legend.clone(); } - next_parent = parent.parent().clone(); + next_parent = parent.parent(); } // Otherwise return the missing legend @@ -190,12 +207,12 @@ impl AnnotationMap { // --- -const MISSING_MSG_ID: MsgId = MsgId::ZERO; +const MISSING_ROW_ID: RowId = RowId::ZERO; lazy_static! { pub static ref MISSING_ANNOTATIONS: Arc = { Arc::new(Annotations { - msg_id: MISSING_MSG_ID, + row_id: MISSING_ROW_ID, context: Default::default(), }) }; diff --git a/crates/re_viewer/src/ui/data_ui/annotation_context.rs b/crates/re_viewer/src/ui/data_ui/annotation_context.rs index 3100867c5671..841ebf0d19f3 100644 --- a/crates/re_viewer/src/ui/data_ui/annotation_context.rs +++ b/crates/re_viewer/src/ui/data_ui/annotation_context.rs @@ -17,7 +17,7 @@ impl DataUi for AnnotationContext { _query: &re_arrow_store::LatestAtQuery, ) { match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) => { + UiVerbosity::Small => { ui.label(format!( "AnnotationContext with {} classes", self.class_map.len() diff --git a/crates/re_viewer/src/ui/data_ui/component.rs b/crates/re_viewer/src/ui/data_ui/component.rs index b2335f696ca4..2d3f3a85131e 100644 --- a/crates/re_viewer/src/ui/data_ui/component.rs +++ b/crates/re_viewer/src/ui/data_ui/component.rs @@ -43,7 +43,7 @@ impl DataUi for EntityComponentWithInstances { let num_instances = self.num_instances(); let one_line = match verbosity { - crate::ui::UiVerbosity::Small | crate::ui::UiVerbosity::MaxHeight(_) => true, + crate::ui::UiVerbosity::Small => true, crate::UiVerbosity::Reduced | crate::ui::UiVerbosity::All => false, }; @@ -56,6 +56,7 @@ impl DataUi for EntityComponentWithInstances { ui, verbosity, query, + &self.entity_path, &self.component_data, &instance_key, ); @@ -107,6 +108,7 @@ impl DataUi for EntityComponentWithInstances { ui, crate::ui::UiVerbosity::Small, query, + &self.entity_path, &self.component_data, &instance_key, ); diff --git a/crates/re_viewer/src/ui/data_ui/component_path.rs b/crates/re_viewer/src/ui/data_ui/component_path.rs index 504844738adc..f834f6cc5fb9 100644 --- a/crates/re_viewer/src/ui/data_ui/component_path.rs +++ b/crates/re_viewer/src/ui/data_ui/component_path.rs @@ -25,7 +25,7 @@ impl DataUi for ComponentPath { // Any other failure to get a component is unexpected ui.label(ctx.re_ui.error_text(format!("Error: {err}"))); } - Ok(component_data) => { + Ok((_, component_data)) => { super::component::EntityComponentWithInstances { entity_path: self.entity_path.clone(), component_data, diff --git a/crates/re_viewer/src/ui/data_ui/component_ui_registry.rs b/crates/re_viewer/src/ui/data_ui/component_ui_registry.rs index dc5e885c6739..fda295e234d3 100644 --- a/crates/re_viewer/src/ui/data_ui/component_ui_registry.rs +++ b/crates/re_viewer/src/ui/data_ui/component_ui_registry.rs @@ -1,6 +1,7 @@ use std::collections::BTreeMap; use re_arrow_store::LatestAtQuery; +use re_data_store::EntityPath; use re_log_types::{ component_types::InstanceKey, external::arrow2, Component, ComponentName, DeserializableComponent, @@ -9,7 +10,7 @@ use re_query::ComponentWithInstances; use crate::{misc::ViewerContext, ui::UiVerbosity}; -use super::DataUi; +use super::{DataUi, EntityDataUi}; type ComponentUiCallback = Box< dyn Fn( @@ -17,6 +18,7 @@ type ComponentUiCallback = Box< &mut egui::Ui, UiVerbosity, &LatestAtQuery, + &EntityPath, &ComponentWithInstances, 
&InstanceKey, ), @@ -47,7 +49,6 @@ impl Default for ComponentUiRegistry { registry.add::(); registry.add::(); registry.add::(); - registry.add::(); // registry.add::(); // registry.add::(); // registry.add::(); @@ -68,33 +69,39 @@ impl Default for ComponentUiRegistry { } impl ComponentUiRegistry { - fn add(&mut self) + fn add(&mut self) where for<'a> &'a C::ArrayType: IntoIterator, { self.components.insert( C::name(), - Box::new(|ctx, ui, verbosity, query, component, instance| { - match component.lookup::(instance) { - Ok(component) => component.data_ui(ctx, ui, verbosity, query), + Box::new( + |ctx, ui, verbosity, query, entity_path, component, instance| match component + .lookup::(instance) + { + Ok(component) => { + component.entity_data_ui(ctx, ui, verbosity, entity_path, query); + } Err(re_query::QueryError::ComponentNotFound) => { ui.weak("(not found)"); } Err(err) => { re_log::warn_once!("Expected component {}, {}", C::name(), err); } - } - }), + }, + ), ); } /// Show a ui for this instance of this component. + #[allow(clippy::too_many_arguments)] pub fn ui( &self, ctx: &mut crate::misc::ViewerContext<'_>, ui: &mut egui::Ui, verbosity: crate::ui::UiVerbosity, query: &LatestAtQuery, + entity_path: &EntityPath, component: &ComponentWithInstances, instance_key: &InstanceKey, ) { @@ -107,7 +114,15 @@ impl ComponentUiRegistry { } if let Some(ui_callback) = self.components.get(&component.name()) { - (*ui_callback)(ctx, ui, verbosity, query, component, instance_key); + (*ui_callback)( + ctx, + ui, + verbosity, + query, + entity_path, + component, + instance_key, + ); } else { // No special ui implementation - use a generic one: if let Some(value) = component.lookup_arrow(instance_key) { @@ -143,7 +158,7 @@ impl DataUi for re_log_types::component_types::TextEntry { let Self { body, level } = self; match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) => { + UiVerbosity::Small => { ui.horizontal(|ui| { if let Some(level) = level { ui.label(level_to_rich_text(ui, level)); diff --git a/crates/re_viewer/src/ui/data_ui/data.rs b/crates/re_viewer/src/ui/data_ui/data.rs index 2db2899b6889..314c6dde8586 100644 --- a/crates/re_viewer/src/ui/data_ui/data.rs +++ b/crates/re_viewer/src/ui/data_ui/data.rs @@ -81,7 +81,7 @@ impl DataUi for ViewCoordinates { _query: &re_arrow_store::LatestAtQuery, ) { match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) => { + UiVerbosity::Small => { ui.label(format!("ViewCoordinates: {}", self.describe())); } UiVerbosity::All | UiVerbosity::Reduced => { @@ -101,7 +101,7 @@ impl DataUi for Rigid3 { query: &re_arrow_store::LatestAtQuery, ) { match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) => { + UiVerbosity::Small => { ui.label("Rigid 3D transform").on_hover_ui(|ui| { self.data_ui(ctx, ui, UiVerbosity::All, query); }); @@ -140,7 +140,7 @@ impl DataUi for Pinhole { query: &re_arrow_store::LatestAtQuery, ) { match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) => { + UiVerbosity::Small => { ui.label("Pinhole transform").on_hover_ui(|ui| { self.data_ui(ctx, ui, UiVerbosity::All, query); }); @@ -269,7 +269,7 @@ impl DataUi for LineStrip2D { _query: &re_arrow_store::LatestAtQuery, ) { match verbosity { - UiVerbosity::Small | UiVerbosity::Reduced | UiVerbosity::MaxHeight(_) => { + UiVerbosity::Small | UiVerbosity::Reduced => { ui.label(format!("{} positions", self.0.len())); } UiVerbosity::All => { @@ -318,7 +318,7 @@ impl DataUi for LineStrip3D { _query: &re_arrow_store::LatestAtQuery, ) { match verbosity { - 
UiVerbosity::Small | UiVerbosity::Reduced | UiVerbosity::MaxHeight(_) => { + UiVerbosity::Small | UiVerbosity::Reduced => { ui.label(format!("{} positions", self.0.len())); } UiVerbosity::All => { diff --git a/crates/re_viewer/src/ui/data_ui/image.rs b/crates/re_viewer/src/ui/data_ui/image.rs index bb3d5a483548..59945574dd4d 100644 --- a/crates/re_viewer/src/ui/data_ui/image.rs +++ b/crates/re_viewer/src/ui/data_ui/image.rs @@ -1,17 +1,19 @@ -use egui::{ColorImage, Vec2}; +use egui::{Color32, Vec2}; use itertools::Itertools as _; use re_log_types::{ - component_types::{ClassId, Tensor, TensorDataMeaning, TensorTrait}, + component_types::{ClassId, Tensor, TensorDataMeaning}, TensorElement, }; +use re_renderer::renderer::ColormappedTexture; +use re_ui::ReUi; -use crate::misc::{ - caches::{ColoredTensorView, TensorStats}, - ViewerContext, +use crate::{ + misc::{caches::TensorStats, ViewerContext}, + ui::annotations::AnnotationMap, }; -use super::{DataUi, UiVerbosity}; +use super::{EntityDataUi, UiVerbosity}; pub fn format_tensor_shape_single_line( shape: &[re_log_types::component_types::TensorDimension], @@ -19,107 +21,210 @@ pub fn format_tensor_shape_single_line( format!("[{}]", shape.iter().join(", ")) } -impl DataUi for Tensor { - fn data_ui( +impl EntityDataUi for Tensor { + fn entity_data_ui( &self, ctx: &mut ViewerContext<'_>, ui: &mut egui::Ui, verbosity: crate::ui::UiVerbosity, - _query: &re_arrow_store::LatestAtQuery, + entity_path: &re_log_types::EntityPath, + query: &re_arrow_store::LatestAtQuery, ) { - let decoded = ctx + crate::profile_function!(); + + match ctx .cache .decode - .try_decode_tensor_if_necessary(self.clone()); - - let tensor_view = match &decoded { - Ok(decoded) => ctx.cache.image.get_view(decoded), + .try_decode_tensor_if_necessary(self.clone()) + { + Ok(decoded) => { + tensor_ui(ctx, ui, verbosity, entity_path, query, self, &decoded); + } Err(err) => { - ui.label( - ctx.re_ui - .error_text(format!("Error Decoding Tensor: {err}")), - ); - return; + ui.label(ctx.re_ui.error_text(err.to_string())); } - }; - - let tensor_stats = ctx.cache.tensor_stats.get(&self.id()); - - match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) => { - ui.horizontal_centered(|ui| { - if let Some(retained_img) = tensor_view.retained_image { - let max_height = match verbosity { - UiVerbosity::Small => 24.0, - UiVerbosity::All | UiVerbosity::Reduced => 128.0, - UiVerbosity::MaxHeight(height) => height, - }; - retained_img - .show_max_size(ui, Vec2::new(4.0 * max_height, max_height)) - .on_hover_ui(|ui| { - retained_img.show_max_size(ui, Vec2::splat(400.0)); - }); - } + } + } +} - ui.label(format!( - "{} x {}", - self.dtype(), - format_tensor_shape_single_line(self.shape()) - )) - .on_hover_ui(|ui| tensor_summary_ui(ctx.re_ui, ui, self, tensor_stats)); - }); - } +fn tensor_ui( + ctx: &mut ViewerContext<'_>, + ui: &mut egui::Ui, + verbosity: UiVerbosity, + entity_path: &re_data_store::EntityPath, + query: &re_arrow_store::LatestAtQuery, + _encoded_tensor: &Tensor, + tensor: &Tensor, +) { + // See if we can convert the tensor to a GPU texture. + // Even if not, we will show info about the tensor. 
+ let tensor_stats = *ctx.cache.tensor_stats(tensor); + let annotations = annotations(ctx, query, entity_path); + let debug_name = entity_path.to_string(); + let texture_result = crate::gpu_bridge::tensor_to_gpu( + ctx.render_ctx, + &debug_name, + tensor, + &tensor_stats, + &annotations, + ) + .ok(); + + match verbosity { + UiVerbosity::Small => { + ui.horizontal_centered(|ui| { + if let Some(texture) = &texture_result { + let max_height = 24.0; + let max_size = Vec2::new(4.0 * max_height, max_height); + show_image_at_max_size( + ctx.render_ctx, + ctx.re_ui, + ui, + texture.clone(), + &debug_name, + max_size, + ) + .on_hover_ui(|ui| { + // Show larger image on hover + let max_size = Vec2::splat(400.0); + show_image_at_max_size( + ctx.render_ctx, + ctx.re_ui, + ui, + texture.clone(), + &debug_name, + max_size, + ); + }); + } - UiVerbosity::All | UiVerbosity::Reduced => { - ui.vertical(|ui| { - ui.set_min_width(100.0); - tensor_summary_ui(ctx.re_ui, ui, self, tensor_stats); + ui.label(format!( + "{} x {}", + tensor.dtype(), + format_tensor_shape_single_line(tensor.shape()) + )) + .on_hover_ui(|ui| tensor_summary_ui(ctx.re_ui, ui, tensor, &tensor_stats)); + }); + } - if let Some(retained_img) = tensor_view.retained_image { - let max_size = ui - .available_size() - .min(retained_img.size_vec2()) - .min(egui::vec2(150.0, 300.0)); - let response = retained_img.show_max_size(ui, max_size); + UiVerbosity::All | UiVerbosity::Reduced => { + ui.vertical(|ui| { + ui.set_min_width(100.0); + tensor_summary_ui(ctx.re_ui, ui, tensor, &tensor_stats); + + if let Some(texture) = &texture_result { + let max_size = ui + .available_size() + .min(texture_size(texture)) + .min(egui::vec2(150.0, 300.0)); + let response = show_image_at_max_size( + ctx.render_ctx, + ctx.re_ui, + ui, + texture.clone(), + &debug_name, + max_size, + ); + if let Some(pointer_pos) = ui.ctx().pointer_latest_pos() { let image_rect = response.rect; + show_zoomed_image_region_tooltip( + ctx.render_ctx, + ui, + response, + tensor, + &tensor_stats, + &annotations, + tensor.meter, + &debug_name, + image_rect, + pointer_pos, + ); + } - if let Some(pointer_pos) = ui.ctx().pointer_latest_pos() { - show_zoomed_image_region_tooltip( - ui, - response, - &tensor_view, - image_rect, - pointer_pos, - None, - ); - } + // TODO(emilk): support copying and saving images on web + #[cfg(not(target_arch = "wasm32"))] + if _encoded_tensor.data.is_compressed_image() || tensor.could_be_dynamic_image() + { + copy_and_save_image_ui(ui, tensor, _encoded_tensor); } - #[allow(clippy::collapsible_match)] // false positive on wasm32 - if let Some(dynamic_img) = tensor_view.dynamic_img() { - // TODO(emilk): support copying and saving images on web - #[cfg(not(target_arch = "wasm32"))] - ui.horizontal(|ui| image_options(ui, self, &dynamic_img)); - - // TODO(emilk): support histograms of non-RGB images too - if let image::DynamicImage::ImageRgba8(rgba_image) = dynamic_img { - ui.collapsing("Histogram", |ui| { - histogram_ui(ui, &rgba_image); - }); + if let Some([_h, _w, channels]) = tensor.image_height_width_channels() { + if channels == 3 { + if let re_log_types::component_types::TensorData::U8(data) = + &tensor.data + { + ui.collapsing("Histogram", |ui| { + rgb8_histogram_ui(ui, data.as_slice()); + }); + } } } - }); - } + } + }); } } } +fn annotations( + ctx: &mut ViewerContext<'_>, + query: &re_arrow_store::LatestAtQuery, + entity_path: &re_data_store::EntityPath, +) -> std::sync::Arc { + let mut annotation_map = AnnotationMap::default(); + let entity_paths: 
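Note the trailing `.ok()` on `tensor_to_gpu`: a failed GPU upload merely degrades the UI — the dtype/shape summary still renders, just without a thumbnail. The shape of that fallback, sketched with hypothetical stand-ins rather than the real gpu-bridge API:

```rust
/// Hypothetical stand-in for `crate::gpu_bridge::tensor_to_gpu`.
fn tensor_to_gpu(upload_ok: bool) -> Result<&'static str, String> {
    if upload_ok {
        Ok("gpu-texture-handle")
    } else {
        Err("unsupported tensor layout".to_owned())
    }
}

/// Miniature of `tensor_ui`: a failed upload means no thumbnail,
/// but the dtype/shape summary is shown regardless.
fn tensor_ui(upload_ok: bool, dtype: &str, shape: &[u64]) {
    let texture = tensor_to_gpu(upload_ok).ok(); // Err -> None; info below still shown

    if let Some(texture) = texture {
        println!("[thumbnail rendered via {texture}]");
    }
    println!("{dtype} x {shape:?}");
}

fn main() {
    tensor_ui(true, "u8", &[480, 640, 3]);
    tensor_ui(false, "u8", &[480, 640, 3]); // no thumbnail, summary still appears
}
```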
nohash_hasher::IntSet<_> = std::iter::once(entity_path.clone()).collect(); + let entity_props_map = re_data_store::EntityPropertyMap::default(); + let scene_query = crate::ui::scene::SceneQuery { + entity_paths: &entity_paths, + timeline: query.timeline, + latest_at: query.at, + entity_props_map: &entity_props_map, + }; + annotation_map.load(ctx, &scene_query); + annotation_map.find(entity_path) +} + +fn texture_size(colormapped_texture: &ColormappedTexture) -> Vec2 { + let [w, h] = colormapped_texture.texture.width_height(); + egui::vec2(w as f32, h as f32) +} + +fn show_image_at_max_size( + render_ctx: &mut re_renderer::RenderContext, + re_ui: &ReUi, + ui: &mut egui::Ui, + colormapped_texture: ColormappedTexture, + debug_name: &str, + max_size: Vec2, +) -> egui::Response { + let desired_size = { + let mut desired_size = texture_size(&colormapped_texture); + desired_size *= (max_size.x / desired_size.x).min(1.0); + desired_size *= (max_size.y / desired_size.y).min(1.0); + desired_size + }; + + let (response, painter) = ui.allocate_painter(desired_size, egui::Sense::hover()); + if let Err(err) = crate::gpu_bridge::render_image( + render_ctx, + &painter, + response.rect, + colormapped_texture, + egui::TextureOptions::LINEAR, + debug_name, + ) { + let label_response = ui.label(re_ui.error_text(err.to_string())); + response.union(label_response) + } else { + response + } +} + pub fn tensor_summary_ui_grid_contents( re_ui: &re_ui::ReUi, ui: &mut egui::Ui, tensor: &Tensor, - tensor_stats: Option<&TensorStats>, + tensor_stats: &TensorStats, ) { let Tensor { tensor_id: _, @@ -191,10 +296,9 @@ pub fn tensor_summary_ui_grid_contents( } } - if let Some(TensorStats { - range: Some((min, max)), - }) = tensor_stats - { + let TensorStats { range } = tensor_stats; + + if let Some((min, max)) = range { ui.label("Data range") .on_hover_text("All values of the tensor range within these bounds."); ui.monospace(format!( @@ -210,7 +314,7 @@ pub fn tensor_summary_ui( re_ui: &re_ui::ReUi, ui: &mut egui::Ui, tensor: &Tensor, - tensor_stats: Option<&TensorStats>, + tensor_stats: &TensorStats, ) { egui::Grid::new("tensor_summary_ui") .num_columns(2) @@ -219,38 +323,47 @@ pub fn tensor_summary_ui( }); } +#[allow(clippy::too_many_arguments)] fn show_zoomed_image_region_tooltip( + render_ctx: &mut re_renderer::RenderContext, parent_ui: &mut egui::Ui, response: egui::Response, - tensor_view: &ColoredTensorView<'_, '_>, + tensor: &Tensor, + tensor_stats: &TensorStats, + annotations: &crate::ui::Annotations, + meter: Option, + debug_name: &str, image_rect: egui::Rect, pointer_pos: egui::Pos2, - meter: Option, ) -> egui::Response { response .on_hover_cursor(egui::CursorIcon::Crosshair) .on_hover_ui_at_pointer(|ui| { ui.set_max_width(320.0); ui.horizontal(|ui| { - if tensor_view.tensor.is_shaped_like_an_image() { - let h = tensor_view.tensor.shape()[0].size as _; - let w = tensor_view.tensor.shape()[1].size as _; + if let Some([h, w, _]) = tensor.image_height_width_channels() { + use egui::remap_clamp; - use egui::NumExt; - - let center = [ - (egui::remap(pointer_pos.x, image_rect.x_range(), 0.0..=w as f32) as isize) - .at_most(w), - (egui::remap(pointer_pos.y, image_rect.y_range(), 0.0..=h as f32) as isize) - .at_most(h), + let center_texel = [ + (remap_clamp(pointer_pos.x, image_rect.x_range(), 0.0..=w as f32) as isize), + (remap_clamp(pointer_pos.y, image_rect.y_range(), 0.0..=h as f32) as isize), ]; show_zoomed_image_region_area_outline( parent_ui, - tensor_view, - center, + tensor, + center_texel, image_rect, ); - 
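`show_image_at_max_size` shrinks the texture to fit `max_size` while preserving aspect ratio, and the `.min(1.0)` clamps mean it never upscales. The same arithmetic as a standalone function:

```rust
/// Scale `size` down (never up) so it fits inside `max`, preserving aspect
/// ratio -- the two clamped multiplies from `show_image_at_max_size` above.
fn fit_within(size: (f32, f32), max: (f32, f32)) -> (f32, f32) {
    let (mut w, mut h) = size;
    let sx = (max.0 / w).min(1.0); // clamp: never enlarge
    w *= sx;
    h *= sx;
    let sy = (max.1 / h).min(1.0);
    w *= sy;
    h *= sy;
    (w, h)
}

fn main() {
    // A 640x480 image limited to a 160x60 thumbnail box:
    assert_eq!(fit_within((640.0, 480.0), (160.0, 60.0)), (80.0, 60.0));
    // A small image is left alone rather than upscaled:
    assert_eq!(fit_within((16.0, 16.0), (160.0, 60.0)), (16.0, 16.0));
    println!("ok");
}
```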
show_zoomed_image_region(ui, tensor_view, center, meter); + show_zoomed_image_region( + render_ctx, + ui, + tensor, + tensor_stats, + annotations, + meter, + debug_name, + center_texel, + ); } }); }) @@ -261,224 +374,257 @@ const ZOOMED_IMAGE_TEXEL_RADIUS: isize = 10; pub fn show_zoomed_image_region_area_outline( ui: &mut egui::Ui, - tensor_view: &ColoredTensorView<'_, '_>, + tensor: &Tensor, [center_x, center_y]: [isize; 2], image_rect: egui::Rect, ) { - if tensor_view.tensor.is_shaped_like_an_image() { - use egui::{pos2, remap, Color32, Rect}; - - let h = tensor_view.tensor.shape()[0].size as _; - let w = tensor_view.tensor.shape()[1].size as _; - - // Show where on the original image the zoomed-in region is at: - let left = (center_x - ZOOMED_IMAGE_TEXEL_RADIUS) as f32; - let right = (center_x + ZOOMED_IMAGE_TEXEL_RADIUS) as f32; - let top = (center_y - ZOOMED_IMAGE_TEXEL_RADIUS) as f32; - let bottom = (center_y + ZOOMED_IMAGE_TEXEL_RADIUS) as f32; - - let left = remap(left, 0.0..=w, image_rect.x_range()); - let right = remap(right, 0.0..=w, image_rect.x_range()); - let top = remap(top, 0.0..=h, image_rect.y_range()); - let bottom = remap(bottom, 0.0..=h, image_rect.y_range()); - - let rect = Rect::from_min_max(pos2(left, top), pos2(right, bottom)); - // TODO(emilk): use `parent_ui.painter()` and put it in a high Z layer, when https://github.com/emilk/egui/issues/1516 is done - let painter = ui.ctx().debug_painter(); - painter.rect_stroke(rect, 0.0, (2.0, Color32::BLACK)); - painter.rect_stroke(rect, 0.0, (1.0, Color32::WHITE)); - } + use egui::{pos2, remap, Rect}; + + let Some([height, width, _]) = tensor.image_height_width_channels() else {return;}; + + let width = width as f32; + let height = height as f32; + + // Show where on the original image the zoomed-in region is at: + let left = (center_x - ZOOMED_IMAGE_TEXEL_RADIUS) as f32; + let right = (center_x + ZOOMED_IMAGE_TEXEL_RADIUS) as f32; + let top = (center_y - ZOOMED_IMAGE_TEXEL_RADIUS) as f32; + let bottom = (center_y + ZOOMED_IMAGE_TEXEL_RADIUS) as f32; + + let left = remap(left, 0.0..=width, image_rect.x_range()); + let right = remap(right, 0.0..=width, image_rect.x_range()); + let top = remap(top, 0.0..=height, image_rect.y_range()); + let bottom = remap(bottom, 0.0..=height, image_rect.y_range()); + + let rect = Rect::from_min_max(pos2(left, top), pos2(right, bottom)); + // TODO(emilk): use `parent_ui.painter()` and put it in a high Z layer, when https://github.com/emilk/egui/issues/1516 is done + let painter = ui.ctx().debug_painter(); + painter.rect_stroke(rect, 0.0, (2.0, Color32::BLACK)); + painter.rect_stroke(rect, 0.0, (1.0, Color32::WHITE)); } /// `meter`: iff this is a depth map, how long is one meter? 
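The outline helper maps the zoomed texel window into the on-screen rect with plain linear remaps. Below, `egui::remap` is re-implemented inline so the sketch runs without the crate; the rect numbers are made up:

```rust
/// Linear remap of `x` from the `from` range to the `to` range --
/// what `egui::remap` does in the hunk above.
fn remap(x: f32, from: (f32, f32), to: (f32, f32)) -> f32 {
    to.0 + (x - from.0) / (from.1 - from.0) * (to.1 - to.0)
}

fn main() {
    const ZOOMED_IMAGE_TEXEL_RADIUS: isize = 10;
    let (width, height) = (640.0_f32, 480.0_f32); // image size in texels
    // Made-up on-screen placement of the image: x-range and y-range.
    let x_range = (100.0, 420.0);
    let y_range = (50.0, 290.0);
    let (center_x, center_y) = (320_isize, 240_isize);

    // Texel-space edges of the zoomed-in window ...
    let left = (center_x - ZOOMED_IMAGE_TEXEL_RADIUS) as f32;
    let right = (center_x + ZOOMED_IMAGE_TEXEL_RADIUS) as f32;
    let top = (center_y - ZOOMED_IMAGE_TEXEL_RADIUS) as f32;
    let bottom = (center_y + ZOOMED_IMAGE_TEXEL_RADIUS) as f32;

    // ... remapped to screen space, ready for the two `rect_stroke` calls:
    println!(
        "outline: left={} right={} top={} bottom={}",
        remap(left, (0.0, width), x_range),
        remap(right, (0.0, width), x_range),
        remap(top, (0.0, height), y_range),
        remap(bottom, (0.0, height), y_range),
    );
}
```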
+#[allow(clippy::too_many_arguments)] pub fn show_zoomed_image_region( - tooltip_ui: &mut egui::Ui, - tensor_view: &ColoredTensorView<'_, '_>, - image_position: [isize; 2], + render_ctx: &mut re_renderer::RenderContext, + ui: &mut egui::Ui, + tensor: &Tensor, + tensor_stats: &TensorStats, + annotations: &crate::ui::Annotations, meter: Option, + debug_name: &str, + center_texel: [isize; 2], ) { - if let Some(colored_image) = tensor_view.colored_image { - use egui::{color_picker, pos2, remap, Color32, Mesh, Rect}; - - const POINTS_PER_TEXEL: f32 = 5.0; - let size = Vec2::splat((ZOOMED_IMAGE_TEXEL_RADIUS * 2 + 1) as f32 * POINTS_PER_TEXEL); - - let (_id, zoom_rect) = tooltip_ui.allocate_space(size); - let painter = tooltip_ui.painter(); - - painter.rect_filled(zoom_rect, 0.0, tooltip_ui.visuals().extreme_bg_color); - - let mut mesh = Mesh::default(); - let mut center_texel_rect = None; - for dx in -ZOOMED_IMAGE_TEXEL_RADIUS..=ZOOMED_IMAGE_TEXEL_RADIUS { - for dy in -ZOOMED_IMAGE_TEXEL_RADIUS..=ZOOMED_IMAGE_TEXEL_RADIUS { - let x = image_position[0] + dx; - let y = image_position[1] + dy; - let color = get_pixel(colored_image, [x, y]); - if let Some(color) = color { - if color != Color32::TRANSPARENT { - let tr = ZOOMED_IMAGE_TEXEL_RADIUS as f32; - let left = remap(dx as f32, -tr..=(tr + 1.0), zoom_rect.x_range()); - let right = remap((dx + 1) as f32, -tr..=(tr + 1.0), zoom_rect.x_range()); - let top = remap(dy as f32, -tr..=(tr + 1.0), zoom_rect.y_range()); - let bottom = remap((dy + 1) as f32, -tr..=(tr + 1.0), zoom_rect.y_range()); - let rect = Rect { - min: pos2(left, top), - max: pos2(right, bottom), - }; - mesh.add_colored_rect(rect, color); - - if dx == 0 && dy == 0 { - center_texel_rect = Some(rect); - } - } - } - } - } + if let Err(err) = try_show_zoomed_image_region( + render_ctx, + ui, + tensor, + tensor_stats, + annotations, + meter, + debug_name, + center_texel, + ) { + ui.label(format!("Error: {err}")); + } +} - painter.add(mesh); +/// `meter`: iff this is a depth map, how long is one meter? 
+#[allow(clippy::too_many_arguments)] +fn try_show_zoomed_image_region( + render_ctx: &mut re_renderer::RenderContext, + ui: &mut egui::Ui, + tensor: &Tensor, + tensor_stats: &TensorStats, + annotations: &crate::ui::Annotations, + meter: Option, + debug_name: &str, + center_texel: [isize; 2], +) -> anyhow::Result<()> { + let texture = crate::gpu_bridge::tensor_to_gpu( + render_ctx, + debug_name, + tensor, + tensor_stats, + annotations, + )?; - if let Some(center_texel_rect) = center_texel_rect { - painter.rect_stroke(center_texel_rect, 0.0, (2.0, Color32::BLACK)); - painter.rect_stroke(center_texel_rect, 0.0, (1.0, Color32::WHITE)); - } + let Some([height, width, _]) = tensor.image_height_width_channels() else { return Ok(()); }; + + const POINTS_PER_TEXEL: f32 = 5.0; + let size = Vec2::splat((ZOOMED_IMAGE_TEXEL_RADIUS * 2 + 1) as f32 * POINTS_PER_TEXEL); + + let (_id, zoom_rect) = ui.allocate_space(size); + let painter = ui.painter(); + + painter.rect_filled(zoom_rect, 0.0, ui.visuals().extreme_bg_color); + + { + let image_rect_on_screen = egui::Rect::from_min_size( + zoom_rect.center() + - POINTS_PER_TEXEL + * egui::vec2(center_texel[0] as f32 + 0.5, center_texel[1] as f32 + 0.5), + POINTS_PER_TEXEL * egui::vec2(width as f32, height as f32), + ); + + crate::gpu_bridge::render_image( + render_ctx, + &painter.with_clip_rect(zoom_rect), + image_rect_on_screen, + texture.clone(), + egui::TextureOptions::NEAREST, + debug_name, + )?; + } - if let Some(color) = get_pixel(colored_image, image_position) { - tooltip_ui.separator(); - let (x, y) = (image_position[0] as _, image_position[1] as _); + // Show the center text, to indicate which texel we're printing the values of: + { + let center_texel_rect = + egui::Rect::from_center_size(zoom_rect.center(), Vec2::splat(POINTS_PER_TEXEL)); + painter.rect_stroke(center_texel_rect.expand(1.0), 0.0, (1.0, Color32::BLACK)); + painter.rect_stroke(center_texel_rect, 0.0, (1.0, Color32::WHITE)); + } - tooltip_ui.vertical(|ui| { - egui::Grid::new("hovered pixel properties").show(ui, |ui| { - ui.label("Position:"); - ui.label(format!("{}, {}", image_position[0], image_position[1])); + let [x, y] = center_texel; + if 0 <= x && (x as u64) < width && 0 <= y && (y as u64) < height { + ui.separator(); + + ui.vertical(|ui| { + tensor_pixel_value_ui(ui, tensor, annotations, [x as _, y as _], meter); + + // Show a big sample of the color of the middle texel: + let (rect, _) = + ui.allocate_exact_size(Vec2::splat(ui.available_height()), egui::Sense::hover()); + // Position texture so that the center texel is at the center of the rect: + let zoom = rect.width(); + let image_rect_on_screen = egui::Rect::from_min_size( + rect.center() + - zoom * egui::vec2(center_texel[0] as f32 + 0.5, center_texel[1] as f32 + 0.5), + zoom * egui::vec2(width as f32, height as f32), + ); + crate::gpu_bridge::render_image( + render_ctx, + &ui.painter().with_clip_rect(rect), + image_rect_on_screen, + texture, + egui::TextureOptions::NEAREST, + debug_name, + ) + }) + .inner?; + } + Ok(()) +} + +fn tensor_pixel_value_ui( + ui: &mut egui::Ui, + tensor: &Tensor, + annotations: &crate::ui::Annotations, + [x, y]: [u64; 2], + meter: Option, +) { + egui::Grid::new("hovered pixel properties").show(ui, |ui| { + ui.label("Position:"); + ui.label(format!("{x}, {y}")); + ui.end_row(); + + if tensor.num_dim() == 2 { + if let Some(raw_value) = tensor.get(&[y, x]) { + if let (TensorDataMeaning::ClassId, Some(u16_val)) = + (tensor.meaning(), raw_value.try_as_u16()) + { + ui.label("Label:"); + ui.label( + 
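The interesting part of `try_show_zoomed_image_region` is that nothing is cropped on the CPU: the whole image is positioned so the hovered texel's center coincides with the zoom rect's center, and the painter's clip rect does the cropping. The placement math in isolation, with hypothetical values:

```rust
/// Top-left corner and size of the full image, in screen points, such that the
/// center of `center_texel` coincides with `zoom_center` -- the
/// `image_rect_on_screen` computation from the hunk above.
fn image_rect_on_screen(
    zoom_center: (f32, f32),
    center_texel: (isize, isize),
    image_size_texels: (f32, f32),
    points_per_texel: f32,
) -> ((f32, f32), (f32, f32)) {
    let min = (
        zoom_center.0 - points_per_texel * (center_texel.0 as f32 + 0.5),
        zoom_center.1 - points_per_texel * (center_texel.1 as f32 + 0.5),
    );
    let size = (
        points_per_texel * image_size_texels.0,
        points_per_texel * image_size_texels.1,
    );
    (min, size)
}

fn main() {
    const POINTS_PER_TEXEL: f32 = 5.0;
    let (min, size) =
        image_rect_on_screen((300.0, 200.0), (320, 240), (640.0, 480.0), POINTS_PER_TEXEL);
    // Texel (320, 240) spans min.x + 320*5 .. min.x + 321*5; its center is at 300.0:
    assert_eq!(min.0 + (320.0 + 0.5) * POINTS_PER_TEXEL, 300.0);
    println!("image drawn at {min:?}, size {size:?}");
}
```

Rendering the full texture with `NEAREST` filtering and letting the clip rect cut it down also avoids re-uploading a cropped texture every time the pointer moves.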
annotations + .class_description(Some(ClassId(u16_val))) + .annotation_info() + .label(None) + .unwrap_or_else(|| u16_val.to_string()), + ); ui.end_row(); + }; + } + } + if let Some(meter) = meter { + // This is a depth map + if let Some(raw_value) = tensor.get(&[y, x]) { + let raw_value = raw_value.as_f64(); + let meters = raw_value / meter as f64; + ui.label("Depth:"); + if meters < 1.0 { + ui.monospace(format!("{:.1} mm", meters * 1e3)); + } else { + ui.monospace(format!("{meters:.3} m")); + } + } + } + }); - if tensor_view.tensor.num_dim() == 2 { - if let Some(raw_value) = tensor_view.tensor.get(&[y, x]) { - if let (TensorDataMeaning::ClassId, annotations, Some(u16_val)) = ( - tensor_view.tensor.meaning(), - tensor_view.annotations, - raw_value.try_as_u16(), - ) { - ui.label("Label:"); - ui.label( - annotations - .class_description(Some(ClassId(u16_val))) - .annotation_info() - .label(None) - .unwrap_or_default(), - ); - ui.end_row(); - }; + let text = match tensor.num_dim() { + 2 => tensor.get(&[y, x]).map(|v| format!("Val: {v}")), + 3 => match tensor.shape()[2].size { + 0 => Some("Cannot preview 0-size channel".to_owned()), + 1 => tensor.get(&[y, x, 0]).map(|v| format!("Val: {v}")), + 3 => { + // TODO(jleibs): Track RGB ordering somehow -- don't just assume it + if let (Some(r), Some(g), Some(b)) = ( + tensor.get(&[y, x, 0]), + tensor.get(&[y, x, 1]), + tensor.get(&[y, x, 2]), + ) { + match (r, g, b) { + (TensorElement::U8(r), TensorElement::U8(g), TensorElement::U8(b)) => { + Some(format!("R: {r}, G: {g}, B: {b}, #{r:02X}{g:02X}{b:02X}")) } + _ => Some(format!("R: {r}, G: {g}, B: {b}")), } - if let Some(meter) = meter { - // This is a depth map - if let Some(raw_value) = tensor_view.tensor.get(&[y, x]) { - let raw_value = raw_value.as_f64(); - let meters = raw_value / meter as f64; - ui.label("Depth:"); - if meters < 1.0 { - ui.monospace(format!("{:.1} mm", meters * 1e3)); - } else { - ui.monospace(format!("{meters:.3} m")); - } - } - } - }); - - let tensor = tensor_view.tensor; - - let text = match tensor.num_dim() { - 2 => tensor.get(&[y, x]).map(|v| format!("Val: {v}")), - 3 => match tensor.shape()[2].size { - 0 => Some("Cannot preview 0-size channel".to_owned()), - 1 => tensor.get(&[y, x, 0]).map(|v| format!("Val: {v}")), - 3 => { - // TODO(jleibs): Track RGB ordering somehow -- don't just assume it - if let (Some(r), Some(g), Some(b)) = ( - tensor_view.tensor.get(&[y, x, 0]), - tensor_view.tensor.get(&[y, x, 1]), - tensor_view.tensor.get(&[y, x, 2]), - ) { - match (r, g, b) { - ( - TensorElement::U8(r), - TensorElement::U8(g), - TensorElement::U8(b), - ) => { - Some(format!("R: {r}, G: {g}, B: {b}, #{r:02X}{g:02X}{b:02X}")) - } - _ => { - Some(format!("R: {r}, G: {g}, B: {b}")) - } - } - } else { - None - } - }, - 4 => { - // TODO(jleibs): Track RGB ordering somehow -- don't just assume it - if let (Some(r), Some(g), Some(b), Some(a)) = ( - tensor_view.tensor.get(&[y, x, 0]), - tensor_view.tensor.get(&[y, x, 1]), - tensor_view.tensor.get(&[y, x, 2]), - tensor_view.tensor.get(&[y, x, 3]), - ) { - match (r, g, b, a) { - ( - TensorElement::U8(r), - TensorElement::U8(g), - TensorElement::U8(b), - TensorElement::U8(a), - ) => { - Some(format!("R: {r}, G: {g}, B: {b}, A: {a}, #{r:02X}{g:02X}{b:02X}{a:02X}")) - } - _ => { - Some(format!("R: {r}, G: {g}, B: {b}, A: {a}")) - } - } - } else { - None - } - }, - channels => { - Some(format!("Cannot preview {channels}-channel image")) - } - }, - dims => { - Some(format!("Cannot preview {dims}-dimensional image")) + } else { + None + } + } + 
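For depth maps, the hunk divides the raw sample by `meter` and switches display units below one meter. The same formatting logic, standalone:

```rust
/// Format a depth-map sample for display, as in `tensor_pixel_value_ui` above:
/// `meter` says how many raw units equal one meter (e.g. 1000.0 for u16 millimeters).
fn format_depth(raw_value: f64, meter: f32) -> String {
    let meters = raw_value / meter as f64;
    if meters < 1.0 {
        format!("{:.1} mm", meters * 1e3)
    } else {
        format!("{meters:.3} m")
    }
}

fn main() {
    assert_eq!(format_depth(250.0, 1000.0), "250.0 mm"); // 0.25 m reads better in mm
    assert_eq!(format_depth(1500.0, 1000.0), "1.500 m");
}
```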
4 => { + // TODO(jleibs): Track RGB ordering somehow -- don't just assume it + if let (Some(r), Some(g), Some(b), Some(a)) = ( + tensor.get(&[y, x, 0]), + tensor.get(&[y, x, 1]), + tensor.get(&[y, x, 2]), + tensor.get(&[y, x, 3]), + ) { + match (r, g, b, a) { + ( + TensorElement::U8(r), + TensorElement::U8(g), + TensorElement::U8(b), + TensorElement::U8(a), + ) => Some(format!( + "R: {r}, G: {g}, B: {b}, A: {a}, #{r:02X}{g:02X}{b:02X}{a:02X}" + )), + _ => Some(format!("R: {r}, G: {g}, B: {b}, A: {a}")), } - }; - - if let Some(text) = text { - ui.label(text); } else { - ui.label("No Value"); + None } + } + channels => Some(format!("Cannot preview {channels}-channel image")), + }, + dims => Some(format!("Cannot preview {dims}-dimensional image")), + }; - color_picker::show_color(ui, color, Vec2::splat(ui.available_height())); - }); - } - } -} - -fn get_pixel(image: &ColorImage, [x, y]: [isize; 2]) -> Option { - if x < 0 || y < 0 || image.width() as isize <= x || image.height() as isize <= y { - None + if let Some(text) = text { + ui.label(text); } else { - Some(image[(x as _, y as _)]) + ui.label("No Value"); } } -fn histogram_ui(ui: &mut egui::Ui, rgba_image: &image::RgbaImage) -> egui::Response { +fn rgb8_histogram_ui(ui: &mut egui::Ui, rgb: &[u8]) -> egui::Response { crate::profile_function!(); let mut histograms = [[0_u64; 256]; 3]; { // TODO(emilk): this is slow, so cache the results! crate::profile_scope!("build"); - for pixel in rgba_image.pixels() { + for pixel in rgb.chunks_exact(3) { for c in 0..3 { histograms[c][pixel[c] as usize] += 1; } @@ -486,7 +632,6 @@ fn histogram_ui(ui: &mut egui::Ui, rgba_image: &image::RgbaImage) -> egui::Respo } use egui::plot::{Bar, BarChart, Legend, Plot}; - use egui::Color32; let names = ["R", "G", "B"]; let colors = [Color32::RED, Color32::GREEN, Color32::BLUE]; @@ -529,61 +674,73 @@ fn histogram_ui(ui: &mut egui::Ui, rgba_image: &image::RgbaImage) -> egui::Respo } #[cfg(not(target_arch = "wasm32"))] -fn image_options( - ui: &mut egui::Ui, - tensor: &re_log_types::component_types::Tensor, - dynamic_image: &image::DynamicImage, -) { - // TODO(emilk): support copying images on web +fn copy_and_save_image_ui(ui: &mut egui::Ui, tensor: &Tensor, _encoded_tensor: &Tensor) { + ui.horizontal(|ui| { + if tensor.could_be_dynamic_image() && ui.button("Click to copy image").clicked() { + match tensor.to_dynamic_image() { + Ok(dynamic_image) => { + let rgba = dynamic_image.to_rgba8(); + crate::misc::Clipboard::with(|clipboard| { + clipboard.set_image( + [rgba.width() as _, rgba.height() as _], + bytemuck::cast_slice(rgba.as_raw()), + ); + }); + } + Err(err) => { + re_log::error!("Failed to convert tensor to image: {err}"); + } + } + } - use re_log_types::component_types::TensorData; + if ui.button("Save image…").clicked() { + match tensor.to_dynamic_image() { + Ok(dynamic_image) => { + save_image(_encoded_tensor, &dynamic_image); + } + Err(err) => { + re_log::error!("Failed to convert tensor to image: {err}"); + } + } + } + }); +} - #[cfg(not(target_arch = "wasm32"))] - if ui.button("Click to copy image").clicked() { - let rgba = dynamic_image.to_rgba8(); - crate::misc::Clipboard::with(|clipboard| { - clipboard.set_image( - [rgba.width() as _, rgba.height() as _], - bytemuck::cast_slice(rgba.as_raw()), - ); - }); - } +#[cfg(not(target_arch = "wasm32"))] +fn save_image(tensor: &re_log_types::component_types::Tensor, dynamic_image: &image::DynamicImage) { + use re_log_types::component_types::TensorData; - // TODO(emilk): support saving images on web - 
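`rgb8_histogram_ui` now walks the packed `&[u8]` buffer directly with `chunks_exact(3)` instead of materializing an `image::RgbaImage`. The counting loop on its own:

```rust
/// Build per-channel histograms from tightly packed RGB8 data,
/// mirroring the `chunks_exact(3)` loop in `rgb8_histogram_ui` above.
fn rgb8_histograms(rgb: &[u8]) -> [[u64; 256]; 3] {
    let mut histograms = [[0_u64; 256]; 3];
    for pixel in rgb.chunks_exact(3) {
        for c in 0..3 {
            histograms[c][pixel[c] as usize] += 1;
        }
    }
    histograms
}

fn main() {
    // Two pixels: pure red and mid gray.
    let rgb = [255, 0, 0, 128, 128, 128];
    let h = rgb8_histograms(&rgb);
    assert_eq!(h[0][255], 1); // one red sample at full intensity
    assert_eq!(h[1][128], 1); // one green sample at 128
    assert_eq!(h[2][0], 1);   // one blue sample at 0
}
```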
#[cfg(not(target_arch = "wasm32"))] - if ui.button("Save image…").clicked() { - match &tensor.data { - TensorData::JPEG(bytes) => { - if let Some(path) = rfd::FileDialog::new() - .set_file_name("image.jpg") - .save_file() - { - match write_binary(&path, bytes.as_slice()) { - Ok(()) => { - re_log::info!("Image saved to {path:?}"); - } - Err(err) => { - re_log::error!( - "Failed saving image to {path:?}: {}", - re_error::format(&err) - ); - } + match &tensor.data { + TensorData::JPEG(bytes) => { + if let Some(path) = rfd::FileDialog::new() + .set_file_name("image.jpg") + .save_file() + { + match write_binary(&path, bytes.as_slice()) { + Ok(()) => { + re_log::info!("Image saved to {path:?}"); + } + Err(err) => { + re_log::error!( + "Failed saving image to {path:?}: {}", + re_error::format(&err) + ); } } } - _ => { - if let Some(path) = rfd::FileDialog::new() - .set_file_name("image.png") - .save_file() - { - match dynamic_image.save(&path) { - // TODO(emilk): show a popup instead of logging result - Ok(()) => { - re_log::info!("Image saved to {path:?}"); - } - Err(err) => { - re_log::error!("Failed saving image to {path:?}: {err}"); - } + } + _ => { + if let Some(path) = rfd::FileDialog::new() + .set_file_name("image.png") + .save_file() + { + match dynamic_image.save(&path) { + // TODO(emilk): show a popup instead of logging result + Ok(()) => { + re_log::info!("Image saved to {path:?}"); + } + Err(err) => { + re_log::error!("Failed saving image to {path:?}: {err}"); } } } diff --git a/crates/re_viewer/src/ui/data_ui/instance_path.rs b/crates/re_viewer/src/ui/data_ui/instance_path.rs index 6bd727eb4092..866b7c58c2d7 100644 --- a/crates/re_viewer/src/ui/data_ui/instance_path.rs +++ b/crates/re_viewer/src/ui/data_ui/instance_path.rs @@ -7,7 +7,7 @@ use crate::{misc::ViewerContext, ui::UiVerbosity}; use super::DataUi; const HIDDEN_COMPONENTS_FOR_ALL_VERBOSITY: &[&str] = &["rerun.instance_key"]; -const HIDDEN_COMPONENTS_FOR_LOW_VERBOSITY: &[&str] = &["rerun.msg_id"]; +const HIDDEN_COMPONENTS_FOR_LOW_VERBOSITY: &[&str] = &[]; impl DataUi for InstancePath { fn data_ui( @@ -45,7 +45,7 @@ impl DataUi for InstancePath { continue; } match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) | UiVerbosity::Reduced => { + UiVerbosity::Small | UiVerbosity::Reduced => { if HIDDEN_COMPONENTS_FOR_LOW_VERBOSITY .contains(&component_name.as_str()) { @@ -65,7 +65,7 @@ impl DataUi for InstancePath { Err(err) => { ui.label(ctx.re_ui.error_text(format!("Error: {err}"))); } - Ok(component_data) => { + Ok((_, component_data)) => { if self.instance_key.is_splat() { super::component::EntityComponentWithInstances { entity_path: self.entity_path.clone(), @@ -83,6 +83,7 @@ impl DataUi for InstancePath { ui, UiVerbosity::Small, query, + &self.entity_path, &component_data, &self.instance_key, ); diff --git a/crates/re_viewer/src/ui/data_ui/log_msg.rs b/crates/re_viewer/src/ui/data_ui/log_msg.rs index b536284f6eeb..c4c982c2d066 100644 --- a/crates/re_viewer/src/ui/data_ui/log_msg.rs +++ b/crates/re_viewer/src/ui/data_ui/log_msg.rs @@ -16,8 +16,8 @@ impl DataUi for LogMsg { ) { match self { LogMsg::BeginRecordingMsg(msg) => msg.data_ui(ctx, ui, verbosity, query), - LogMsg::EntityPathOpMsg(msg) => msg.data_ui(ctx, ui, verbosity, query), - LogMsg::ArrowMsg(msg) => msg.data_ui(ctx, ui, verbosity, query), + LogMsg::EntityPathOpMsg(_, msg) => msg.data_ui(ctx, ui, verbosity, query), + LogMsg::ArrowMsg(_, msg) => msg.data_ui(ctx, ui, verbosity, query), LogMsg::Goodbye(_) => { ui.label("Goodbye"); } @@ -34,7 +34,7 @@ impl 
DataUi for BeginRecordingMsg { _query: &re_arrow_store::LatestAtQuery, ) { ui.code("BeginRecordingMsg"); - let BeginRecordingMsg { msg_id: _, info } = self; + let BeginRecordingMsg { row_id: _, info } = self; let RecordingInfo { application_id, recording_id, @@ -76,7 +76,7 @@ impl DataUi for EntityPathOpMsg { query: &re_arrow_store::LatestAtQuery, ) { let EntityPathOpMsg { - msg_id: _, + row_id: _, time_point, path_op, } = self; @@ -101,7 +101,7 @@ impl DataUi for ArrowMsg { verbosity: UiVerbosity, query: &re_arrow_store::LatestAtQuery, ) { - let table: DataTable = match self.try_into() { + let table = match DataTable::from_arrow_msg(self) { Ok(table) => table, Err(err) => { ui.label( @@ -113,7 +113,7 @@ impl DataUi for ArrowMsg { }; // TODO(cmc): Come up with something a bit nicer once data tables become a common sight. - for row in table.as_rows() { + for row in table.to_rows() { egui::Grid::new("fields").num_columns(2).show(ui, |ui| { ui.monospace("entity_path:"); ctx.entity_path_button(ui, None, row.entity_path()); diff --git a/crates/re_viewer/src/ui/data_ui/mod.rs b/crates/re_viewer/src/ui/data_ui/mod.rs index cb94694d0a40..753d17302d54 100644 --- a/crates/re_viewer/src/ui/data_ui/mod.rs +++ b/crates/re_viewer/src/ui/data_ui/mod.rs @@ -1,6 +1,7 @@ //! The `DataUi` trait and implementations provide methods for representing data using [`egui`]. use itertools::Itertools; +use re_data_store::EntityPath; use re_log_types::{DataCell, PathOp, TimePoint}; use crate::misc::ViewerContext; @@ -14,7 +15,6 @@ mod entity_path; pub(crate) mod image; mod instance_path; mod log_msg; -mod msg_id; pub(crate) use component_ui_registry::ComponentUiRegistry; @@ -24,9 +24,6 @@ pub enum UiVerbosity { /// Keep it small enough to fit on one row. Small, - /// At most this height - MaxHeight(f32), - /// Display a reduced set, used for hovering. Reduced, @@ -34,7 +31,7 @@ pub enum UiVerbosity { All, } -/// Types implementing [`DataUi`] can draw themselves with a [`ViewerContext`] and [`egui::Ui`]. +/// Types implementing [`DataUi`] can display themselves in an [`egui::Ui`]. pub(crate) trait DataUi { /// If you need to lookup something in the data store, use the given query to do so. fn data_ui( @@ -46,6 +43,37 @@ pub(crate) trait DataUi { ); } +/// Similar to [`DataUi`], but for data that is related to an entity (e.g. a component). +/// +/// This is given the context of the entity it is part of so it can do queries. +pub(crate) trait EntityDataUi { + /// If you need to lookup something in the data store, use the given query to do so. 
+ fn entity_data_ui( + &self, + ctx: &mut ViewerContext<'_>, + ui: &mut egui::Ui, + verbosity: UiVerbosity, + entity_path: &EntityPath, + query: &re_arrow_store::LatestAtQuery, + ); +} + +impl EntityDataUi for T +where + T: DataUi, +{ + fn entity_data_ui( + &self, + ctx: &mut ViewerContext<'_>, + ui: &mut egui::Ui, + verbosity: UiVerbosity, + _entity: &EntityPath, + query: &re_arrow_store::LatestAtQuery, + ) { + self.data_ui(ctx, ui, verbosity, query); + } +} + // ---------------------------------------------------------------------------- impl DataUi for TimePoint { @@ -81,7 +109,7 @@ impl DataUi for [DataCell] { sorted.sort_by_key(|cb| cb.component_name()); match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) => { + UiVerbosity::Small => { ui.label(sorted.iter().map(format_cell).join(", ")); } diff --git a/crates/re_viewer/src/ui/data_ui/msg_id.rs b/crates/re_viewer/src/ui/data_ui/msg_id.rs deleted file mode 100644 index b954bdfcb198..000000000000 --- a/crates/re_viewer/src/ui/data_ui/msg_id.rs +++ /dev/null @@ -1,28 +0,0 @@ -use re_log_types::MsgId; - -use crate::misc::ViewerContext; - -use super::{DataUi, UiVerbosity}; - -impl DataUi for MsgId { - fn data_ui( - &self, - ctx: &mut ViewerContext<'_>, - ui: &mut egui::Ui, - verbosity: UiVerbosity, - query: &re_arrow_store::LatestAtQuery, - ) { - match verbosity { - UiVerbosity::Small | UiVerbosity::MaxHeight(_) => { - ctx.msg_id_button(ui, *self); - } - UiVerbosity::All | UiVerbosity::Reduced => { - if let Some(msg) = ctx.log_db.get_log_msg(self) { - msg.data_ui(ctx, ui, verbosity, query); - } else { - ctx.msg_id_button(ui, *self); - } - } - } - } -} diff --git a/crates/re_viewer/src/ui/event_log_view.rs b/crates/re_viewer/src/ui/event_log_view.rs deleted file mode 100644 index 94c96edddfe8..000000000000 --- a/crates/re_viewer/src/ui/event_log_view.rs +++ /dev/null @@ -1,234 +0,0 @@ -use itertools::Itertools as _; - -use re_arrow_store::{LatestAtQuery, TimeInt}; -use re_format::format_number; -use re_log_types::{BeginRecordingMsg, DataTable, EntityPathOpMsg, LogMsg, RecordingInfo}; - -use crate::{UiVerbosity, ViewerContext}; - -use super::data_ui::DataUi; - -/// An event log, a table of all log messages. 
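The blanket `impl<T: DataUi> EntityDataUi for T` is what keeps this refactor small: every component that draws itself without entity context automatically satisfies the richer trait. A miniature of the shape — both traits here are simplified stand-ins, returning strings instead of drawing into an `egui::Ui`:

```rust
// Hypothetical stand-ins; only the blanket-impl shape matters.
struct EntityPath(String);

trait DataUi {
    fn data_ui(&self) -> String;
}

/// Like `DataUi`, but with entity context so implementors (e.g. `Tensor`)
/// can run annotation/store queries.
trait EntityDataUi {
    fn entity_data_ui(&self, entity_path: &EntityPath) -> String;
}

// Every plain `DataUi` type is trivially an `EntityDataUi` that ignores the path:
impl<T: DataUi> EntityDataUi for T {
    fn entity_data_ui(&self, _entity_path: &EntityPath) -> String {
        self.data_ui()
    }
}

struct Scalar(f64);

impl DataUi for Scalar {
    fn data_ui(&self) -> String {
        format!("{}", self.0)
    }
}

fn main() {
    let path = EntityPath("robot/lidar".into());
    // `Scalar` never implemented `EntityDataUi` directly, yet this compiles:
    println!("{}", Scalar(3.5).entity_data_ui(&path));
}
```

Note that a type can implement only one of the two traits (the blanket impl would otherwise conflict), which is why the diff moves `Tensor` from `DataUi` to `EntityDataUi` rather than implementing both.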
-#[derive(Default, serde::Deserialize, serde::Serialize)] -#[serde(default)] -pub(crate) struct EventLogView {} - -impl EventLogView { - #[allow(clippy::unused_self)] - pub fn ui(&mut self, ctx: &mut ViewerContext<'_>, ui: &mut egui::Ui) { - crate::profile_function!(); - - let messages = { - crate::profile_scope!("Collecting messages"); - ctx.log_db.chronological_log_messages().collect_vec() - }; - - egui::Frame { - inner_margin: re_ui::ReUi::view_padding().into(), - ..egui::Frame::default() - } - .show(ui, |ui| { - ui.label(format!("{} log lines", format_number(ctx.log_db.len()))); - ui.separator(); - - egui::ScrollArea::horizontal() - .auto_shrink([false; 2]) - .show(ui, |ui| { - message_table(ctx, ui, &messages); - }); - }); - } -} - -pub(crate) fn message_table(ctx: &mut ViewerContext<'_>, ui: &mut egui::Ui, messages: &[&LogMsg]) { - crate::profile_function!(); - - use egui_extras::{Column, TableBuilder}; - - TableBuilder::new(ui) - .max_scroll_height(f32::INFINITY) // Fill up whole height - .cell_layout(egui::Layout::left_to_right(egui::Align::Center)) - .resizable(true) - .column(Column::initial(100.0).at_least(50.0).clip(true)) // msg_id - .column(Column::initial(130.0).at_least(50.0).clip(true)) // message type - .columns( - // timeline(s): - Column::auto().clip(true).at_least(50.0), - ctx.log_db.timelines().count(), - ) - .column(Column::auto().clip(true).at_least(50.0)) // path - .column(Column::remainder()) // payload - .header(re_ui::ReUi::table_header_height(), |mut header| { - re_ui::ReUi::setup_table_header(&mut header); - header.col(|ui| { - ui.strong("MsgID"); - }); - header.col(|ui| { - ui.strong("Message Type"); - }); - for timeline in ctx.log_db.timelines() { - header.col(|ui| { - ctx.timeline_button(ui, timeline); - }); - } - header.col(|ui| { - ui.strong("Path"); - }); - header.col(|ui| { - ui.strong("Payload"); - }); - }) - .body(|mut body| { - re_ui::ReUi::setup_table_body(&mut body); - - // for MANY messages, `heterogeneous_rows` is too slow. TODO(emilk): how many? - if messages.len() < 10_000_000 { - body.heterogeneous_rows( - messages.iter().copied().map(row_height), - |index, mut row| { - let msg = messages[index]; - table_row(ctx, &mut row, msg, row_height(msg)); - }, - ); - } else { - let row_height = re_ui::ReUi::table_line_height(); - body.rows(row_height, messages.len(), |index, mut row| { - table_row(ctx, &mut row, messages[index], row_height); - }); - } - }); -} - -fn row_height(_msg: &LogMsg) -> f32 { - // TODO(emilk): make rows with images (tensors) higher! 
- re_ui::ReUi::table_line_height() -} - -fn table_row( - ctx: &mut ViewerContext<'_>, - row: &mut egui_extras::TableRow<'_, '_>, - msg: &LogMsg, - row_height: f32, -) { - match msg { - LogMsg::BeginRecordingMsg(msg) => { - let BeginRecordingMsg { msg_id, info } = msg; - let RecordingInfo { - application_id, - recording_id, - started, - recording_source, - is_official_example, - } = info; - - row.col(|ui| { - ctx.msg_id_button(ui, *msg_id); - }); - row.col(|ui| { - ui.monospace("BeginRecordingMsg"); - ui.label(format!("Source: {recording_source}")); - ui.label(format!("Official example: {is_official_example}")); - }); - for _ in ctx.log_db.timelines() { - row.col(|ui| { - ui.label("-"); - }); - } - row.col(|ui| { - ui.label(started.format()); - }); - row.col(|ui| { - ui.monospace(format!("{application_id} - {recording_id:?}")); - }); - } - LogMsg::EntityPathOpMsg(msg) => { - let EntityPathOpMsg { - msg_id, - time_point, - path_op, - } = msg; - - row.col(|ui| { - ctx.msg_id_button(ui, *msg_id); - }); - row.col(|ui| { - ui.monospace("EntityPathOpMsg"); - }); - for timeline in ctx.log_db.timelines() { - row.col(|ui| { - if let Some(value) = time_point.get(timeline) { - ctx.time_button(ui, timeline, *value); - } - }); - } - row.col(|ui| { - ctx.entity_path_button(ui, None, path_op.entity_path()); - }); - row.col(|ui| { - let timeline = *ctx.rec_cfg.time_ctrl.timeline(); - let query = LatestAtQuery::new( - timeline, - time_point.get(&timeline).copied().unwrap_or(TimeInt::MAX), - ); - path_op.data_ui(ctx, ui, UiVerbosity::All, &query); - }); - } - // NOTE: This really only makes sense because we don't yet have batches with more than a - // single row at the moment... and by the time we do, the event log view will have - // disappeared entirely. - LogMsg::ArrowMsg(msg) => match DataTable::try_from(msg) { - Ok(table) => { - for datarow in table.as_rows() { - row.col(|ui| { - ctx.msg_id_button(ui, datarow.row_id()); - }); - row.col(|ui| { - ui.monospace("ArrowMsg"); - }); - for timeline in ctx.log_db.timelines() { - row.col(|ui| { - if let Some(value) = datarow.timepoint().get(timeline) { - ctx.time_button(ui, timeline, *value); - } - }); - } - row.col(|ui| { - ctx.entity_path_button(ui, None, datarow.entity_path()); - }); - - row.col(|ui| { - let timeline = *ctx.rec_cfg.time_ctrl.timeline(); - let query = LatestAtQuery::new( - timeline, - datarow - .timepoint() - .get(&timeline) - .copied() - .unwrap_or(TimeInt::MAX), - ); - datarow.cells().data_ui( - ctx, - ui, - UiVerbosity::MaxHeight(row_height), - &query, - ); - }); - } - } - Err(err) => { - re_log::error_once!("Bad arrow payload: {err}",); - row.col(|ui| { - ui.label("Bad Arrow Payload".to_owned()); - }); - } - }, - LogMsg::Goodbye(msg_id) => { - row.col(|ui| { - ctx.msg_id_button(ui, *msg_id); - }); - row.col(|ui| { - ui.monospace("Goodbye"); - }); - } - } -} diff --git a/crates/re_viewer/src/ui/memory_panel.rs b/crates/re_viewer/src/ui/memory_panel.rs index f9734004f321..5c61be30e84a 100644 --- a/crates/re_viewer/src/ui/memory_panel.rs +++ b/crates/re_viewer/src/ui/memory_panel.rs @@ -1,4 +1,4 @@ -use re_arrow_store::DataStoreStats; +use re_arrow_store::{DataStoreConfig, DataStoreRowStats, DataStoreStats}; use re_format::{format_bytes, format_number}; use re_memory::{util::sec_since_start, MemoryHistory, MemoryLimit, MemoryUse}; use re_renderer::WgpuResourcePoolStatistics; @@ -26,9 +26,7 @@ impl MemoryPanel { (gpu_resource_stats.total_buffer_size_in_bytes + gpu_resource_stats.total_texture_size_in_bytes) as _, ), - Some( - 
(store_stats.total_index_size_bytes + store_stats.total_component_size_bytes) as _, - ), + Some(store_stats.total.num_bytes as _), ); } @@ -42,6 +40,7 @@ impl MemoryPanel { ui: &mut egui::Ui, limit: &MemoryLimit, gpu_resource_stats: &WgpuResourcePoolStatistics, + store_config: &DataStoreConfig, store_stats: &DataStoreStats, ) { crate::profile_function!(); @@ -54,7 +53,7 @@ impl MemoryPanel { .min_width(250.0) .default_width(300.0) .show_inside(ui, |ui| { - Self::left_side(ui, limit, gpu_resource_stats, store_stats); + Self::left_side(ui, limit, gpu_resource_stats, store_config, store_stats); }); egui::CentralPanel::default().show_inside(ui, |ui| { @@ -67,6 +66,7 @@ impl MemoryPanel { ui: &mut egui::Ui, limit: &MemoryLimit, gpu_resource_stats: &WgpuResourcePoolStatistics, + store_config: &DataStoreConfig, store_stats: &DataStoreStats, ) { ui.strong("Depthai Viewer resource usage"); @@ -83,7 +83,7 @@ impl MemoryPanel { ui.separator(); ui.collapsing("Datastore Resources", |ui| { - Self::store_stats(ui, store_stats); + Self::store_stats(ui, store_config, store_stats); }); } @@ -180,15 +180,16 @@ impl MemoryPanel { }); } - fn store_stats(ui: &mut egui::Ui, store_stats: &DataStoreStats) { + fn store_stats( + ui: &mut egui::Ui, + store_config: &DataStoreConfig, + store_stats: &DataStoreStats, + ) { egui::Grid::new("store config grid") .num_columns(3) .show(ui, |ui| { - let DataStoreStats { config, .. } = store_stats; - ui.label(egui::RichText::new("Limits").italics()); ui.label("Row limit"); - ui.label("Size limit"); ui.end_row(); let label_rows = |ui: &mut egui::Ui, num_rows| { @@ -198,22 +199,13 @@ impl MemoryPanel { ui.label(re_format::format_number(num_rows as _)) } }; - let label_size = |ui: &mut egui::Ui, size| { - if size == u64::MAX { - ui.label("+∞") - } else { - ui.label(re_format::format_bytes(size as _)) - } - }; - ui.label("Indices:"); - label_rows(ui, config.index_bucket_nb_rows); - label_size(ui, config.index_bucket_size_bytes); + ui.label("Timeless:"); + label_rows(ui, u64::MAX); ui.end_row(); - ui.label("Components:"); - label_rows(ui, config.component_bucket_nb_rows); - label_size(ui, config.component_bucket_size_bytes); + ui.label("Temporal:"); + label_rows(ui, store_config.indexed_bucket_num_rows); ui.end_row(); }); @@ -223,21 +215,13 @@ impl MemoryPanel { .num_columns(3) .show(ui, |ui| { let DataStoreStats { - total_timeless_index_rows, - total_timeless_index_size_bytes, - total_timeless_component_rows, - total_timeless_component_size_bytes, - total_temporal_index_rows, - total_temporal_index_size_bytes, - total_temporal_index_buckets, - total_temporal_component_rows, - total_temporal_component_size_bytes, - total_temporal_component_buckets, - total_index_rows, - total_index_size_bytes, - total_component_rows, - total_component_size_bytes, - config: _, + type_registry, + metadata_registry, + autogenerated, + timeless, + temporal, + temporal_buckets, + total, } = *store_stats; ui.label(egui::RichText::new("Stats").italics()); @@ -246,48 +230,44 @@ impl MemoryPanel { ui.label("Size"); ui.end_row(); - let label_buckets = |ui: &mut egui::Ui, num_buckets| { - ui.label(re_format::format_number(num_buckets as _)) - }; - let label_rows = - |ui: &mut egui::Ui, num_rows| ui.label(re_format::format_number(num_rows as _)); - let label_size = - |ui: &mut egui::Ui, size| ui.label(re_format::format_bytes(size as _)); + fn label_row_stats(ui: &mut egui::Ui, row_stats: DataStoreRowStats) { + let DataStoreRowStats { + num_rows, + num_bytes, + } = row_stats; - ui.label("Indices (timeless):"); + 
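The rewritten memory panel destructures the stats structs exhaustively (`let DataStoreStats { type_registry, .. } = *store_stats;` with every field named), so a field added upstream fails compilation here until the panel displays it. A miniature of that idiom, with `RowStats`/`StoreStats` as simplified stand-ins for `DataStoreRowStats`/`DataStoreStats`:

```rust
/// Hypothetical miniature of `DataStoreRowStats`.
#[derive(Clone, Copy)]
struct RowStats {
    num_rows: u64,
    num_bytes: u64,
}

/// Hypothetical miniature of `DataStoreStats`.
#[derive(Clone, Copy)]
struct StoreStats {
    timeless: RowStats,
    temporal: RowStats,
    temporal_buckets: u64,
    total: RowStats,
}

fn print_row(label: &str, stats: RowStats) {
    let RowStats { num_rows, num_bytes } = stats; // exhaustive: new fields must be handled
    println!("{label:<10} {num_rows:>8} rows {num_bytes:>10} B");
}

fn print_stats(stats: &StoreStats) {
    // Destructure the whole struct; if a field is added upstream,
    // this line stops compiling and the panel cannot silently omit it.
    let StoreStats { timeless, temporal, temporal_buckets, total } = *stats;
    print_row("timeless", timeless);
    println!("temporal buckets: {temporal_buckets}");
    print_row("temporal", temporal);
    print_row("total", total);
}

fn main() {
    print_stats(&StoreStats {
        timeless: RowStats { num_rows: 12, num_bytes: 4096 },
        temporal: RowStats { num_rows: 3400, num_bytes: 1 << 20 },
        temporal_buckets: 7,
        total: RowStats { num_rows: 3412, num_bytes: (1 << 20) + 4096 },
    });
}
```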
ui.label(re_format::format_number(num_rows as _)); + ui.label(re_format::format_bytes(num_bytes as _)); + } + + ui.label("Type registry:"); ui.label(""); - label_rows(ui, total_timeless_index_rows); - label_size(ui, total_timeless_index_size_bytes); + label_row_stats(ui, type_registry); ui.end_row(); - ui.label("Indices (temporal):"); - label_buckets(ui, total_temporal_index_buckets); - label_rows(ui, total_temporal_index_rows); - label_size(ui, total_temporal_index_size_bytes); + ui.label("Metadata registry:"); + ui.label(""); + label_row_stats(ui, metadata_registry); ui.end_row(); - ui.label("Indices (total):"); - label_buckets(ui, total_temporal_index_buckets); - label_rows(ui, total_index_rows); - label_size(ui, total_index_size_bytes); + ui.label("Cluster cache:"); + ui.label(""); + label_row_stats(ui, autogenerated); ui.end_row(); - ui.label("Components (timeless):"); + ui.label("Timeless:"); ui.label(""); - label_rows(ui, total_timeless_component_rows); - label_size(ui, total_timeless_component_size_bytes); + label_row_stats(ui, timeless); ui.end_row(); - ui.label("Components (temporal):"); - label_buckets(ui, total_temporal_component_buckets); - label_rows(ui, total_temporal_component_rows); - label_size(ui, total_temporal_component_size_bytes); + ui.label("Temporal:"); + ui.label(re_format::format_number(temporal_buckets as _)); + label_row_stats(ui, temporal); ui.end_row(); - ui.label("Components (total):"); - label_buckets(ui, total_temporal_component_buckets); - label_rows(ui, total_component_rows); - label_size(ui, total_component_size_bytes); + ui.label("Total"); + ui.label(re_format::format_number(temporal_buckets as _)); + label_row_stats(ui, total); ui.end_row(); }); } diff --git a/crates/re_viewer/src/ui/mod.rs b/crates/re_viewer/src/ui/mod.rs index 1a24b9878cdf..f07ba8c6c466 100644 --- a/crates/re_viewer/src/ui/mod.rs +++ b/crates/re_viewer/src/ui/mod.rs @@ -17,7 +17,6 @@ mod view_time_series; mod viewport; pub(crate) mod data_ui; -pub(crate) mod event_log_view; pub(crate) mod memory_panel; pub(crate) mod selection_panel; pub(crate) mod time_panel; diff --git a/crates/re_viewer/src/ui/selection_history.rs b/crates/re_viewer/src/ui/selection_history.rs index 69b7f29d3bae..8410b5428582 100644 --- a/crates/re_viewer/src/ui/selection_history.rs +++ b/crates/re_viewer/src/ui/selection_history.rs @@ -1,5 +1,3 @@ -use re_data_store::LogDb; - use crate::misc::ItemCollection; use super::Blueprint; @@ -32,12 +30,12 @@ pub struct SelectionHistory { } impl SelectionHistory { - pub(crate) fn on_frame_start(&mut self, log_db: &LogDb, blueprint: &Blueprint) { + pub(crate) fn on_frame_start(&mut self, blueprint: &Blueprint) { crate::profile_function!(); let mut i = 0; self.stack.retain_mut(|selection| { - selection.purge_invalid(log_db, blueprint); + selection.purge_invalid(blueprint); let retain = !selection.is_empty(); if !retain && i <= self.current { self.current = self.current.saturating_sub(1); diff --git a/crates/re_viewer/src/ui/selection_history_ui.rs b/crates/re_viewer/src/ui/selection_history_ui.rs index 29064a43efa2..e91c53966da7 100644 --- a/crates/re_viewer/src/ui/selection_history_ui.rs +++ b/crates/re_viewer/src/ui/selection_history_ui.rs @@ -1,7 +1,7 @@ use egui::RichText; use re_ui::Command; -use super::{HistoricalSelection, SelectionHistory}; +use super::SelectionHistory; use crate::{misc::ItemCollection, ui::Blueprint, Item}; // --- @@ -14,7 +14,6 @@ impl SelectionHistory { blueprint: &Blueprint, ) -> Option { self.control_bar_ui(re_ui, ui, blueprint) - .map(|sel| 
sel.selection) } fn control_bar_ui( @@ -22,7 +21,7 @@ impl SelectionHistory { re_ui: &re_ui::ReUi, ui: &mut egui::Ui, blueprint: &Blueprint, - ) -> Option { + ) -> Option { ui.horizontal_centered(|ui| { ui.strong("Selection").on_hover_text("The Selection View contains information and options about the currently selected object(s)."); @@ -38,27 +37,23 @@ impl SelectionHistory { }).inner } - // TODO(cmc): note that for now, we only check prev/next shortcuts in the UI code that - // shows the associated buttons... this means shortcuts only work when the selection panel - // is open! - // We might want to change this at some point, though the way things are currently designed, - // there isn't much point in selecting stuff while the selection panel is hidden anyway. - - pub fn select_previous(&mut self) -> Option { + #[must_use] + pub fn select_previous(&mut self) -> Option { if let Some(previous) = self.previous() { if previous.index != self.current { self.current = previous.index; - return self.current(); + return self.current().map(|s| s.selection); } } None } - pub fn select_next(&mut self) -> Option { + #[must_use] + pub fn select_next(&mut self) -> Option { if let Some(next) = self.next() { if next.index != self.current { self.current = next.index; - return self.current(); + return self.current().map(|s| s.selection); } } None @@ -69,7 +64,7 @@ impl SelectionHistory { re_ui: &re_ui::ReUi, ui: &mut egui::Ui, blueprint: &Blueprint, - ) -> Option { + ) -> Option { // undo selection if let Some(previous) = self.previous() { let response = re_ui @@ -112,7 +107,7 @@ impl SelectionHistory { re_ui: &re_ui::ReUi, ui: &mut egui::Ui, blueprint: &Blueprint, - ) -> Option { + ) -> Option { // redo selection if let Some(next) = self.next() { let response = re_ui @@ -206,7 +201,6 @@ fn item_to_string(blueprint: &Blueprint, item: &Item) -> String { "".to_owned() } } - Item::MsgId(msg_id) => msg_id.short_string(), Item::ComponentPath(path) => { format!("{} {}", path.entity_path, path.component_name.short_name(),) } diff --git a/crates/re_viewer/src/ui/selection_panel.rs b/crates/re_viewer/src/ui/selection_panel.rs index 793aa3829f80..c329e086391b 100644 --- a/crates/re_viewer/src/ui/selection_panel.rs +++ b/crates/re_viewer/src/ui/selection_panel.rs @@ -6,12 +6,12 @@ use itertools::Itertools; use poll_promise::Promise; use re_arrow_store::{LatestAtQuery, RangeQuery, TimeInt, TimeRange, Timeline}; use re_data_store::{ - query_latest_single, ColorMap, ColorMapper, EditableAutoValue, EntityPath, EntityProperties, + query_latest_single, Colormap, ColorMapper, EditableAutoValue, EntityPath, EntityProperties, ExtraQueryHistory, }; use re_log_types::{ component_types::{ImuData, InstanceKey, Tensor, TensorDataMeaning}, - Component, MsgId, TimeType, Transform, + Component, TimeType, Transform, }; use re_query::{query_primary_with_history, QueryError}; @@ -24,7 +24,7 @@ use crate::{ use egui_dock::{DockArea, NodeIndex, Tree}; -use super::{data_ui::DataUi, plot_3d, space_view::ViewState, SpaceView, ViewCategory}; +use super::{data_ui::DataUi, space_view::ViewState, SpaceView, ViewCategory}; use egui::emath::History; @@ -590,19 +590,19 @@ impl SelectionPanel { let num_selections = ctx.selection().len(); let selection = ctx.selection().to_vec(); - for (i, selection) in selection.iter().enumerate() { + for (i, item) in selection.iter().enumerate() { ui.push_id(i, |ui| { - what_is_selected_ui(ui, ctx, blueprint, selection); + what_is_selected_ui(ui, ctx, blueprint, item); - if has_data_section(selection) { + if 
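`select_previous`/`select_next` now hand back the selection to apply and are `#[must_use]`, so a caller cannot navigate the history and silently drop the result. A miniature of that API shape — the real methods walk `previous().index`/`next().index`; this sketch just decrements:

```rust
/// Hypothetical miniature of `SelectionHistory`; the real type stores
/// `ItemCollection`s and tracks indices via `previous()`/`next()`.
struct History {
    stack: Vec<String>,
    current: usize,
}

impl History {
    #[must_use] // dropping the return value would mean the selection is never applied
    fn select_previous(&mut self) -> Option<String> {
        if self.current > 0 {
            self.current -= 1;
            return self.stack.get(self.current).cloned();
        }
        None
    }
}

fn main() {
    let mut history = History {
        stack: vec!["points/0".into(), "camera".into()],
        current: 1,
    };
    if let Some(selection) = history.select_previous() {
        println!("re-selecting {selection}");
    }
}
```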
has_data_section(item) { ctx.re_ui.large_collapsing_header(ui, "Data", true, |ui| { - selection.data_ui(ctx, ui, UiVerbosity::All, &query); + item.data_ui(ctx, ui, UiVerbosity::All, &query); }); } ctx.re_ui .large_collapsing_header(ui, "Blueprint", true, |ui| { - blueprint_ui(ui, ctx, blueprint, selection); + blueprint_ui(ui, ctx, blueprint, item); }); if i + 1 < num_selections { @@ -616,7 +616,7 @@ impl SelectionPanel { fn has_data_section(item: &Item) -> bool { match item { - Item::MsgId(_) | Item::ComponentPath(_) | Item::InstancePath(_, _) => true, + Item::ComponentPath(_) | Item::InstancePath(_, _) => true, // Skip data ui since we don't know yet what to show for these. Item::SpaceView(_) | Item::DataBlueprintGroup(_, _) => false, } @@ -630,12 +630,6 @@ pub fn what_is_selected_ui( item: &Item, ) { match item { - Item::MsgId(msg_id) => { - ui.horizontal(|ui| { - ui.label("Message ID:"); - ctx.msg_id_button(ui, *msg_id); - }); - } Item::ComponentPath(re_log_types::ComponentPath { entity_path, component_name, @@ -727,9 +721,6 @@ impl DataUi for Item { // If you add something in here make sure to adjust SelectionPanel::contents accordingly. debug_assert!(!has_data_section(self)); } - Item::MsgId(msg_id) => { - msg_id.data_ui(ctx, ui, verbosity, query); - } Item::ComponentPath(component_path) => { component_path.data_ui(ctx, ui, verbosity, query); } @@ -748,11 +739,6 @@ fn blueprint_ui( item: &Item, ) { match item { - Item::MsgId(_) => { - // TODO(andreas): Show space views that contains entities that's part of this message. - ui.weak("(nothing)"); - } - Item::ComponentPath(component_path) => { list_existing_data_blueprints(ui, ctx, component_path.entity_path(), blueprint); } @@ -940,12 +926,12 @@ fn colormap_props_ui(ui: &mut egui::Ui, entity_props: &mut EntityProperties) { } }; - add_label(ColorMapper::ColorMap(ColorMap::Grayscale)); - add_label(ColorMapper::ColorMap(ColorMap::Turbo)); - add_label(ColorMapper::ColorMap(ColorMap::Viridis)); - add_label(ColorMapper::ColorMap(ColorMap::Plasma)); - add_label(ColorMapper::ColorMap(ColorMap::Magma)); - add_label(ColorMapper::ColorMap(ColorMap::Inferno)); + add_label(ColorMapper::Colormap(Colormap::Grayscale)); + add_label(ColorMapper::Colormap(Colormap::Turbo)); + add_label(ColorMapper::Colormap(Colormap::Viridis)); + add_label(ColorMapper::Colormap(Colormap::Plasma)); + add_label(ColorMapper::Colormap(Colormap::Magma)); + add_label(ColorMapper::Colormap(Colormap::Inferno)); }); ui.end_row(); diff --git a/crates/re_viewer/src/ui/space_view.rs b/crates/re_viewer/src/ui/space_view.rs index c2323d5f0bfa..aa2ee9f31507 100644 --- a/crates/re_viewer/src/ui/space_view.rs +++ b/crates/re_viewer/src/ui/space_view.rs @@ -1,5 +1,5 @@ use re_arrow_store::Timeline; -use re_data_store::{EntityPath, EntityTree, InstancePath, TimeInt}; +use re_data_store::{EntityPath, EntityPropertyMap, EntityTree, InstancePath, TimeInt}; use re_renderer::{GpuReadbackIdentifier, ScreenshotProcessor}; use crate::{ @@ -251,8 +251,15 @@ impl SpaceView { self.view_state .state_spatial .update_object_property_heuristics(ctx, &mut self.data_blueprint); - self.view_state - .ui_spatial(ctx, ui, &self.space_path, scene, self.id, highlights); + self.view_state.ui_spatial( + ctx, + ui, + &self.space_path, + scene, + self.id, + highlights, + self.data_blueprint.data_blueprints_projected(), + ); } ViewCategory::Tensor => { @@ -341,10 +348,18 @@ impl ViewState { scene: view_spatial::SceneSpatial, space_view_id: SpaceViewId, highlights: &SpaceViewHighlights, + entity_properties: 
&EntityPropertyMap, ) { ui.vertical(|ui| { - self.state_spatial - .view_spatial(ctx, ui, space, scene, space_view_id, highlights); + self.state_spatial.view_spatial( + ctx, + ui, + space, + scene, + space_view_id, + highlights, + entity_properties, + ); }); } diff --git a/crates/re_viewer/src/ui/space_view_heuristics.rs b/crates/re_viewer/src/ui/space_view_heuristics.rs index 8fc9229dc536..a3eb14616210 100644 --- a/crates/re_viewer/src/ui/space_view_heuristics.rs +++ b/crates/re_viewer/src/ui/space_view_heuristics.rs @@ -5,10 +5,7 @@ use itertools::Itertools; use nohash_hasher::IntSet; use re_arrow_store::{DataStore, LatestAtQuery, Timeline}; use re_data_store::{log_db::EntityDb, query_latest_single, ComponentName, EntityPath}; -use re_log_types::{ - component_types::{Tensor, TensorTrait}, - Component, -}; +use re_log_types::{component_types::Tensor, Component}; use crate::{ misc::{space_info::SpaceInfoCollection, ViewerContext}, @@ -256,7 +253,6 @@ fn is_default_added_to_space_view( let ignored_components = [ re_log_types::Transform::name(), re_log_types::ViewCoordinates::name(), - re_log_types::MsgId::name(), re_log_types::component_types::InstanceKey::name(), re_log_types::component_types::KeypointId::name(), DataStore::insert_id_key(), diff --git a/crates/re_viewer/src/ui/time_panel/data_density_graph.rs b/crates/re_viewer/src/ui/time_panel/data_density_graph.rs index b5a7dfae9c84..bdf311b4ccf2 100644 --- a/crates/re_viewer/src/ui/time_panel/data_density_graph.rs +++ b/crates/re_viewer/src/ui/time_panel/data_density_graph.rs @@ -488,7 +488,7 @@ pub fn data_density_graph_ui( ctx.rec_cfg.time_ctrl.set_time(hovered_time_range.min); ctx.rec_cfg.time_ctrl.pause(); } else if !ui.ctx().memory(|mem| mem.is_anything_being_dragged()) { - show_msg_ids_tooltip( + show_row_ids_tooltip( ctx, blueprint, ui.ctx(), @@ -518,7 +518,7 @@ fn make_brighter(color: Color32) -> Color32 { ) } -fn show_msg_ids_tooltip( +fn show_row_ids_tooltip( ctx: &mut ViewerContext<'_>, blueprint: &mut Blueprint, egui_ctx: &egui::Context, diff --git a/crates/re_viewer/src/ui/time_panel/mod.rs b/crates/re_viewer/src/ui/time_panel/mod.rs index c937cb5374c3..e106cb4ed74a 100644 --- a/crates/re_viewer/src/ui/time_panel/mod.rs +++ b/crates/re_viewer/src/ui/time_panel/mod.rs @@ -747,9 +747,12 @@ fn initialize_time_ranges_ui( .prefix_times .get(ctx.rec_cfg.time_ctrl.timeline()) { - let timeline_axis = TimelineAxis::new(ctx.rec_cfg.time_ctrl.time_type(), times); - time_view = time_view.or_else(|| Some(view_everything(&time_x_range, &timeline_axis))); - time_range.extend(timeline_axis.ranges); + // NOTE: `times` can be empty if a GC wiped everything. 
+ if !times.is_empty() { + let timeline_axis = TimelineAxis::new(ctx.rec_cfg.time_ctrl.time_type(), times); + time_view = time_view.or_else(|| Some(view_everything(&time_x_range, &timeline_axis))); + time_range.extend(timeline_axis.ranges); + } } TimeRangesUi::new( diff --git a/crates/re_viewer/src/ui/view_bar_chart/scene.rs b/crates/re_viewer/src/ui/view_bar_chart/scene.rs index c22ca0f2ced9..829d5138f1ac 100644 --- a/crates/re_viewer/src/ui/view_bar_chart/scene.rs +++ b/crates/re_viewer/src/ui/view_bar_chart/scene.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use re_arrow_store::LatestAtQuery; use re_data_store::EntityPath; use re_log::warn_once; -use re_log_types::component_types::{self, InstanceKey, Tensor, TensorTrait as _}; +use re_log_types::component_types::{self, InstanceKey, Tensor}; use re_query::query_entity_with_primary; use crate::{misc::ViewerContext, ui::scene::SceneQuery}; diff --git a/crates/re_viewer/src/ui/view_category.rs b/crates/re_viewer/src/ui/view_category.rs index d37fa22eed75..25e9d8960b3a 100644 --- a/crates/re_viewer/src/ui/view_category.rs +++ b/crates/re_viewer/src/ui/view_category.rs @@ -2,8 +2,7 @@ use re_arrow_store::{LatestAtQuery, TimeInt}; use re_data_store::{EntityPath, LogDb, Timeline}; use re_log_types::{ component_types::{ - Box3D, LineStrip2D, LineStrip3D, NodeGraph, Point2D, Point3D, Rect2D, Scalar, Tensor, - TensorTrait, TextEntry, + Box3D, LineStrip2D, LineStrip3D, NodeGraph, Point2D, Point3D, Rect2D, Scalar, Tensor, TextEntry, }, Arrow3D, Component, Mesh3D, Transform, }; diff --git a/crates/re_viewer/src/ui/view_node_graph/scene.rs b/crates/re_viewer/src/ui/view_node_graph/scene.rs index 09ddc9a38cc0..5d0aa163daf1 100644 --- a/crates/re_viewer/src/ui/view_node_graph/scene.rs +++ b/crates/re_viewer/src/ui/view_node_graph/scene.rs @@ -2,7 +2,7 @@ use re_arrow_store::TimeRange; use re_data_store::EntityPath; use re_log_types::{ component_types::{self, InstanceKey}, - Component, MsgId, + Component, }; use re_query::{range_entity_with_primary, QueryError}; @@ -12,9 +12,6 @@ use crate::{ui::SceneQuery, ViewerContext}; #[derive(Debug, Clone)] pub struct NodeGraphEntry { - // props - pub msg_id: MsgId, - pub entity_path: EntityPath, /// `None` for timeless data. diff --git a/crates/re_viewer/src/ui/view_node_graph/ui.rs b/crates/re_viewer/src/ui/view_node_graph/ui.rs index 9b23fbb87623..2e08045cc3d5 100644 --- a/crates/re_viewer/src/ui/view_node_graph/ui.rs +++ b/crates/re_viewer/src/ui/view_node_graph/ui.rs @@ -92,204 +92,5 @@ impl ViewNodeGraphFilters { // accordingly. 
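The `times.is_empty()` guard matters because the axis presumably derives its view range from the values, and after a garbage collection there may be none left. The defensive shape, standalone:

```rust
/// Hypothetical miniature of the time-axis setup: only derive a view range
/// when at least one time value survived garbage collection.
fn view_range(times: &[i64]) -> Option<(i64, i64)> {
    if times.is_empty() {
        return None; // keep the previous view instead of producing a bogus range
    }
    let min = *times.iter().min()?;
    let max = *times.iter().max()?;
    Some((min, max))
}

fn main() {
    assert_eq!(view_range(&[]), None);
    assert_eq!(view_range(&[5, 1, 9]), Some((1, 9)));
    println!("ok");
}
```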
fn update(&mut self, ctx: &mut ViewerContext<'_>, NodeGraph_entries: &[NodeGraphEntry]) { crate::profile_function!(); - - let Self { - col_timelines, - col_entity_path: _, - col_log_level: _, - row_entity_paths, - row_log_levels, - } = self; - - for timeline in ctx.log_db.timelines() { - col_timelines.entry(*timeline).or_insert(true); - } - - for entity_path in NodeGraph_entries.iter().map(|te| &te.entity_path) { - row_entity_paths.entry(entity_path.clone()).or_insert(true); - } - - for level in NodeGraph_entries.iter().filter_map(|te| te.level.as_ref()) { - row_log_levels.entry(level.clone()).or_insert(true); - } - } -} - -// --- - -fn get_time_point(ctx: &ViewerContext<'_>, entry: &NodeGraphEntry) -> Option { - if let Some(time_point) = ctx - .log_db - .entity_db - .data_store - .get_msg_metadata(&entry.msg_id) - { - Some(time_point.clone()) - } else { - re_log::warn_once!("Missing LogMsg for {:?}", entry.entity_path); - None } } - -/// `scroll_to_row` indicates how far down we want to scroll in terms of logical rows, -/// as opposed to `scroll_to_offset` (computed below) which is how far down we want to -/// scroll in terms of actual points. -fn table_ui( - ctx: &mut ViewerContext<'_>, - ui: &mut egui::Ui, - state: &mut ViewNodeGraphState, - NodeGraph_entries: &[NodeGraphEntry], - scroll_to_row: Option, -) { - let timelines = state - .filters - .col_timelines - .iter() - .filter_map(|(timeline, visible)| visible.then_some(timeline)) - .collect::>(); - - use egui_extras::Column; - - let global_timeline = *ctx.rec_cfg.time_ctrl.timeline(); - let global_time = ctx.rec_cfg.time_ctrl.time_int(); - - let mut table_builder = egui_extras::TableBuilder::new(ui) - .resizable(true) - .vscroll(true) - .auto_shrink([false; 2]) // expand to take up the whole Space View - .min_scrolled_height(0.0) // we can go as small as we need to be in order to fit within the space view! 
- .max_scroll_height(f32::INFINITY) // Fill up whole height - .cell_layout(egui::Layout::left_to_right(egui::Align::TOP)); - - if let Some(scroll_to_row) = scroll_to_row { - table_builder = table_builder.scroll_to_row(scroll_to_row, Some(egui::Align::Center)); - } - - let mut body_clip_rect = None; - let mut current_time_y = None; // where to draw the current time indicator cursor - - { - // timeline(s) - table_builder = - table_builder.columns(Column::auto().clip(true).at_least(32.0), timelines.len()); - - // entity path - if state.filters.col_entity_path { - table_builder = table_builder.column(Column::auto().clip(true).at_least(32.0)); - } - // log level - if state.filters.col_log_level { - table_builder = table_builder.column(Column::auto().at_least(30.0)); - } - // body - table_builder = table_builder.column(Column::remainder().at_least(100.0)); - } - table_builder - .header(re_ui::ReUi::table_header_height(), |mut header| { - re_ui::ReUi::setup_table_header(&mut header); - for timeline in &timelines { - header.col(|ui| { - ctx.timeline_button(ui, timeline); - }); - } - if state.filters.col_entity_path { - header.col(|ui| { - ui.strong("Entity path"); - }); - } - if state.filters.col_log_level { - header.col(|ui| { - ui.strong("Level"); - }); - } - header.col(|ui| { - ui.strong("Body"); - }); - }) - .body(|mut body| { - re_ui::ReUi::setup_table_body(&mut body); - - body_clip_rect = Some(body.max_rect()); - - let row_heights = NodeGraph_entries.iter().map(calc_row_height); - body.heterogeneous_rows(row_heights, |index, mut row| { - let NodeGraph_entry = &NodeGraph_entries[index]; - - // NOTE: `try_from_props` is where we actually fetch data from the underlying - // store, which is a costly operation. - // Doing this here guarantees that it only happens for visible rows. - let Some(time_point) = get_time_point(ctx, NodeGraph_entry) else { - row.col(|ui| { - ui.colored_label( - Color32::RED, - "", - ); - }); - return; - }; - - // timeline(s) - for timeline in &timelines { - row.col(|ui| { - if let Some(row_time) = time_point.get(timeline).copied() { - ctx.time_button(ui, timeline, row_time); - - if let Some(global_time) = global_time { - if *timeline == &global_timeline { - #[allow(clippy::comparison_chain)] - if global_time < row_time { - // We've past the global time - it is thus above this row. - if current_time_y.is_none() { - current_time_y = Some(ui.max_rect().top()); - } - } else if global_time == row_time { - // This row is exactly at the current time. - // We could draw the current time exactly onto this row, but that would look bad, - // so let's draw it under instead. It looks better in the "following" mode. 
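// A self-contained sketch of the cursor-placement rule from the comparison
// above: walking rows ordered by time, the time indicator is drawn above the
// first row that lies past the global time, and (for readability) below a row
// whose time matches exactly. Stand-in types; unlike the real loop, this
// simplification returns at the first matching row.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum CursorPos {
    AboveRow(usize),
    BelowRow(usize),
}

fn cursor_position(row_times: &[i64], global_time: i64) -> Option<CursorPos> {
    for (i, &row_time) in row_times.iter().enumerate() {
        if global_time < row_time {
            return Some(CursorPos::AboveRow(i)); // we've passed the global time
        } else if global_time == row_time {
            return Some(CursorPos::BelowRow(i)); // draw under the matching row
        }
    }
    None // the global time is after every row
}

fn main() {
    let times = [10, 20, 30];
    assert_eq!(cursor_position(&times, 15), Some(CursorPos::AboveRow(1)));
    assert_eq!(cursor_position(&times, 20), Some(CursorPos::BelowRow(1)));
    assert_eq!(cursor_position(&times, 99), None);
}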
- current_time_y = Some(ui.max_rect().bottom()); - } - } - } - } - }); - } - - // path - if state.filters.col_entity_path { - row.col(|ui| { - ctx.entity_path_button(ui, None, &NodeGraph_entry.entity_path); - }); - } - // body - row.col(|ui| { - let mut some_text = egui::RichText::new(&NodeGraph_entry.body); - - if state.monospace { - some_text = some_text.monospace(); - } - if let Some([r, g, b, a]) = NodeGraph_entry.color { - some_text = some_text.color(Color32::from_rgba_unmultiplied(r, g, b, a)); - } - - ui.label(some_text); - }); - }); - }); - - // TODO(cmc): this draws on top of the headers :( - if let (Some(body_clip_rect), Some(current_time_y)) = (body_clip_rect, current_time_y) { - // Show that the current time is here: - ui.painter().with_clip_rect(body_clip_rect).hline( - ui.max_rect().x_range(), - current_time_y, - (1.0, Color32::WHITE), - ); - } -} - -fn calc_row_height(entry: &NodeGraphEntry) -> f32 { - // Simple, fast, ugly, and functional - let num_newlines = entry.body.bytes().filter(|&c| c == b'\n').count(); - let num_rows = 1 + num_newlines; - num_rows as f32 * re_ui::ReUi::table_line_height() -} diff --git a/crates/re_viewer/src/ui/view_spatial/eye.rs b/crates/re_viewer/src/ui/view_spatial/eye.rs index 101da1ca6d7c..287481311101 100644 --- a/crates/re_viewer/src/ui/view_spatial/eye.rs +++ b/crates/re_viewer/src/ui/view_spatial/eye.rs @@ -48,24 +48,24 @@ impl Eye { } } - pub fn ui_from_world(&self, rect: &Rect) -> Mat4 { - let aspect_ratio = rect.width() / rect.height(); + pub fn ui_from_world(&self, space2d_rect: Rect) -> Mat4 { + let aspect_ratio = space2d_rect.width() / space2d_rect.height(); let projection = if let Some(fov_y) = self.fov_y { Mat4::perspective_infinite_rh(fov_y, aspect_ratio, self.near()) } else { Mat4::orthographic_rh( - rect.left(), - rect.right(), - rect.bottom(), - rect.top(), + space2d_rect.left(), + space2d_rect.right(), + space2d_rect.bottom(), + space2d_rect.top(), self.near(), self.far(), ) }; - Mat4::from_translation(vec3(rect.center().x, rect.center().y, 0.0)) - * Mat4::from_scale(0.5 * vec3(rect.width(), -rect.height(), 1.0)) + Mat4::from_translation(vec3(space2d_rect.center().x, space2d_rect.center().y, 0.0)) + * Mat4::from_scale(0.5 * vec3(space2d_rect.width(), -space2d_rect.height(), 1.0)) * projection * self.world_from_view.inverse() } @@ -80,7 +80,7 @@ impl Eye { /// Picking ray for a given pointer in the parent space /// (i.e. 
prior to camera transform, "world" space) - pub fn picking_ray(&self, screen_rect: &Rect, pointer: glam::Vec2) -> macaw::Ray3 { + pub fn picking_ray(&self, screen_rect: Rect, pointer: glam::Vec2) -> macaw::Ray3 { if let Some(fov_y) = self.fov_y { let (w, h) = (screen_rect.width(), screen_rect.height()); let aspect_ratio = w / h; diff --git a/crates/re_viewer/src/ui/view_spatial/mod.rs b/crates/re_viewer/src/ui/view_spatial/mod.rs index 141f6c3f9dd6..240c83ae2023 100644 --- a/crates/re_viewer/src/ui/view_spatial/mod.rs +++ b/crates/re_viewer/src/ui/view_spatial/mod.rs @@ -5,7 +5,7 @@ mod space_camera_3d; mod ui; mod ui_2d; mod ui_3d; -mod ui_renderer_bridge; +pub mod ui_renderer_bridge; pub use self::scene::{Image, MeshSource, MeshSourceData, SceneSpatial, UiLabel, UiLabelTarget}; pub use self::space_camera_3d::SpaceCamera3D; diff --git a/crates/re_viewer/src/ui/view_spatial/scene/mod.rs b/crates/re_viewer/src/ui/view_spatial/scene/mod.rs index 9d10b54a77e2..ef5082ad2fa8 100644 --- a/crates/re_viewer/src/ui/view_spatial/scene/mod.rs +++ b/crates/re_viewer/src/ui/view_spatial/scene/mod.rs @@ -3,12 +3,12 @@ use std::sync::Arc; use ahash::HashMap; use re_data_store::{EntityPath, InstancePathHash}; use re_log_types::{ - component_types::{ClassId, KeypointId, Tensor}, + component_types::{ClassId, InstanceKey, KeypointId, Tensor}, MeshId, }; use re_renderer::{Color32, OutlineMaskPreference, Size}; -use super::{eye::Eye, SpaceCamera3D, SpatialNavigationMode}; +use super::{SpaceCamera3D, SpatialNavigationMode}; use crate::{ misc::{mesh_loader::LoadedMesh, SpaceViewHighlights, TransformCache, ViewerContext}, ui::{ @@ -21,7 +21,7 @@ mod picking; mod primitives; mod scene_part; -pub use self::picking::{AdditionalPickingInfo, PickingRayHit, PickingResult}; +pub use self::picking::{PickingContext, PickingHitType, PickingRayHit, PickingResult}; pub use self::primitives::SceneSpatialPrimitives; use scene_part::ScenePart; @@ -56,7 +56,8 @@ pub struct MeshSource { } pub struct Image { - pub instance_path_hash: InstancePathHash, + /// Path to the image (note image instance ids would refer to pixels!) + pub ent_path: EntityPath, pub tensor: Tensor, @@ -122,17 +123,6 @@ pub struct SceneSpatial { pub space_cameras: Vec, } -fn instance_path_hash_if_interactive( - entity_path: &EntityPath, - interactive: bool, -) -> InstancePathHash { - if interactive { - InstancePathHash::entity_splat(entity_path) - } else { - InstancePathHash::NONE - } -} - pub type Keypoints = HashMap<(ClassId, i64), HashMap>; impl SceneSpatial { @@ -172,7 +162,7 @@ impl SceneSpatial { // -- // Note: Lines2DPart handles both Segments and LinesPaths since they are unified on the logging-side. &scene_part::Lines2DPart, - &scene_part::Points2DPart, + &scene_part::Points2DPart { max_labels: 10 }, // --- &scene_part::CamerasPart, ]; @@ -192,12 +182,13 @@ impl SceneSpatial { entity_path: &re_data_store::EntityPath, keypoints: Keypoints, annotations: &Arc, - interactive: bool, ) { // Generate keypoint connections if any. 
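// `Image` now carries only its `EntityPath` because, as the comment above
// notes, per-instance ids on an image refer to individual pixels. A hedged
// sketch of the row-major pixel <-> instance-index mapping this implies
// (the helper names are illustrative; the arithmetic mirrors the
// `i % width` / `i / width` indexing used by the GPU picking code later
// in this diff):
fn instance_index_from_pixel(x: u32, y: u32, image_width: u64) -> u64 {
    y as u64 * image_width + x as u64
}

fn pixel_from_instance_index(index: u64, image_width: u64) -> (u32, u32) {
    ((index % image_width) as u32, (index / image_width) as u32)
}

fn main() {
    let width = 800;
    let index = instance_index_from_pixel(10, 3, width);
    assert_eq!(index, 2410);
    assert_eq!(pixel_from_instance_index(index, width), (10, 3));
}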
-        let instance_path_hash = instance_path_hash_if_interactive(entity_path, interactive);
-
-        let mut line_batch = self.primitives.line_strips.batch("keypoint connections");
+        let mut line_batch = self
+            .primitives
+            .line_strips
+            .batch("keypoint connections")
+            .picking_object_id(re_renderer::PickingLayerObjectId(entity_path.hash64()));

         for ((class_id, _time), keypoints_in_class) in keypoints {
             let Some(class_description) = annotations.context.class_map.get(&class_id) else {
@@ -221,7 +212,8 @@
                     .add_segment(*a, *b)
                     .radius(Size::AUTO)
                     .color(color)
-                    .user_data(instance_path_hash);
+                    // Select the entire object when clicking any of the lines.
+                    .picking_instance_id(re_renderer::PickingLayerInstanceId(InstanceKey::SPLAT.0));
             }
         }
     }
@@ -232,7 +224,7 @@
         if self
             .space_cameras
             .iter()
-            .any(|camera| &camera.entity_path != space_info_path)
+            .any(|camera| camera.instance_path_hash.entity_path_hash != space_info_path.hash())
         {
             return SpatialNavigationMode::ThreeD;
         }
@@ -246,28 +238,4 @@

         SpatialNavigationMode::ThreeD
     }
-
-    #[allow(clippy::too_many_arguments)]
-    pub fn picking(
-        &self,
-        render_ctx: &re_renderer::RenderContext,
-        gpu_readback_identifier: re_renderer::GpuReadbackIdentifier,
-        previous_picking_result: &Option<PickingResult>,
-        pointer_in_ui: glam::Vec2,
-        ui_rect: &egui::Rect,
-        eye: &Eye,
-        ui_interaction_radius: f32,
-    ) -> PickingResult {
-        picking::picking(
-            render_ctx,
-            gpu_readback_identifier,
-            previous_picking_result,
-            pointer_in_ui,
-            ui_rect,
-            eye,
-            &self.primitives,
-            &self.ui,
-            ui_interaction_radius,
-        )
-    }
 }
diff --git a/crates/re_viewer/src/ui/view_spatial/scene/picking.rs b/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
index 5585e555f5a0..3feb71285d4a 100644
--- a/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
+++ b/crates/re_viewer/src/ui/view_spatial/scene/picking.rs
@@ -1,22 +1,22 @@
-use itertools::Itertools as _;
+//! Handles picking in 2D & 3D spaces.

 use re_data_store::InstancePathHash;
+use re_log_types::{component_types::InstanceKey, EntityPathHash};
 use re_renderer::PickingLayerProcessor;

 use super::{SceneSpatialPrimitives, SceneSpatialUiData};
 use crate::{
-    math::{line_segment_distance_sq_to_point_2d, ray_closest_t_line_segment},
     misc::instance_hash_conversions::instance_path_hash_from_picking_layer_id,
     ui::view_spatial::eye::Eye,
 };

-#[derive(Clone)]
-pub enum AdditionalPickingInfo {
-    /// No additional picking information.
-    None,
+#[derive(Clone, PartialEq, Eq)]
+pub enum PickingHitType {
+    /// The hit was a textured rect.
+    TexturedRect,

-    /// The hit was a textured rect at the given uv coordinates (ranging from 0 to 1)
-    TexturedRect(glam::Vec2),
+    /// The result came from GPU based picking.
+    GpuPickingResult,

     /// We hit a egui ui element, meaning that depth information is not usable.
     GuiOverlay,
@@ -29,173 +29,119 @@ pub struct PickingRayHit {
     /// The ray hit position may not actually be on this entity, as we allow snapping to closest entity!
     pub instance_path_hash: InstancePathHash,

-    /// Where along the picking ray the hit occurred.
-    pub ray_t: f32,
+    /// Where the ray hit the entity.
+    pub space_position: glam::Vec3,

     pub depth_offset: re_renderer::DepthOffset,

     /// Any additional information about the picking hit.
-    pub info: AdditionalPickingInfo,
-
-    /// True if this picking result came from a GPU picking pass.
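// The keypoint-connection batch above encodes picking at two levels: an
// object id per batch (the entity path hash) and an instance id per element,
// where a reserved "splat" instance means "select the whole entity". A
// minimal sketch of that scheme; the stand-in types and the u64::MAX splat
// marker are assumptions for illustration (the real value lives behind
// `InstanceKey::SPLAT`):
#[derive(Clone, Copy, PartialEq, Eq)]
struct PickingId {
    object: u64,   // which entity was hit
    instance: u64, // which instance within that entity
}

const SPLAT: u64 = u64::MAX; // assumed reserved "all instances" marker

fn describe_hit(id: PickingId) -> String {
    if id.instance == SPLAT {
        format!("entity {} as a whole", id.object)
    } else {
        format!("entity {}, instance {}", id.object, id.instance)
    }
}

fn main() {
    // Clicking any keypoint-connection line selects the whole entity:
    println!("{}", describe_hit(PickingId { object: 42, instance: SPLAT }));
    // Clicking an individual point selects just that instance:
    println!("{}", describe_hit(PickingId { object: 42, instance: 7 }));
}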
-    pub used_gpu_picking: bool,
-}
-
-impl PickingRayHit {
-    fn from_instance_and_t(instance_path_hash: InstancePathHash, t: f32) -> Self {
-        Self {
-            instance_path_hash,
-            ray_t: t,
-            info: AdditionalPickingInfo::None,
-            depth_offset: 0,
-            used_gpu_picking: false,
-        }
-    }
+    pub hit_type: PickingHitType,
 }

 #[derive(Clone)]
 pub struct PickingResult {
-    /// Picking ray hit for an opaque object (if any).
-    pub opaque_hit: Option<PickingRayHit>,
-
-    /// Picking ray hits for transparent objects, sorted from far to near.
-    /// If there is an opaque hit, all of them are in front of the opaque hit.
-    pub transparent_hits: Vec<PickingRayHit>,
-
-    /// The picking ray used. Given in the coordinates of the space the picking is performed in.
-    picking_ray: macaw::Ray3,
+    /// Picking ray hits. NOT sorted by distance but rather by source of picking.
+    ///
+    /// Typically there is only one hit, but there might be several if there are transparent objects
+    /// or "aggressive" objects like 2D images which we always want to pick, even if they're in the background.
+    /// (This is very useful for 2D scenes and so far we keep this behavior in 3D for simplicity)
+    pub hits: Vec<PickingRayHit>,
 }

 impl PickingResult {
-    /// The space position of a given hit.
-    #[allow(dead_code)]
-    pub fn space_position(&self, hit: &PickingRayHit) -> glam::Vec3 {
-        self.picking_ray.origin + self.picking_ray.dir * hit.ray_t
-    }
-
-    /// Iterates over all hits from far to close.
-    pub fn iter_hits(&self) -> impl Iterator<Item = &PickingRayHit> {
-        self.opaque_hit.iter().chain(self.transparent_hits.iter())
+    pub fn space_position(&self) -> Option<glam::Vec3> {
+        self.hits.last().map(|hit| hit.space_position)
     }
 }

-const RAY_T_EPSILON: f32 = f32::EPSILON;
+/// Picking context in which picking is performed.
+pub struct PickingContext {
+    /// Cursor position in the UI coordinate system.
+    pub pointer_in_ui: glam::Vec2,

-struct PickingContext {
-    pointer_in_ui: glam::Vec2,
-    ray_in_world: macaw::Ray3,
-    ui_from_world: glam::Mat4,
-    max_side_ui_dist_sq: f32,
-}
+    /// Cursor position on the renderer canvas in pixels.
+    pub pointer_in_pixel: glam::Vec2,

-struct PickingState {
-    closest_opaque_side_ui_dist_sq: f32,
-    closest_opaque_pick: PickingRayHit,
-    transparent_hits: Vec<PickingRayHit>,
-}
-
-impl PickingState {
-    fn check_hit(&mut self, side_ui_dist_sq: f32, ray_hit: PickingRayHit, transparent: bool) {
-        let gap_to_closest_opaque = self.closest_opaque_pick.ray_t - ray_hit.ray_t;
+    /// Cursor position in the 2D space coordinate system.
+    ///
+    /// For 3D spaces this is equal to the cursor position in pixel coordinate system.
+    pub pointer_in_space2d: glam::Vec2,

-        // Use depth offset if very close to each other in relative distance.
-        if gap_to_closest_opaque.abs()
-            < self.closest_opaque_pick.ray_t.max(ray_hit.ray_t) * RAY_T_EPSILON
-        {
-            if ray_hit.depth_offset < self.closest_opaque_pick.depth_offset {
-                return;
-            }
-        } else if gap_to_closest_opaque < 0.0 {
-            return;
-        }
+    /// The picking ray used. Given in the coordinates of the space the picking is performed in.
+    pub ray_in_world: macaw::Ray3,
+}

-        if side_ui_dist_sq <= self.closest_opaque_side_ui_dist_sq {
-            if transparent {
-                self.transparent_hits.push(ray_hit);
-            } else {
-                self.closest_opaque_pick = ray_hit;
-                self.closest_opaque_side_ui_dist_sq = side_ui_dist_sq;
-            }
+impl PickingContext {
+    /// Radius in which cursor interactions may snap to the nearest object even if the cursor
+    /// does not hover it directly.
+    ///
+    /// Note that this needs to be scaled when zooming is applied by the virtual->visible ui rect transform.
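// The constructor below derives all three cursor positions from a single egui
// pointer. A standalone check of the points -> physical pixels step (this
// mirrors `(pointer_in_ui - ui_clip_rect.left_top()) * pixels_from_points`;
// plain tuples stand in for the egui/glam vector types):
fn pixel_from_ui_points(
    pointer_in_ui: (f32, f32),
    clip_rect_left_top: (f32, f32),
    pixels_from_points: f32, // e.g. 2.0 on a 2x hidpi display
) -> (f32, f32) {
    (
        (pointer_in_ui.0 - clip_rect_left_top.0) * pixels_from_points,
        (pointer_in_ui.1 - clip_rect_left_top.1) * pixels_from_points,
    )
}

fn main() {
    // Cursor at (120, 90) points in a viewport whose clip rect starts at
    // (100, 50) points, on a display with 2 pixels per point:
    let px = pixel_from_ui_points((120.0, 90.0), (100.0, 50.0), 2.0);
    assert_eq!(px, (40.0, 80.0));
}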
+ pub const UI_INTERACTION_RADIUS: f32 = 5.0; + + pub fn new( + pointer_in_ui: egui::Pos2, + space2d_from_ui: eframe::emath::RectTransform, + ui_clip_rect: egui::Rect, + pixels_from_points: f32, + eye: &Eye, + ) -> PickingContext { + let pointer_in_space2d = space2d_from_ui.transform_pos(pointer_in_ui); + let pointer_in_space2d = glam::vec2(pointer_in_space2d.x, pointer_in_space2d.y); + let pointer_in_pixel = (pointer_in_ui - ui_clip_rect.left_top()) * pixels_from_points; + + PickingContext { + pointer_in_space2d, + pointer_in_pixel: glam::vec2(pointer_in_pixel.x, pointer_in_pixel.y), + pointer_in_ui: glam::vec2(pointer_in_ui.x, pointer_in_ui.y), + ray_in_world: eye.picking_ray(*space2d_from_ui.to(), pointer_in_space2d), } } - fn sort_and_remove_hidden_transparent(&mut self) { - // Sort from far to close - self.transparent_hits - .sort_by(|a, b| b.ray_t.partial_cmp(&a.ray_t).unwrap()); - - // Delete subset that is behind opaque hit. - if self.closest_opaque_pick.ray_t.is_finite() { - let mut num_hidden = 0; - for (i, transparent_hit) in self.transparent_hits.iter().enumerate() { - if transparent_hit.ray_t <= self.closest_opaque_pick.ray_t { - break; - } - num_hidden = i + 1; - } - self.transparent_hits.drain(0..num_hidden); - } + /// Performs picking for a given scene. + pub fn pick( + &self, + render_ctx: &re_renderer::RenderContext, + gpu_readback_identifier: re_renderer::GpuReadbackIdentifier, + previous_picking_result: &Option, + primitives: &SceneSpatialPrimitives, + ui_data: &SceneSpatialUiData, + ) -> PickingResult { + crate::profile_function!(); + + let mut hits = Vec::new(); + + // Start with gpu based picking as baseline. This is our prime source of picking information. + hits.extend(picking_gpu( + render_ctx, + gpu_readback_identifier, + self, + previous_picking_result, + )); + + // We also never throw away any textured rects, even if they're behind other objects. + let mut rect_hits = picking_textured_rects( + self, + &primitives.textured_rectangles, + &primitives.textured_rectangles_ids, + ); + rect_hits.sort_by(|a, b| b.depth_offset.cmp(&a.depth_offset)); + hits.extend(rect_hits); + + // UI rects are overlaid on top, but we don't let them hide other picking results either. + hits.extend(picking_ui_rects(self, ui_data)); + + PickingResult { hits } } } -#[allow(clippy::too_many_arguments)] -pub fn picking( +fn picking_gpu( render_ctx: &re_renderer::RenderContext, - gpu_readback_identifier: re_renderer::GpuReadbackIdentifier, + gpu_readback_identifier: u64, + context: &PickingContext, previous_picking_result: &Option, - pointer_in_ui: glam::Vec2, - ui_rect: &egui::Rect, - eye: &Eye, - primitives: &SceneSpatialPrimitives, - ui_data: &SceneSpatialUiData, - ui_interaction_radius: f32, -) -> PickingResult { +) -> Option { crate::profile_function!(); - let max_side_ui_dist_sq = ui_interaction_radius * ui_interaction_radius; - - let context = PickingContext { - pointer_in_ui, - ui_from_world: eye.ui_from_world(ui_rect), - ray_in_world: eye.picking_ray(ui_rect, pointer_in_ui), - max_side_ui_dist_sq, - }; - let mut state = PickingState { - closest_opaque_side_ui_dist_sq: max_side_ui_dist_sq, - closest_opaque_pick: PickingRayHit { - instance_path_hash: InstancePathHash::NONE, - ray_t: f32::INFINITY, - info: AdditionalPickingInfo::None, - depth_offset: 0, - used_gpu_picking: false, - }, - // Combined, sorted (and partially "hidden") by opaque results later. 
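// `pick()` above concatenates hits per *source* (GPU baseline, then textured
// rects, then UI rects) instead of sorting by distance, so consumers must not
// assume depth order. A minimal sketch of that contract with stand-in types:
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HitSource {
    GpuPickingResult,
    TexturedRect,
    GuiOverlay,
}

fn combine_hits(
    gpu: Option<HitSource>,
    mut rects: Vec<HitSource>,
    ui: Option<HitSource>,
) -> Vec<HitSource> {
    let mut hits = Vec::new();
    hits.extend(gpu); // baseline: at most one GPU hit
    hits.append(&mut rects); // image rects are kept even when behind other objects
    hits.extend(ui); // UI overlays never hide the other results
    hits
}

fn main() {
    let hits = combine_hits(
        Some(HitSource::GpuPickingResult),
        vec![HitSource::TexturedRect, HitSource::TexturedRect],
        None,
    );
    // Grouped by source, not sorted by distance along the ray:
    assert_eq!(hits[0], HitSource::GpuPickingResult);
    assert_eq!(hits.len(), 3);
}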
- transparent_hits: Vec::new(), - }; - - let SceneSpatialPrimitives { - bounding_box: _, - textured_rectangles, - textured_rectangles_ids, - line_strips, - points, - meshes, - depth_clouds: _, // no picking for depth clouds yet - any_outlines: _, - } = primitives; - - picking_points(&context, &mut state, points); - picking_lines(&context, &mut state, line_strips); - picking_meshes(&context, &mut state, meshes); - picking_textured_rects( - &context, - &mut state, - textured_rectangles, - textured_rectangles_ids, - ); - picking_ui_rects(&context, &mut state, ui_data); - - // GPU based picking. // Only look at newest available result, discard everything else. let mut gpu_picking_result = None; while let Some(picking_result) = @@ -203,160 +149,81 @@ pub fn picking( { gpu_picking_result = Some(picking_result); } - // TODO(andreas): Use gpu picking as fallback for now to fix meshes. Should combine instead! - if state.closest_opaque_pick.instance_path_hash == InstancePathHash::NONE { - if let Some(gpu_picking_result) = gpu_picking_result { - // TODO(andreas): Pick middle pixel for now. But we soon want to snap to the closest object using a bigger picking rect. - let rect = gpu_picking_result.rect; - let picked_id = gpu_picking_result.picking_data - [(rect.width() / 2 + (rect.height() / 2) * rect.width()) as usize]; - let picked_object = instance_path_hash_from_picking_layer_id(picked_id); - - // TODO(andreas): We're lacking depth information! - state.closest_opaque_pick.instance_path_hash = picked_object; - state.closest_opaque_pick.used_gpu_picking = true; - } else { - // It is possible that some frames we don't get a picking result and the frame after we get several. - // We need to cache the last picking result and use it until we get a new one or the mouse leaves the screen. - // (Andreas: On my mac this *actually* happens in very simple scenes, I get occasional frames with 0 and then with 2 picking results!) - if let Some(PickingResult { - opaque_hit: Some(previous_opaque_hit), - .. - }) = previous_picking_result - { - if previous_opaque_hit.used_gpu_picking { - state.closest_opaque_pick = previous_opaque_hit.clone(); - } - } - } - } - - state.sort_and_remove_hidden_transparent(); - - PickingResult { - opaque_hit: state - .closest_opaque_pick - .instance_path_hash - .is_some() - .then_some(state.closest_opaque_pick), - transparent_hits: state.transparent_hits, - picking_ray: context.ray_in_world, - } -} -fn picking_points( - context: &PickingContext, - state: &mut PickingState, - points: &re_renderer::PointCloudBuilder, -) { - crate::profile_function!(); - - for (batch, vertex_iter) in points.iter_vertices_and_userdata_by_batch() { - // For getting the closest point we could transform the mouse ray into the "batch space". - // However, we want to determine the closest point in *screen space*, meaning that we need to project all points. - let ui_from_batch = context.ui_from_world * batch.world_from_obj; - - for (point, instance_hash) in vertex_iter { - if instance_hash.is_none() { - continue; - } + if let Some(gpu_picking_result) = gpu_picking_result { + // First, figure out where on the rect the cursor is by now. + // (for simplicity, we assume the screen hasn't been resized) + let pointer_on_picking_rect = + context.pointer_in_pixel - gpu_picking_result.rect.left_top.as_vec2(); + // The cursor might have moved outside of the rect. Clamp it back in. 
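// A standalone check of the clamp that follows: the cursor position (in
// pixels, relative to the readback rect) is clamped to [0, extent - 1] per
// axis, so a pointer that drifted off the rect still maps to a valid pixel.
// Plain tuples stand in for the glam vectors; extents are assumed non-zero:
fn clamp_to_rect(pointer: (f32, f32), extent: (u32, u32)) -> (f32, f32) {
    (
        pointer.0.clamp(0.0, (extent.0 - 1) as f32),
        pointer.1.clamp(0.0, (extent.1 - 1) as f32),
    )
}

fn main() {
    let extent = (32, 32); // a small readback region around the cursor
    assert_eq!(clamp_to_rect((-3.0, 10.0), extent), (0.0, 10.0));
    assert_eq!(clamp_to_rect((40.0, 31.5), extent), (31.0, 31.0));
}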
+ let pointer_on_picking_rect = pointer_on_picking_rect.clamp( + glam::Vec2::ZERO, + (gpu_picking_result.rect.extent - glam::UVec2::ONE).as_vec2(), + ); - // TODO(emilk): take point radius into account - let pos_in_ui = ui_from_batch.project_point3(point.position); - let dist_sq = pos_in_ui.truncate().distance_squared(context.pointer_in_ui); - if dist_sq <= state.closest_opaque_side_ui_dist_sq { - let t = context - .ray_in_world - .closest_t_to_point(batch.world_from_obj.transform_point3(point.position)); - state.check_hit( - dist_sq, - PickingRayHit::from_instance_and_t(*instance_hash, t), - false, - ); + // Find closest non-zero pixel to the cursor. + let mut picked_id = re_renderer::PickingLayerId::default(); + let mut picked_on_picking_rect = glam::Vec2::ZERO; + let mut closest_rect_distance_sq = f32::INFINITY; + + for (i, id) in gpu_picking_result.picking_id_data.iter().enumerate() { + if id.object.0 != 0 { + let current_pos_on_picking_rect = glam::uvec2( + i as u32 % gpu_picking_result.rect.extent.x, + i as u32 / gpu_picking_result.rect.extent.x, + ) + .as_vec2() + + glam::vec2(0.5, 0.5); // Use pixel center for distances. + let distance_sq = + current_pos_on_picking_rect.distance_squared(pointer_on_picking_rect); + if distance_sq < closest_rect_distance_sq { + picked_on_picking_rect = current_pos_on_picking_rect; + closest_rect_distance_sq = distance_sq; + picked_id = *id; + } } } - } -} - -fn picking_lines( - context: &PickingContext, - state: &mut PickingState, - line_strips: &re_renderer::LineStripSeriesBuilder, -) { - crate::profile_function!(); - - for (batch, vertices) in line_strips.iter_vertices_by_batch() { - // For getting the closest point we could transform the mouse ray into the "batch space". - // However, we want to determine the closest point in *screen space*, meaning that we need to project all points. - let ui_from_batch = context.ui_from_world * batch.world_from_obj; - - for (start, end) in vertices.tuple_windows() { - // Skip unconnected tuples. - if start.strip_index != end.strip_index { - continue; - } - - let instance_hash = line_strips.strip_user_data[start.strip_index as usize]; - if instance_hash.is_none() { - continue; - } - - // TODO(emilk): take line segment radius into account - let a = ui_from_batch.project_point3(start.position); - let b = ui_from_batch.project_point3(end.position); - let side_ui_dist_sq = line_segment_distance_sq_to_point_2d( - [a.truncate(), b.truncate()], - context.pointer_in_ui, - ); - - if side_ui_dist_sq < context.max_side_ui_dist_sq { - let start_world = batch.world_from_obj.transform_point3(start.position); - let end_world = batch.world_from_obj.transform_point3(end.position); - let t = ray_closest_t_line_segment(&context.ray_in_world, [start_world, end_world]); - - state.check_hit( - side_ui_dist_sq, - PickingRayHit::from_instance_and_t(instance_hash, t), - false, - ); - } + if picked_id == re_renderer::PickingLayerId::default() { + // Nothing found. 
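// A self-contained version of the search above: scan the readback buffer in
// row-major order, convert each index to its pixel center, and keep the
// non-zero id closest to the cursor. Plain u64 ids stand in for
// re_renderer's picking ids:
fn closest_non_zero_pixel(ids: &[u64], width: u32, cursor: (f32, f32)) -> Option<(u32, u32)> {
    let mut best = None;
    let mut best_dist_sq = f32::INFINITY;
    for (i, &id) in ids.iter().enumerate() {
        if id != 0 {
            let (x, y) = (i as u32 % width, i as u32 / width);
            // Use the pixel center for distance comparisons.
            let center = (x as f32 + 0.5, y as f32 + 0.5);
            let dist_sq = (center.0 - cursor.0).powi(2) + (center.1 - cursor.1).powi(2);
            if dist_sq < best_dist_sq {
                best_dist_sq = dist_sq;
                best = Some((x, y));
            }
        }
    }
    best
}

fn main() {
    // A 4x2 buffer with two non-zero ids; the cursor sits near the right edge.
    let ids = [0, 7, 0, 0, 0, 0, 0, 9];
    assert_eq!(closest_non_zero_pixel(&ids, 4, (3.5, 1.5)), Some((3, 1)));
}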
+ return None; } - } -} - -fn picking_meshes( - context: &PickingContext, - state: &mut PickingState, - meshes: &[super::MeshSource], -) { - crate::profile_function!(); - for mesh in meshes { - if !mesh.picking_instance_hash.is_some() { - continue; - } - let ray_in_mesh = (mesh.world_from_mesh.inverse() * context.ray_in_world).normalize(); - let t = crate::math::ray_bbox_intersect(&ray_in_mesh, mesh.mesh.bbox()); - - if t < 0.0 { - let side_ui_dist_sq = 0.0; - state.check_hit( - side_ui_dist_sq, - PickingRayHit::from_instance_and_t(mesh.picking_instance_hash, t), - false, - ); + let picked_world_position = + gpu_picking_result.picked_world_position(picked_on_picking_rect.as_uvec2()); + + Some(PickingRayHit { + instance_path_hash: instance_path_hash_from_picking_layer_id(picked_id), + space_position: picked_world_position, + depth_offset: 1, + hit_type: PickingHitType::GpuPickingResult, + }) + } else { + // It is possible that some frames we don't get a picking result and the frame after we get several. + // We need to cache the last picking result and use it until we get a new one or the mouse leaves the screen. + // (Andreas: On my mac this *actually* happens in very simple scenes, I get occasional frames with 0 and then with 2 picking results!) + if let Some(PickingResult { hits }) = previous_picking_result { + for previous_opaque_hit in hits.iter() { + if matches!( + previous_opaque_hit.hit_type, + PickingHitType::GpuPickingResult + ) { + return Some(previous_opaque_hit.clone()); + } + } } + None } } fn picking_textured_rects( context: &PickingContext, - state: &mut PickingState, textured_rectangles: &[re_renderer::renderer::TexturedRect], - textured_rectangles_ids: &[InstancePathHash], -) { + textured_rectangles_ids: &[EntityPathHash], +) -> Vec { crate::profile_function!(); + let mut hits = Vec::new(); + for (rect, id) in textured_rectangles .iter() .zip(textured_rectangles_ids.iter()) @@ -376,44 +243,50 @@ fn picking_textured_rects( if !intersect { continue; } - let intersection_world = context.ray_in_world.origin + context.ray_in_world.dir * t; + let intersection_world = context.ray_in_world.point_along(t); let dir_from_rect_top_left = intersection_world - rect.top_left_corner_position; let u = dir_from_rect_top_left.dot(rect.extent_u) / rect.extent_u.length_squared(); let v = dir_from_rect_top_left.dot(rect.extent_v) / rect.extent_v.length_squared(); if (0.0..=1.0).contains(&u) && (0.0..=1.0).contains(&v) { - let picking_hit = PickingRayHit { - instance_path_hash: *id, - ray_t: t, - info: AdditionalPickingInfo::TexturedRect(glam::vec2(u, v)), - depth_offset: rect.depth_offset, - used_gpu_picking: false, - }; - state.check_hit(0.0, picking_hit, rect.multiplicative_tint.a() < 1.0); + hits.push(PickingRayHit { + instance_path_hash: InstancePathHash { + entity_path_hash: *id, + instance_key: InstanceKey::from_2d_image_coordinate( + [ + (u * rect.colormapped_texture.texture.width() as f32) as u32, + (v * rect.colormapped_texture.texture.height() as f32) as u32, + ], + rect.colormapped_texture.texture.width() as u64, + ), + }, + space_position: intersection_world, + hit_type: PickingHitType::TexturedRect, + depth_offset: rect.options.depth_offset, + }); } } + + hits } fn picking_ui_rects( context: &PickingContext, - state: &mut PickingState, ui_data: &SceneSpatialUiData, -) { +) -> Option { crate::profile_function!(); - let egui_pos = egui::pos2(context.pointer_in_ui.x, context.pointer_in_ui.y); + let egui_pos = egui::pos2(context.pointer_in_space2d.x, context.pointer_in_space2d.y); for 
(bbox, instance_hash) in &ui_data.pickable_ui_rects { - let side_ui_dist_sq = bbox.distance_sq_to_pos(egui_pos); - state.check_hit( - side_ui_dist_sq, - PickingRayHit { + if bbox.contains(egui_pos) { + // Handle only a single ui rectangle (exit right away, ignore potential overlaps) + return Some(PickingRayHit { instance_path_hash: *instance_hash, - ray_t: 0.0, - info: AdditionalPickingInfo::GuiOverlay, + space_position: context.ray_in_world.origin, + hit_type: PickingHitType::GuiOverlay, depth_offset: 0, - used_gpu_picking: false, - }, - false, - ); + }); + } } + None } diff --git a/crates/re_viewer/src/ui/view_spatial/scene/primitives.rs b/crates/re_viewer/src/ui/view_spatial/scene/primitives.rs index cf86d5c6d2d7..5fbd4448ff79 100644 --- a/crates/re_viewer/src/ui/view_spatial/scene/primitives.rs +++ b/crates/re_viewer/src/ui/view_spatial/scene/primitives.rs @@ -1,5 +1,6 @@ use egui::Color32; use re_data_store::InstancePathHash; +use re_log_types::EntityPathHash; use re_renderer::{ renderer::{DepthClouds, MeshInstance}, LineStripSeriesBuilder, PointCloudBuilder, @@ -12,7 +13,7 @@ use super::MeshSource; /// Primitives sent off to `re_renderer`. /// (Some meta information still relevant to ui setup as well) /// -/// TODO(andreas): Right now we're using `re_renderer` data structures for reading (bounding box & picking). +/// TODO(andreas): Right now we're using `re_renderer` data structures for reading (bounding box). /// In the future, this will be more limited as we're going to gpu staging data as soon as possible /// which is very slow to read. See [#594](https://github.com/rerun-io/rerun/pull/594) pub struct SceneSpatialPrimitives { @@ -21,11 +22,11 @@ pub struct SceneSpatialPrimitives { // TODO(andreas): Storing extra data like so is unsafe and not future proof either // (see also above comment on the need to separate cpu-readable data) - pub textured_rectangles_ids: Vec, + pub textured_rectangles_ids: Vec, pub textured_rectangles: Vec, - pub line_strips: LineStripSeriesBuilder, - pub points: PointCloudBuilder, + pub line_strips: LineStripSeriesBuilder, + pub points: PointCloudBuilder, pub meshes: Vec, pub depth_clouds: DepthClouds, @@ -93,7 +94,7 @@ impl SceneSpatialPrimitives { line_strips, points, meshes, - depth_clouds: _, // no bbox for depth clouds + depth_clouds, any_outlines: _, } = self; @@ -133,6 +134,10 @@ impl SceneSpatialPrimitives { *bounding_box = bounding_box.union(mesh.mesh.bbox().transform_affine3(&mesh.world_from_mesh)); } + + for cloud in &depth_clouds.clouds { + *bounding_box = bounding_box.union(cloud.bbox()); + } } pub fn mesh_instances(&self) -> Vec { @@ -173,7 +178,13 @@ impl SceneSpatialPrimitives { let line_radius = re_renderer::Size::new_scene(axis_length * 0.05); let origin = transform.translation(); - let mut line_batch = self.line_strips.batch("origin axis"); + let picking_layer_id = picking_layer_id_from_instance_path_hash(instance_path_hash); + + let mut line_batch = self + .line_strips + .batch("origin axis") + .picking_object_id(picking_layer_id.object); + line_batch .add_segment( origin, @@ -182,7 +193,7 @@ impl SceneSpatialPrimitives { .radius(line_radius) .color(AXIS_COLOR_X) .flags(LineStripFlags::CAP_END_TRIANGLE | LineStripFlags::CAP_START_ROUND) - .user_data(instance_path_hash); + .picking_instance_id(picking_layer_id.instance); line_batch .add_segment( origin, @@ -191,7 +202,7 @@ impl SceneSpatialPrimitives { .radius(line_radius) .color(AXIS_COLOR_Y) .flags(LineStripFlags::CAP_END_TRIANGLE | LineStripFlags::CAP_START_ROUND) - 
.user_data(instance_path_hash); + .picking_instance_id(picking_layer_id.instance); line_batch .add_segment( origin, @@ -200,6 +211,6 @@ impl SceneSpatialPrimitives { .radius(line_radius) .color(AXIS_COLOR_Z) .flags(LineStripFlags::CAP_END_TRIANGLE | LineStripFlags::CAP_START_ROUND) - .user_data(instance_path_hash); + .picking_instance_id(picking_layer_id.instance); } } diff --git a/crates/re_viewer/src/ui/view_spatial/scene/scene_part/arrows3d.rs b/crates/re_viewer/src/ui/view_spatial/scene/scene_part/arrows3d.rs index eb1cecb63b26..f40ca9025684 100644 --- a/crates/re_viewer/src/ui/view_spatial/scene/scene_part/arrows3d.rs +++ b/crates/re_viewer/src/ui/view_spatial/scene/scene_part/arrows3d.rs @@ -1,5 +1,5 @@ use glam::Mat4; -use re_data_store::{EntityPath, EntityProperties}; +use re_data_store::EntityPath; use re_log_types::{ component_types::{ColorRGBA, InstanceKey, Label, Radius}, Arrow3D, Component, @@ -12,16 +12,13 @@ use crate::{ ui::{scene::SceneQuery, view_spatial::SceneSpatial, DefaultColor}, }; -use super::{instance_path_hash_for_picking, ScenePart}; +use super::{instance_key_to_picking_id, ScenePart}; pub struct Arrows3DPart; impl Arrows3DPart { - #[allow(clippy::too_many_arguments)] fn process_entity_view( scene: &mut SceneSpatial, - _query: &SceneQuery<'_>, - props: &EntityProperties, entity_view: &EntityView, ent_path: &EntityPath, world_from_obj: Mat4, @@ -39,21 +36,14 @@ impl Arrows3DPart { .line_strips .batch("arrows") .world_from_obj(world_from_obj) - .outline_mask_ids(entity_highlight.overall); + .outline_mask_ids(entity_highlight.overall) + .picking_object_id(re_renderer::PickingLayerObjectId(ent_path.hash64())); let visitor = |instance_key: InstanceKey, arrow: Arrow3D, color: Option, radius: Option, _label: Option