Skip to content

Commit

Permalink
DataStore changelog 2: introduce StoreEvents (#4203)
Browse files Browse the repository at this point in the history
Introduces `StoreEvent`, an event that describes the atomic unit of
change in the Rerun `DataStore`: a row has been added to or removed from
the store.

`StoreEvent`s are fired on both the insertion and garbage collection
paths, enabling listeners to build arbitrary, always up-to-date views &
trigger systems.

```rust
/// The atomic unit of change in the Rerun [`DataStore`].
///
/// A [`StoreEvent`] describes the changes caused by the addition or deletion of a
/// [`re_log_types::DataRow`] in the store.
///
/// Methods that mutate the [`DataStore`], such as [`DataStore::insert_row`] and [`DataStore::gc`],
/// return [`StoreEvent`]s that describe the changes.
///
/// Refer to field-level documentation for more details and check out [`StoreDiff`] for a precise
/// definition of what an event involves.
#[derive(Debug, Clone, PartialEq)]
pub struct StoreEvent {
    /// Which [`DataStore`] sent this event?
    pub store_id: StoreId,

    /// What was the store's generation when it sent this event?
    pub store_generation: StoreGeneration,

    /// Monotonically increasing ID of the event.
    ///
    /// IDs are allocated on a per-store basis: every [`DataStore`] keeps its own event counter.
    ///
    /// When handling a [`StoreEvent`], if this is the first time you process this [`StoreId`] and
    /// the associated `event_id` is not `1`, it means you registered late and missed some updates.
    pub event_id: u64,

    /// What actually changed?
    ///
    /// Refer to [`StoreDiff`] for more information.
    pub diff: StoreDiff,
}

/// Describes an atomic change in the Rerun [`DataStore`]: a row has been added or deleted.
///
/// From a query model standpoint, the [`DataStore`] _always_ operates one row at a time:
/// - The contents of a row (i.e. its columns) are immutable past insertion, by virtue of
///   [`RowId`]s being unique and non-reusable.
/// - Similarly, garbage collection always removes _all the data_ associated with a row in one go:
///   there cannot be orphaned columns. When a row is gone, all data associated with it is gone too.
///
/// Refer to field-level documentation for more information.
#[derive(Debug, Clone, PartialEq)]
pub struct StoreDiff {
    /// Addition or deletion?
    ///
    /// The store's internals are opaque and don't necessarily reflect the query model (e.g. there
    /// might be data in the store that cannot be reached by any query).
    ///
    /// A [`StoreDiff`] answers a logical question: "does there exist a query path which can return
    /// data from that row?".
    pub kind: StoreDiffKind,

    /// What's the row's [`RowId`]?
    ///
    /// [`RowId`]s are guaranteed to be unique within a single [`DataStore`].
    ///
    /// Put another way, the same [`RowId`] can appear in at most two [`StoreDiff`]s:
    /// one addition and (optionally) one deletion (in that order!).
    pub row_id: RowId,

    /// The [`TimePoint`] associated with that row.
    ///
    /// Since insertions and deletions both work on a row-level basis, this is guaranteed to be the
    /// same value for both the insertion and deletion events (if any).
    pub timepoint: TimePoint,

    /// The [`EntityPath`] associated with that row.
    ///
    /// Since insertions and deletions both work on a row-level basis, this is guaranteed to be the
    /// same value for both the insertion and deletion events (if any).
    pub entity_path: EntityPath,

    /// All the [`DataCell`]s associated with that row, keyed by [`ComponentName`].
    ///
    /// Since insertions and deletions both work on a row-level basis, this is guaranteed to be the
    /// same set of values for both the insertion and deletion events (if any).
    pub cells: IntMap<ComponentName, DataCell>,
}
```


---

`DataStore` changelog PR series:
- #4202
- #4203
- #4205
- #4206
- #4208
- #4209
  • Loading branch information
teh-cmc authored Nov 15, 2023
1 parent d0a42df commit 8263309
Show file tree
Hide file tree
Showing 10 changed files with 743 additions and 112 deletions.
1 change: 1 addition & 0 deletions crates/re_arrow_store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ polars-ops = { workspace = true, optional = true, features = [


[dev-dependencies]
re_log_types = { workspace = true, features = ["testing"] }
re_types = { workspace = true, features = ["datagen"] }

anyhow.workspace = true
Expand Down
2 changes: 2 additions & 0 deletions crates/re_arrow_store/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ mod arrow_util;
mod store;
mod store_arrow;
mod store_dump;
mod store_event;
mod store_format;
mod store_gc;
mod store_helpers;
Expand All @@ -37,6 +38,7 @@ pub mod test_util;

pub use self::arrow_util::ArrayExt;
pub use self::store::{DataStore, DataStoreConfig, StoreGeneration};
pub use self::store_event::{StoreDiff, StoreDiffKind, StoreEvent};
pub use self::store_gc::{Deleted, GarbageCollectionOptions, GarbageCollectionTarget};
pub use self::store_helpers::VersionedComponent;
pub use self::store_read::{LatestAtQuery, RangeQuery};
Expand Down
7 changes: 6 additions & 1 deletion crates/re_arrow_store/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ use ahash::HashMap;
use arrow2::datatypes::DataType;
use nohash_hasher::IntMap;
use parking_lot::RwLock;
use re_types_core::{ComponentName, ComponentNameSet, SizeBytes};
use smallvec::SmallVec;

use re_log_types::{
DataCell, DataCellColumn, EntityPath, EntityPathHash, ErasedTimeVec, NumInstancesVec, RowId,
RowIdVec, StoreId, TimeInt, TimePoint, TimeRange, Timeline,
};
use re_types_core::{ComponentName, ComponentNameSet, SizeBytes};

// --- Data store ---

Expand Down Expand Up @@ -229,6 +229,9 @@ pub struct DataStore {

/// Monotonically increasing ID for GCs.
pub(crate) gc_id: u64,

/// Monotonically increasing ID for store events.
pub(crate) event_id: AtomicU64,
}

impl Clone for DataStore {
Expand All @@ -245,6 +248,7 @@ impl Clone for DataStore {
insert_id: Default::default(),
query_id: Default::default(),
gc_id: Default::default(),
event_id: Default::default(),
}
}
}
Expand All @@ -264,6 +268,7 @@ impl DataStore {
insert_id: 0,
query_id: AtomicU64::new(0),
gc_id: 0,
event_id: AtomicU64::new(0),
}
}

Expand Down
Loading

1 comment on commit 8263309

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'Rust Benchmark'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.25.

Benchmark suite Current: 8263309 Previous: 0be6185 Ratio
arrow_mono_strings2/insert 2529475251 ns/iter (± 8749539) 1982567456 ns/iter (± 6930605) 1.28
arrow_batch_points2/insert 1574266 ns/iter (± 6633) 1228434 ns/iter (± 5466) 1.28
arrow_batch_strings2/insert 1608494 ns/iter (± 14770) 1244321 ns/iter (± 11847) 1.29

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.