diff --git a/src/graph.rs b/src/graph.rs index eba15f14d..32f94b9fb 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,6 +1,11 @@ use self::graph_data_memory::GraphDataMemory; use self::graph_impl::GraphImpl; +mod dictionary; +mod dictionary_data; +mod dictionary_data_memory; +mod dictionary_impl; +mod dictionary_value; mod graph_data; mod graph_data_memory; mod graph_data_storage; diff --git a/src/graph/dictionary.rs b/src/graph/dictionary.rs new file mode 100644 index 000000000..fb29a867c --- /dev/null +++ b/src/graph/dictionary.rs @@ -0,0 +1,249 @@ +use super::dictionary_data_memory::DictionaryDataMemory; +use super::dictionary_impl::DictionaryImpl; +use super::dictionary_value::DictionaryValue; +use crate::storage::HashMultiMap; +use crate::storage::Serialize; +use crate::storage::StableHash; + +pub(crate) type Dictionary = DictionaryImpl>; + +#[allow(dead_code)] +impl Dictionary +where + T: Clone + Default + Eq + PartialEq + StableHash + Serialize, +{ + pub(crate) fn new() -> Dictionary { + Dictionary { + data: DictionaryDataMemory:: { + index: HashMultiMap::::new(), + values: vec![DictionaryValue:: { + meta: 0, + hash: 0, + value: T::default(), + }], + }, + phantom_data: std::marker::PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Clone, Default, Eq, PartialEq)] + struct CollidedValue { + pub(super) value: i64, + } + + impl Serialize for CollidedValue { + fn deserialize(bytes: &[u8]) -> Result { + Ok(CollidedValue { + value: i64::deserialize(bytes)?, + }) + } + + fn serialize(&self) -> Vec { + self.value.serialize() + } + } + + impl StableHash for CollidedValue { + fn stable_hash(&self) -> u64 { + 1 + } + } + + #[test] + fn collided_value() { + let value = CollidedValue { value: 10 }; + let bytes = value.serialize(); + let other = CollidedValue::deserialize(&bytes).unwrap(); + + assert!(value == other); + } + + #[test] + fn count_invalid_index() { + let dictionary = Dictionary::::new(); + + assert_eq!(dictionary.count(-1), Ok(None)); + } + + #[test] + fn index() { + let mut dictionary = Dictionary::::new(); + + let index = dictionary.insert(&10).unwrap(); + + assert_eq!(dictionary.index(&10), Ok(Some(index))); + } + + #[test] + fn index_missing_value() { + let dictionary = Dictionary::::new(); + + assert_eq!(dictionary.index(&10), Ok(None)); + } + + #[test] + fn index_removed_value() { + let mut dictionary = Dictionary::::new(); + + let index = dictionary.insert(&10).unwrap(); + dictionary.remove(index).unwrap(); + + assert_eq!(dictionary.index(&10), Ok(None)); + } + + #[test] + fn index_reuse() { + let mut dictionary = Dictionary::::new(); + + let index1 = dictionary.insert(&5).unwrap(); + let index2 = dictionary.insert(&10).unwrap(); + let index3 = dictionary.insert(&7).unwrap(); + + dictionary.remove(index2).unwrap(); + dictionary.remove(index1).unwrap(); + dictionary.remove(index3).unwrap(); + + assert_eq!(dictionary.count(index1), Ok(None)); + assert_eq!(dictionary.count(index2), Ok(None)); + assert_eq!(dictionary.count(index3), Ok(None)); + + assert_eq!(dictionary.insert(&3), Ok(index3)); + assert_eq!(dictionary.insert(&2), Ok(index1)); + assert_eq!(dictionary.insert(&1), Ok(index2)); + + assert_eq!(dictionary.value(index1), Ok(Some(2))); + assert_eq!(dictionary.value(index2), Ok(Some(1))); + assert_eq!(dictionary.value(index3), Ok(Some(3))); + } + + #[test] + fn index_with_collisions() { + let mut dictionary = Dictionary::::new(); + + let index1 = dictionary.insert(&CollidedValue { value: 1 }).unwrap(); + let index2 = dictionary.insert(&CollidedValue { value: 2 }).unwrap(); + let index3 = dictionary.insert(&CollidedValue { value: 3 }).unwrap(); + + assert_eq!( + dictionary.index(&CollidedValue { value: 1 }), + Ok(Some(index1)) + ); + + assert_eq!( + dictionary.index(&CollidedValue { value: 2 }), + Ok(Some(index2)) + ); + + assert_eq!( + dictionary.index(&CollidedValue { value: 3 }), + Ok(Some(index3)) + ); + } + + #[test] + fn insert() { + let mut dictionary = Dictionary::::new(); + + let index = dictionary.insert(&10).unwrap(); + + assert_eq!(dictionary.len(), Ok(1)); + assert_eq!(dictionary.value(index), Ok(Some(10_i64))); + assert_eq!(dictionary.count(index), Ok(Some(1))); + } + + #[test] + fn insert_multiple() { + let mut dictionary = Dictionary::::new(); + + let index1 = dictionary.insert(&10).unwrap(); + let index2 = dictionary.insert(&15).unwrap(); + let index3 = dictionary.insert(&20).unwrap(); + + assert_eq!(dictionary.len(), Ok(3)); + + assert_eq!(dictionary.value(index1).unwrap(), Some(10_i64)); + assert_eq!(dictionary.count(index1), Ok(Some(1))); + + assert_eq!(dictionary.value(index2).unwrap(), Some(15_i64)); + assert_eq!(dictionary.count(index2), Ok(Some(1))); + + assert_eq!(dictionary.value(index3).unwrap(), Some(20_i64)); + assert_eq!(dictionary.count(index3), Ok(Some(1))); + } + + #[test] + fn insert_same() { + let mut dictionary = Dictionary::::new(); + + dictionary.insert(&10).unwrap(); + + let index2 = dictionary.insert(&15).unwrap(); + + assert_eq!(dictionary.insert(&15).unwrap(), index2); + assert_eq!(dictionary.insert(&15).unwrap(), index2); + + dictionary.insert(&20).unwrap(); + + assert_eq!(dictionary.len(), Ok(3)); + assert_eq!(dictionary.count(index2), Ok(Some(3))); + } + + #[test] + fn remove() { + let mut dictionary = Dictionary::::new(); + + let index = dictionary.insert(&10).unwrap(); + dictionary.remove(index).unwrap(); + + assert_eq!(dictionary.value(index), Ok(None)); + assert_eq!(dictionary.count(index), Ok(None)); + } + + #[test] + fn remove_duplicated() { + let mut dictionary = Dictionary::::new(); + + let index = dictionary.insert(&10).unwrap(); + dictionary.insert(&10).unwrap(); + dictionary.insert(&10).unwrap(); + + assert_eq!(dictionary.value(index), Ok(Some(10))); + assert_eq!(dictionary.count(index), Ok(Some(3))); + + dictionary.remove(index).unwrap(); + + assert_eq!(dictionary.value(index), Ok(Some(10))); + assert_eq!(dictionary.count(index), Ok(Some(2))); + + dictionary.remove(index).unwrap(); + dictionary.remove(index).unwrap(); + + assert_eq!(dictionary.value(index), Ok(None)); + assert_eq!(dictionary.count(index), Ok(None)); + } + + #[test] + fn remove_missing() { + let mut dictionary = Dictionary::::new(); + + let index = dictionary.insert(&10).unwrap(); + + assert_eq!(dictionary.len(), Ok(1)); + + dictionary.remove(index + 1).unwrap(); + + assert_eq!(dictionary.len(), Ok(1)); + } + + #[test] + fn value_missing_index() { + let mut dictionary = Dictionary::::new(); + + assert_eq!(dictionary.value(1), Ok(None)); + } +} diff --git a/src/graph/dictionary_data.rs b/src/graph/dictionary_data.rs new file mode 100644 index 000000000..0a8bd4a75 --- /dev/null +++ b/src/graph/dictionary_data.rs @@ -0,0 +1,22 @@ +use super::dictionary_value::DictionaryValue; +use crate::storage::Serialize; +use crate::storage::StableHash; +use crate::DbError; + +pub(crate) trait DictionaryData +where + T: Clone + Default + Eq + PartialEq + StableHash + Serialize, +{ + fn capacity(&self) -> u64; + fn commit(&mut self) -> Result<(), DbError>; + fn indexes(&self, hash: u64) -> Result, DbError>; + fn insert(&mut self, hash: u64, index: i64) -> Result<(), DbError>; + fn hash(&self, index: i64) -> Result; + fn meta(&self, index: i64) -> Result; + fn remove(&mut self, hash: u64, index: i64) -> Result<(), DbError>; + fn set_hash(&mut self, index: i64, hash: u64) -> Result<(), DbError>; + fn set_meta(&mut self, index: i64, meta: i64) -> Result<(), DbError>; + fn set_value(&mut self, index: i64, value: DictionaryValue) -> Result<(), DbError>; + fn transaction(&mut self); + fn value(&self, index: i64) -> Result, DbError>; +} diff --git a/src/graph/dictionary_data_memory.rs b/src/graph/dictionary_data_memory.rs new file mode 100644 index 000000000..0eb370858 --- /dev/null +++ b/src/graph/dictionary_data_memory.rs @@ -0,0 +1,77 @@ +use super::dictionary_data::DictionaryData; +use super::dictionary_value::DictionaryValue; +use crate::storage::HashMultiMap; +use crate::storage::Serialize; +use crate::storage::StableHash; +use crate::DbError; + +pub(crate) struct DictionaryDataMemory +where + T: Clone + Default + Eq + PartialEq + StableHash + Serialize, +{ + pub(super) index: HashMultiMap, + pub(super) values: Vec>, +} + +impl DictionaryData for DictionaryDataMemory +where + T: Clone + Default + Eq + PartialEq + StableHash + Serialize, +{ + fn capacity(&self) -> u64 { + self.values.len() as u64 + } + + fn commit(&mut self) -> Result<(), DbError> { + Ok(()) + } + + fn indexes(&self, hash: u64) -> Result, DbError> { + self.index.values(&hash) + } + + fn insert(&mut self, hash: u64, index: i64) -> Result<(), DbError> { + self.index.insert(hash, index) + } + + fn hash(&self, index: i64) -> Result { + Ok(self.values[index as usize].hash) + } + + fn meta(&self, index: i64) -> Result { + Ok(self.values[index as usize].meta) + } + + fn remove(&mut self, hash: u64, index: i64) -> Result<(), DbError> { + self.index.remove_value(&hash, &index)?; + + Ok(()) + } + + fn set_hash(&mut self, index: i64, hash: u64) -> Result<(), DbError> { + self.values[index as usize].hash = hash; + + Ok(()) + } + + fn set_meta(&mut self, index: i64, meta: i64) -> Result<(), DbError> { + self.values[index as usize].meta = meta; + + Ok(()) + } + + fn set_value(&mut self, index: i64, value: DictionaryValue) -> Result<(), DbError> { + if self.capacity() == index as u64 { + self.values.push(value); + } else { + self.values[index as usize] = value; + } + + Ok(()) + } + + fn transaction(&mut self) {} + + fn value(&self, index: i64) -> Result, crate::DbError> { + Ok(self.values[index as usize].clone()) + } +} diff --git a/src/graph/dictionary_impl.rs b/src/graph/dictionary_impl.rs new file mode 100644 index 000000000..593d11d1c --- /dev/null +++ b/src/graph/dictionary_impl.rs @@ -0,0 +1,161 @@ +use super::dictionary_data::DictionaryData; +use super::dictionary_value::DictionaryValue; +use crate::storage::Serialize; +use crate::storage::StableHash; +use crate::DbError; + +pub(crate) struct DictionaryImpl +where + T: Clone + Default + Eq + PartialEq + StableHash + Serialize, + Data: DictionaryData, +{ + pub(super) data: Data, + pub(super) phantom_data: std::marker::PhantomData, +} + +#[allow(dead_code)] +impl DictionaryImpl +where + T: Clone + Default + Eq + PartialEq + StableHash + Serialize, + Data: DictionaryData, +{ + pub(crate) fn count(&self, index: i64) -> Result, DbError> { + if self.is_valid_index(index) { + let value = self.data.meta(index)?; + + if 0 < value { + return Ok(Some(value as u64)); + } + } + + Ok(None) + } + + pub(crate) fn len(&self) -> Result { + self.data.hash(0) + } + + pub(crate) fn index(&self, value: &T) -> Result, DbError> { + let hash = value.stable_hash(); + + if let Some(value) = self.find_value(hash, value)? { + return Ok(Some(value.0)); + } + + Ok(None) + } + + pub(crate) fn insert(&mut self, value: &T) -> Result { + let hash = value.stable_hash(); + let index; + + self.data.transaction(); + + if let Some(value) = self.find_value(hash, value)? { + index = value.0; + self.data.set_meta(index, value.1 + 1)?; + } else { + index = self.insert_new(hash, value)?; + } + + self.data.commit()?; + + Ok(index) + } + + pub(crate) fn remove(&mut self, index: i64) -> Result<(), DbError> { + if self.is_valid_index(index) { + let value = self.data.meta(index)?; + + self.data.transaction(); + + if value == 1 { + self.remove_value(index)? + } else { + self.data.set_meta(index, value - 1)? + } + + self.data.commit()?; + } + + Ok(()) + } + + pub(crate) fn value(&mut self, index: i64) -> Result, DbError> { + if self.is_valid_index(index) { + let value = self.data.value(index)?; + + if 0 < value.meta { + return Ok(Some(value.value)); + } + } + + Ok(None) + } + + fn find_value(&self, hash: u64, value: &T) -> Result, DbError> { + for index in self.data.indexes(hash)? { + let dictionary_value = self.data.value(index)?; + + if dictionary_value.value == *value { + return Ok(Some((index, dictionary_value.meta))); + } + } + + Ok(None) + } + + fn free_index(&mut self, index: i64) -> Result<(), DbError> { + let next_free_index = self.data.meta(0)?; + self.data.set_meta(index, next_free_index)?; + self.data.set_meta(0, -index) + } + + fn get_free_index(&mut self) -> Result { + let mut free_index = -self.data.meta(0)?; + + if free_index == 0 { + free_index = self.data.capacity() as i64; + } else { + let next_free_index = self.data.meta(free_index)?; + self.data.set_meta(0, next_free_index)?; + } + + Ok(free_index) + } + + fn insert_new(&mut self, hash: u64, value: &T) -> Result { + let index = self.get_free_index()?; + + self.data.insert(hash, index)?; + self.data.set_value( + index, + DictionaryValue:: { + meta: 1, + hash, + value: value.clone(), + }, + )?; + + let len = self.len()?; + self.data.set_hash(0, len + 1)?; + + Ok(index) + } + + fn is_valid_index(&self, index: i64) -> bool { + 0 < index && index < self.data.capacity() as i64 + } + + fn remove_value(&mut self, index: i64) -> Result<(), DbError> { + let hash = self.data.hash(index)?; + self.data.remove(hash, index)?; + + self.free_index(index)?; + + let len = self.len()?; + self.data.set_hash(0, len - 1)?; + + Ok(()) + } +} diff --git a/src/graph/dictionary_value.rs b/src/graph/dictionary_value.rs new file mode 100644 index 000000000..5cba5d825 --- /dev/null +++ b/src/graph/dictionary_value.rs @@ -0,0 +1,50 @@ +use crate::storage::Serialize; +use crate::storage::StableHash; +use crate::DbError; + +#[derive(Clone, Default, PartialEq, Eq)] +pub(crate) struct DictionaryValue +where + T: Clone + Default + Eq + PartialEq + StableHash + Serialize, +{ + pub(super) meta: i64, + pub(super) hash: u64, + pub(super) value: T, +} + +impl Serialize for DictionaryValue +where + T: Clone + Default + Eq + PartialEq + StableHash + Serialize, +{ + fn deserialize(bytes: &[u8]) -> Result { + Ok(DictionaryValue:: { + meta: i64::deserialize(bytes)?, + hash: u64::deserialize(&bytes[(i64::serialized_size() as usize)..])?, + value: T::deserialize( + &bytes[((i64::serialized_size() + u64::serialized_size()) as usize)..], + )?, + }) + } + + fn serialize(&self) -> Vec { + let mut bytes = self.meta.serialize(); + bytes.extend(self.hash.serialize()); + bytes.extend(self.value.serialize()); + + bytes + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn serialize() { + let value = DictionaryValue::::default(); + let bytes = value.serialize(); + let other = DictionaryValue::::deserialize(&bytes).unwrap(); + + assert!(other == value); + } +} diff --git a/src/storage.rs b/src/storage.rs index 2411ce5f5..e61bc467e 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -30,13 +30,16 @@ use write_ahead_log_record::WriteAheadLogRecord; #[allow(unused_imports)] pub(crate) use file_storage::FileStorage; pub(crate) use file_storage_data::FileStorageData; +#[allow(unused_imports)] +pub(crate) use hash_multi_map::HashMultiMap; pub(crate) use serialize::Serialize; #[allow(unused_imports)] pub(crate) use stable_hash::StableHash; pub(crate) use storage_data::StorageData; -#[allow(unused_imports)] pub(crate) use storage_hash_map::StorageHashMap; #[allow(unused_imports)] +pub(crate) use storage_hash_multi_map::StorageHashMultiMap; +#[allow(unused_imports)] pub(crate) use storage_vec::StorageVec; pub(crate) struct Storage {