Skip to content

Commit

Permalink
perf(hstr): Use thin arc for hash and length (#10033)
Browse files Browse the repository at this point in the history
**Description:**

This improves performance and reduces allocations by storing the hash, the length, and the string bytes behind a single thin `Arc`.

**Related issue:**

 - Closes #10030
  • Loading branch information
kdy1 authored Feb 14, 2025
1 parent ffb7734 commit 2bea793
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 49 deletions.
6 changes: 6 additions & 0 deletions .changeset/eight-steaks-yawn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
swc_core: minor
hstr: minor
---

perf(hstr): Use thin arc for hash and length
76 changes: 38 additions & 38 deletions crates/hstr/src/dynamic.rs
Original file line number Diff line number Diff line change
@@ -1,64 +1,62 @@
use std::{
borrow::Cow,
fmt::Debug,
ffi::c_void,
hash::{BuildHasherDefault, Hash, Hasher},
mem::ManuallyDrop,
ops::Deref,
ptr::NonNull,
};

use rustc_hash::FxHasher;
use triomphe::Arc;
use triomphe::{HeaderWithLength, ThinArc};

use crate::{
tagged_value::{TaggedValue, MAX_INLINE_LEN},
Atom, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
};

#[derive(Debug)]
pub(crate) struct Entry {
pub string: Box<str>,
pub(crate) struct Metadata {
pub hash: u64,
}

impl Entry {
pub unsafe fn cast(ptr: TaggedValue) -> *const Entry {
ptr.get_ptr().cast()
}
/// Newtype around a [`ThinArc`] whose header is a [`HeaderWithLength`] wrapping
/// [`Metadata`] and whose slice elements are `u8`.
///
/// `insert_entry` builds it from the precomputed hash and `text.as_bytes()`,
/// so the slice is expected to hold the bytes of the atom's string.
#[derive(Clone)]
pub(crate) struct Item(ThinArc<HeaderWithLength<Metadata>, u8>);

pub unsafe fn deref_from<'i>(ptr: TaggedValue) -> &'i Entry {
&*Self::cast(ptr)
}
impl Deref for Item {
type Target = <ThinArc<HeaderWithLength<Metadata>, u8> as Deref>::Target;

pub unsafe fn restore_arc(v: TaggedValue) -> Arc<Entry> {
let ptr = v.get_ptr() as *const Entry;
Arc::from_raw(ptr)
fn deref(&self) -> &Self::Target {
&self.0
}
}

impl PartialEq for Entry {
fn eq(&self, other: &Self) -> bool {
// Assumption: `store_id` and `alias` don't matter for equality within a single
// store (what we care about here). Compare hash first because that's cheaper.
self.hash == other.hash && self.string == other.string
/// TODO: Use a real weak pointer.
///
/// For now this is only an alias for [`Item`], i.e. a strong reference.
type WeakItem = Item;

impl Hash for Item {
    /// Feeds only the hash stored in the item's [`Metadata`] into `state`;
    /// the slice contents are not hashed again here.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let metadata = &self.0.header.header.header;
        state.write_u64(metadata.hash);
    }
}

impl Eq for Entry {}
pub(crate) unsafe fn deref_from(ptr: TaggedValue) -> ManuallyDrop<Item> {
let item = restore_arc(ptr);

impl Hash for Entry {
fn hash<H: Hasher>(&self, state: &mut H) {
// Assumption: type H is an EntryHasher
state.write_u64(self.hash);
}
ManuallyDrop::new(item)
}

/// Rebuilds an [`Item`] from the raw pointer stored in `v`.
///
/// # Safety
///
/// `v` must carry a pointer that was obtained from `ThinArc::into_raw` on an
/// `Item`'s inner arc. The returned `Item` is an owning handle: callers that
/// only want to borrow must keep it from being dropped (as `deref_from` does
/// with `ManuallyDrop`), otherwise dropping it releases a reference.
pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item {
    Item(ThinArc::from_raw(v.get_ptr()))
}

/// A store that stores [Atom]s. Can be merged with other [AtomStore]s for
/// better performance.
///
///
/// # Merging [AtomStore]
#[derive(Debug)]
pub struct AtomStore {
pub(crate) data: hashbrown::HashMap<Arc<Entry>, (), BuildEntryHasher>,
pub(crate) data: hashbrown::HashMap<WeakItem, (), BuildEntryHasher>,
}

impl Default for AtomStore {
Expand Down Expand Up @@ -96,11 +94,11 @@ where

let hash = calc_hash(&text);
let entry = storage.insert_entry(text, hash);
let entry = Arc::into_raw(entry);
let entry = ThinArc::into_raw(entry.0) as *mut c_void;

let ptr: NonNull<Entry> = unsafe {
let ptr: NonNull<c_void> = unsafe {
// Safety: ThinArc::into_raw returns a non-null pointer
NonNull::new_unchecked(entry as *mut Entry)
NonNull::new_unchecked(entry)
};
debug_assert!(0 == ptr.as_ptr() as u8 & TAG_MASK);
Atom {
Expand All @@ -109,22 +107,24 @@ where
}

pub(crate) trait Storage {
fn insert_entry(self, text: Cow<str>, hash: u64) -> Arc<Entry>;
fn insert_entry(self, text: Cow<str>, hash: u64) -> Item;
}

impl Storage for &'_ mut AtomStore {
#[inline(never)]
fn insert_entry(self, text: Cow<str>, hash: u64) -> Arc<Entry> {
fn insert_entry(self, text: Cow<str>, hash: u64) -> Item {
let (entry, _) = self
.data
.raw_entry_mut()
.from_hash(hash, |key| key.hash == hash && *key.string == *text)
.from_hash(hash, |key| {
key.header.header.header.hash == hash && key.slice == *text.as_bytes()
})
.or_insert_with(move || {
(
Arc::new(Entry {
string: text.into_owned().into_boxed_str(),
hash,
}),
Item(ThinArc::from_header_and_slice(
HeaderWithLength::new(Metadata { hash }, text.len()),
text.as_bytes(),
)),
(),
)
});
Expand Down
32 changes: 21 additions & 11 deletions crates/hstr/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
#![cfg_attr(feature = "atom_size_128", feature(integer_atomics))]
//! See [Atom] for more information.
use core::str;
use std::{
fmt::{Debug, Display},
hash::Hash,
mem::{self, forget},
mem::{self, forget, transmute},
num::NonZeroU8,
ops::Deref,
str::from_utf8_unchecked,
};

use debug_unreachable::debug_unreachable;
use once_cell::sync::Lazy;
use tagged_value::TaggedValue;

pub use crate::dynamic::AtomStore;
use crate::dynamic::Entry;
use crate::tagged_value::TaggedValue;

mod dynamic;
mod global_store;
Expand Down Expand Up @@ -233,7 +234,13 @@ impl Atom {
#[inline(never)]
fn get_hash(&self) -> u64 {
match self.tag() {
DYNAMIC_TAG => unsafe { Entry::deref_from(self.unsafe_data) }.hash,
DYNAMIC_TAG => {
unsafe { crate::dynamic::deref_from(self.unsafe_data) }
.header
.header
.header
.hash
}
STATIC_TAG => {
todo!("static hash")
}
Expand All @@ -249,7 +256,10 @@ impl Atom {
#[inline(never)]
fn as_str(&self) -> &str {
match self.tag() {
DYNAMIC_TAG => &unsafe { Entry::deref_from(self.unsafe_data) }.string,
DYNAMIC_TAG => unsafe {
let item = crate::dynamic::deref_from(self.unsafe_data);
from_utf8_unchecked(transmute::<&[u8], &'static [u8]>(&item.slice))
},
STATIC_TAG => {
todo!("static as_str")
}
Expand Down Expand Up @@ -277,14 +287,14 @@ impl PartialEq for Atom {
}

if self.is_dynamic() && other.is_dynamic() {
let te = unsafe { Entry::deref_from(self.unsafe_data) };
let oe = unsafe { Entry::deref_from(other.unsafe_data) };
let te = unsafe { crate::dynamic::deref_from(self.unsafe_data) };
let oe = unsafe { crate::dynamic::deref_from(other.unsafe_data) };

if te.hash != oe.hash {
if te.header.header.header.hash != oe.header.header.header.hash {
return false;
}

return te.string == oe.string;
return te.slice == oe.slice;
}

if self.get_hash() != other.get_hash() {
Expand All @@ -310,7 +320,7 @@ impl Drop for Atom {
#[inline(always)]
fn drop(&mut self) {
if self.is_dynamic() {
unsafe { drop(Entry::restore_arc(self.unsafe_data)) }
unsafe { drop(crate::dynamic::restore_arc(self.unsafe_data)) }
}
}
}
Expand All @@ -327,7 +337,7 @@ impl Atom {
pub(crate) fn from_alias(alias: TaggedValue) -> Self {
if alias.tag() & TAG_MASK == DYNAMIC_TAG {
unsafe {
let arc = Entry::restore_arc(alias);
let arc = crate::dynamic::restore_arc(alias);
forget(arc.clone());
forget(arc);
}
Expand Down

0 comments on commit 2bea793

Please sign in to comment.