Skip to content

Commit

Permalink
Remove kstring in favor of BString (GitoxideLabs#1460)
Browse files Browse the repository at this point in the history
Let's not trade safety for some convenience.

This reduces performance by 5% at least (in the WebKit repository at 886077e077a496a6e398df52a4b7915d8cd68f76):

```
❯ hyperfine  "/Users/byron/dev/github.com/Byron/gitoxide/target/release/gix index entries ':(attr:export-ignore)' -i" "gix index entries ':(attr:export-ignore)' -i"
Benchmark 1: /Users/byron/dev/github.com/Byron/gitoxide/target/release/gix index entries ':(attr:export-ignore)' -i
  Time (mean ± σ):     820.1 ms ±  71.7 ms    [User: 759.9 ms, System: 85.9 ms]
  Range (min … max):   778.8 ms … 984.3 ms    10 runs

  Warning: Statistical outliers were detected. Consider re-running this benchmark on a quiet system without any interferences from other programs. It might help to use the '--warmup' or '--prepare' options.

Benchmark 2: gix index entries ':(attr:export-ignore)' -i
  Time (mean ± σ):     782.1 ms ±   1.8 ms    [User: 760.1 ms, System: 43.2 ms]
  Range (min … max):   780.0 ms … 786.0 ms    10 runs

Summary
  gix index entries ':(attr:export-ignore)' -i ran
    1.05 ± 0.09 times faster than /Users/byron/dev/github.com/Byron/gitoxide/target/release/gix index entries ':(attr:export-ignore)' -i
```
  • Loading branch information
Byron authored and LuaKT committed Aug 20, 2024
1 parent fbdafcc commit 98f8aa7
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 22 deletions.
3 changes: 2 additions & 1 deletion gix-attributes/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
#![deny(missing_docs, rust_2018_idioms)]
#![forbid(unsafe_code)]

pub use gix_glob as glob;
use kstring::{KString, KStringRef};
Expand Down
3 changes: 1 addition & 2 deletions gix-attributes/src/name.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use crate::{Name, NameRef};
use bstr::{BStr, BString, ByteSlice};
use kstring::KStringRef;

use crate::{Name, NameRef};

impl<'a> NameRef<'a> {
/// Turn this ref into its owned counterpart.
pub fn to_owned(self) -> Name {
Expand Down
3 changes: 1 addition & 2 deletions gix-attributes/src/parse.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use std::borrow::Cow;

use crate::{name, AssignmentRef, Name, NameRef, StateRef};
use bstr::{BStr, ByteSlice};
use kstring::KStringRef;

use crate::{name, AssignmentRef, Name, NameRef, StateRef};

/// The kind of attribute that was parsed.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
Expand Down
3 changes: 1 addition & 2 deletions gix-attributes/src/search/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::collections::HashMap;

use kstring::KString;
use smallvec::SmallVec;
use std::collections::HashMap;

use crate::{Assignment, AssignmentRef};

Expand Down
23 changes: 9 additions & 14 deletions gix-attributes/src/state.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,24 @@
use bstr::{BStr, ByteSlice};
use kstring::{KString, KStringRef};

use crate::{State, StateRef};
use bstr::{BStr, BString, ByteSlice};

/// A container to encapsulate a tightly packed and typically unallocated byte value that isn't necessarily UTF8 encoded.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Value(KString);
// TODO: This should be some sort of 'smallbstring' - but can't use `kstring` here due to UTF8 requirement. 5% performance boost possible.
// What's really needed here is a representation that displays as string when serialized which helps with JSON.
// Maybe `smallvec` with display and serialization wrapper would do the trick?
pub struct Value(BString);

/// A reference container to encapsulate a tightly packed and typically unallocated byte value that isn't necessarily UTF8 encoded.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ValueRef<'a>(#[cfg_attr(feature = "serde", serde(borrow))] KStringRef<'a>);
pub struct ValueRef<'a>(#[cfg_attr(feature = "serde", serde(borrow))] &'a [u8]);

/// Lifecycle
impl<'a> ValueRef<'a> {
/// Keep `input` as our value.
pub fn from_bytes(input: &'a [u8]) -> Self {
Self(KStringRef::from_ref(
// SAFETY: our API makes accessing that value as `str` impossible, so illformed UTF8 is never exposed as such.
#[allow(unsafe_code)]
unsafe {
std::str::from_utf8_unchecked(input)
},
))
Self(input)
}
}

Expand All @@ -42,7 +37,7 @@ impl ValueRef<'_> {

impl<'a> From<&'a str> for ValueRef<'a> {
fn from(v: &'a str) -> Self {
ValueRef(v.into())
ValueRef(v.as_bytes().into())
}
}

Expand All @@ -54,7 +49,7 @@ impl<'a> From<ValueRef<'a>> for Value {

impl From<&str> for Value {
fn from(v: &str) -> Self {
Value(KString::from_ref(v))
Value(v.as_bytes().into())
}
}

Expand Down
29 changes: 29 additions & 0 deletions gix-attributes/tests/parse/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use bstr::BString;
use gix_attributes::state::ValueRef;
use gix_attributes::{parse, StateRef};
use gix_glob::pattern::Mode;
use gix_testtools::fixture_bytes;
Expand Down Expand Up @@ -275,6 +276,19 @@ fn attributes_can_have_values() {
);
}

#[test]
fn attributes_can_have_illformed_utf8() {
assert_eq!(
byte_line(b"p a=one b=\xC3\x28\x41 c=d "),
(
pattern("p", Mode::NO_SUB_DIR, None),
vec![value("a", "one"), byte_value("b", b"\xC3\x28\x41"), value("c", "d")],
1
),
"illformed UTF8 is fully supported"
);
}

#[test]
fn attributes_see_state_adjustments_over_value_assignments() {
assert_eq!(
Expand Down Expand Up @@ -325,6 +339,10 @@ fn value<'b>(attr: &str, value: &'b str) -> (BString, StateRef<'b>) {
(attr.into(), StateRef::Value(value.into()))
}

fn byte_value<'b>(attr: &str, value: &'b [u8]) -> (BString, StateRef<'b>) {
(attr.into(), StateRef::Value(ValueRef::from_bytes(value)))
}

fn pattern(name: &str, flags: gix_glob::pattern::Mode, first_wildcard_pos: Option<usize>) -> parse::Kind {
parse::Kind::Pattern(gix_glob::Pattern {
text: name.into(),
Expand All @@ -344,6 +362,17 @@ fn line(input: &str) -> ExpandedAttribute {
try_line(input).unwrap()
}

fn byte_line(input: &[u8]) -> ExpandedAttribute {
try_byte_line(input).unwrap()
}

fn try_byte_line(input: &[u8]) -> Result<ExpandedAttribute, parse::Error> {
let mut lines = gix_attributes::parse(input);
let res = expand(lines.next().unwrap())?;
assert!(lines.next().is_none(), "expected only one line");
Ok(res)
}

fn lenient_lines(input: &str) -> Vec<ExpandedAttribute> {
gix_attributes::parse(input.as_bytes())
.map(expand)
Expand Down
2 changes: 1 addition & 1 deletion gix-attributes/tests/search/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ fn given_attributes_are_made_available_in_given_order() -> crate::Result {
fn size_of_outcome() {
assert_eq!(
std::mem::size_of::<Outcome>(),
904,
840,
"it's quite big, shouldn't change without us noticing"
)
}
Expand Down

0 comments on commit 98f8aa7

Please sign in to comment.