Skip to content

Commit

Permalink
Add Null Mask to Prefix and Suffix Iters
Browse files Browse the repository at this point in the history
Update comment

Update comment
  • Loading branch information
xinlifoobar committed Aug 28, 2024
1 parent b711f23 commit 5e55c42
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 20 deletions.
30 changes: 14 additions & 16 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,49 +318,47 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
}

/// Returns an iterator over the prefix bytes of this array with respect to the prefix length.
/// If the prefix length is larger than the string length, it will return the empty slice.
pub fn prefix_bytes_iter(&self, prefix_len: usize) -> impl Iterator<Item = &[u8]> {
/// If the prefix length is larger than the string length, it will return `None`.
///
/// Note: for null views, it also returns `None`.
pub fn prefix_bytes_iter(&self, prefix_len: usize) -> impl Iterator<Item = Option<&[u8]>> {
self.views().into_iter().map(move |v| {
let len = (*v as u32) as usize;

if len < prefix_len {
return &[] as &[u8];
}

if prefix_len <= 4 || len <= 12 {
unsafe { StringViewArray::inline_value(v, prefix_len) }
None
} else if prefix_len <= 4 || len <= 12 {
Some(unsafe { StringViewArray::inline_value(v, prefix_len) })
} else {
let view = ByteView::from(*v);
let data = unsafe {
self.data_buffers()
.get_unchecked(view.buffer_index as usize)
};
let offset = view.offset as usize;
unsafe { data.get_unchecked(offset..offset + prefix_len) }
Some(unsafe { data.get_unchecked(offset..offset + prefix_len) })
}
})
}

/// Returns an iterator over the suffix bytes of this array with respect to the suffix length.
/// If the suffix length is larger than the string length, it will return the empty slice.
pub fn suffix_bytes_iter(&self, suffix_len: usize) -> impl Iterator<Item = &[u8]> {
/// If the suffix length is larger than the string length, it will return `None`.
pub fn suffix_bytes_iter(&self, suffix_len: usize) -> impl Iterator<Item = Option<&[u8]>> {
self.views().into_iter().map(move |v| {
let len = (*v as u32) as usize;

if len < suffix_len {
return &[] as &[u8];
}

if len <= 12 {
unsafe { &StringViewArray::inline_value(v, len)[len - suffix_len..] }
None
} else if len <= 12 {
Some(unsafe { &StringViewArray::inline_value(v, len)[len - suffix_len..] })
} else {
let view = ByteView::from(*v);
let data = unsafe {
self.data_buffers()
.get_unchecked(view.buffer_index as usize)
};
let offset = view.offset as usize;
unsafe { data.get_unchecked(offset + len - suffix_len..offset + len) }
Some(unsafe { data.get_unchecked(offset + len - suffix_len..offset + len) })
}
})
}
Expand Down
16 changes: 12 additions & 4 deletions arrow-string/src/predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,11 @@ impl<'a> Predicate<'a> {
string_view_array
.prefix_bytes_iter(v.len())
.map(|haystack| {
equals_bytes(haystack, v.as_bytes(), equals_kernel) != negate
equals_bytes(
haystack.unwrap_or_default(),
v.as_bytes(),
equals_kernel,
) != negate
})
.collect::<Vec<_>>(),
)
Expand All @@ -151,7 +155,7 @@ impl<'a> Predicate<'a> {
.prefix_bytes_iter(v.len())
.map(|haystack| {
equals_bytes(
haystack,
haystack.unwrap_or_default(),
v.as_bytes(),
equals_ignore_ascii_case_kernel,
) != negate
Expand All @@ -170,7 +174,11 @@ impl<'a> Predicate<'a> {
string_view_array
.suffix_bytes_iter(v.len())
.map(|haystack| {
equals_bytes(haystack, v.as_bytes(), equals_kernel) != negate
equals_bytes(
haystack.unwrap_or_default(),
v.as_bytes(),
equals_kernel,
) != negate
})
.collect::<Vec<_>>(),
)
Expand All @@ -187,7 +195,7 @@ impl<'a> Predicate<'a> {
.suffix_bytes_iter(v.len())
.map(|haystack| {
equals_bytes(
haystack,
haystack.unwrap_or_default(),
v.as_bytes(),
equals_ignore_ascii_case_kernel,
) != negate
Expand Down

0 comments on commit 5e55c42

Please sign in to comment.