Skip to content

Commit

Permalink
Minor: Remove redundant implementation of StringArrayType (#14023)
Browse files Browse the repository at this point in the history
* Minor: Remove redundant implementation of StringArrayType

Signed-off-by: Tai Le Manh <[email protected]>

* Deprecate rather than remove StringArrayType

---------

Signed-off-by: Tai Le Manh <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
tlm365 and alamb authored Jan 7, 2025
1 parent 4e877a0 commit f31ca5b
Show file tree
Hide file tree
Showing 11 changed files with 17 additions and 77 deletions.
3 changes: 1 addition & 2 deletions datafusion/functions-nested/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::utils::make_scalar_function;
use arrow::compute::cast;
use arrow_array::builder::{ArrayBuilder, LargeStringBuilder, StringViewBuilder};
use arrow_array::cast::AsArray;
use arrow_array::{GenericStringArray, StringViewArray};
use arrow_array::{GenericStringArray, StringArrayType, StringViewArray};
use arrow_schema::DataType::{
Dictionary, FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8, Utf8View,
};
Expand All @@ -45,7 +45,6 @@ use datafusion_common::exec_err;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_functions::strings::StringArrayType;
use datafusion_functions::{downcast_arg, downcast_named_arg};
use datafusion_macros::user_doc;
use std::sync::Arc;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/datetime/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@ use std::sync::Arc;

use arrow::array::{
Array, ArrowPrimitiveType, AsArray, GenericStringArray, PrimitiveArray,
StringViewArray,
StringArrayType, StringViewArray,
};
use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
use arrow::datatypes::DataType;
use chrono::format::{parse, Parsed, StrftimeItems};
use chrono::LocalResult::Single;
use chrono::{DateTime, TimeZone, Utc};

use crate::strings::StringArrayType;
use datafusion_common::cast::as_generic_string_array;
use datafusion_common::{
exec_err, unwrap_or_internal_err, DataFusionError, Result, ScalarType, ScalarValue,
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/regex/regexpcount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::strings::StringArrayType;
use arrow::array::{Array, ArrayRef, AsArray, Datum, Int64Array};
use arrow::array::{Array, ArrayRef, AsArray, Datum, Int64Array, StringArrayType};
use arrow::datatypes::{DataType, Int64Type};
use arrow::datatypes::{
DataType::Int64, DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View,
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/string/repeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@
use std::any::Any;
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
OffsetSizeTrait, StringArrayType, StringViewArray,
};
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions/src/string/split_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::strings::StringArrayType;
use crate::utils::utf8_to_str_type;
use arrow::array::{
ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, StringViewArray,
ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, StringArrayType,
StringViewArray,
};
use arrow::array::{AsArray, GenericStringBuilder};
use arrow::datatypes::DataType;
Expand Down
61 changes: 3 additions & 58 deletions datafusion/functions/src/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,64 +26,7 @@ use arrow::datatypes::DataType;
use arrow_buffer::{MutableBuffer, NullBuffer, NullBufferBuilder};

/// Abstracts iteration over different types of string arrays.
///
/// The [`StringArrayType`] trait helps write generic code for string functions that can work with
/// different types of string arrays.
///
/// Currently three types are supported:
/// - [`StringArray`]
/// - [`LargeStringArray`]
/// - [`StringViewArray`]
///
/// It is inspired / copied from [arrow-rs].
///
/// [arrow-rs]: https://github.com/apache/arrow-rs/blob/bf0ea9129e617e4a3cf915a900b747cc5485315f/arrow-string/src/like.rs#L151-L157
///
/// # Examples
/// Generic function that works for [`StringArray`], [`LargeStringArray`]
/// and [`StringViewArray`]:
/// ```
/// # use arrow::array::{StringArray, LargeStringArray, StringViewArray};
/// # use datafusion_functions::strings::StringArrayType;
///
/// /// Combines string values for any StringArrayType type. It can be invoked on
/// /// any combination of `StringArray`, `LargeStringArray` or `StringViewArray`
/// fn combine_values<'a, S1, S2>(array1: S1, array2: S2) -> Vec<String>
/// where S1: StringArrayType<'a>, S2: StringArrayType<'a>
/// {
/// // iterate over the elements of the 2 arrays in parallel
/// array1
/// .iter()
/// .zip(array2.iter())
/// .map(|(s1, s2)| {
/// // if both values are non null, combine them
/// if let (Some(s1), Some(s2)) = (s1, s2) {
/// format!("{s1}{s2}")
/// } else {
/// "None".to_string()
/// }
/// })
/// .collect()
/// }
///
/// let string_array = StringArray::from(vec!["foo", "bar"]);
/// let large_string_array = LargeStringArray::from(vec!["foo2", "bar2"]);
/// let string_view_array = StringViewArray::from(vec!["foo3", "bar3"]);
///
/// // Can invoke this function with a string array and a large string array
/// assert_eq!(
/// combine_values(&string_array, &large_string_array),
/// vec![String::from("foofoo2"), String::from("barbar2")]
/// );
///
/// // Can call the same function with string array and string view array
/// assert_eq!(
/// combine_values(&string_array, &string_view_array),
/// vec![String::from("foofoo3"), String::from("barbar3")]
/// );
/// ```
///
/// [`LargeStringArray`]: arrow::array::LargeStringArray
#[deprecated(since = "45.0.0", note = "Use arrow::array::StringArrayType instead")]
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
/// Return an [`ArrayIter`] over the values of the array.
///
Expand All @@ -94,6 +37,7 @@ pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
fn is_ascii(&self) -> bool;
}

#[allow(deprecated)]
impl<'a, T: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<T> {
fn iter(&self) -> ArrayIter<Self> {
GenericStringArray::<T>::iter(self)
Expand All @@ -104,6 +48,7 @@ impl<'a, T: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<T> {
}
}

#[allow(deprecated)]
impl<'a> StringArrayType<'a> for &'a StringViewArray {
fn iter(&self) -> ArrayIter<Self> {
StringViewArray::iter(self)
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/src/unicode/character_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
use arrow::array::{
Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveBuilder,
StringArrayType,
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::Result;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/unicode/lpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ use std::sync::Arc;

use arrow::array::{
Array, ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
OffsetSizeTrait, StringArrayType, StringViewArray,
};
use arrow::datatypes::DataType;
use unicode_segmentation::UnicodeSegmentation;
use DataType::{LargeUtf8, Utf8, Utf8View};

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/unicode/rpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
OffsetSizeTrait, StringArrayType, StringViewArray,
};
use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
Expand Down
5 changes: 3 additions & 2 deletions datafusion/functions/src/unicode/strpos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
use std::any::Any;
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
use arrow::array::{
ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray, StringArrayType,
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::{
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions/src/unicode/substr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
use std::any::Any;
use std::sync::Arc;

use crate::strings::{make_and_append_view, StringArrayType};
use crate::strings::make_and_append_view;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
Array, ArrayIter, ArrayRef, AsArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
OffsetSizeTrait, StringArrayType, StringViewArray,
};
use arrow::datatypes::DataType;
use arrow_buffer::{NullBufferBuilder, ScalarBuffer};
Expand Down

0 comments on commit f31ca5b

Please sign in to comment.