Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-9390: [C++][Doc] Review compute function names #7755

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/api_aggregate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Result<Datum> Sum(const Datum& value, ExecContext* ctx) {
}

Result<Datum> MinMax(const Datum& value, const MinMaxOptions& options, ExecContext* ctx) {
return CallFunction("minmax", {value}, &options, ctx);
return CallFunction("min_max", {value}, &options, ctx);
}

} // namespace compute
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/compute/api_scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ static Result<Datum> ExecSetLookup(const std::string& func_name, const Datum& da
}

Result<Datum> IsIn(const Datum& values, const Datum& value_set, ExecContext* ctx) {
return ExecSetLookup("isin", values, value_set,
return ExecSetLookup("is_in", values, value_set,
/*add_nulls_to_hash_table=*/false, ctx);
}

Result<Datum> Match(const Datum& values, const Datum& value_set, ExecContext* ctx) {
return ExecSetLookup("match", values, value_set,
Result<Datum> IndexIn(const Datum& values, const Datum& value_set, ExecContext* ctx) {
return ExecSetLookup("index_in", values, value_set,
/*add_nulls_to_hash_table=*/true, ctx);
}

Expand Down
17 changes: 8 additions & 9 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,14 @@ struct ArithmeticOptions : public FunctionOptions {
bool check_overflow;
};

struct ARROW_EXPORT BinaryContainsExactOptions : public FunctionOptions {
explicit BinaryContainsExactOptions(std::string pattern)
: pattern(std::move(pattern)) {}
struct ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
explicit MatchSubstringOptions(std::string pattern) : pattern(std::move(pattern)) {}

/// The exact pattern to look for inside input values.
/// The exact substring to look for inside input values.
std::string pattern;
};

/// Options for IsIn and Match functions
/// Options for IsIn and IndexIn functions
struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
explicit SetLookupOptions(Datum value_set, bool skip_nulls)
: value_set(std::move(value_set)), skip_nulls(skip_nulls) {}
Expand All @@ -60,7 +59,7 @@ struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
/// Whether nulls in `value_set` count for lookup.
///
/// If true, any null in `value_set` is ignored and nulls in the input
/// produce null (Match) or false (IsIn) values in the output.
/// produce null (IndexIn) or false (IsIn) values in the output.
/// If false, any null in `value_set` is successfully matched in
/// the input.
bool skip_nulls;
Expand Down Expand Up @@ -238,7 +237,7 @@ ARROW_EXPORT
Result<Datum> IsIn(const Datum& values, const Datum& value_set,
ExecContext* ctx = NULLPTR);

/// \brief Match examines each slot in the values against a value_set array.
/// \brief IndexIn examines each slot in the values against a value_set array.
/// If the value is not found in value_set, null will be output.
/// If found, the index of occurrence within value_set (ignoring duplicates)
/// will be output.
Expand All @@ -259,8 +258,8 @@ Result<Datum> IsIn(const Datum& values, const Datum& value_set,
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Match(const Datum& values, const Datum& value_set,
ExecContext* ctx = NULLPTR);
Result<Datum> IndexIn(const Datum& values, const Datum& value_set,
ExecContext* ctx = NULLPTR);

/// \brief IsValid returns true for each element of `values` that is not null,
/// false otherwise
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ namespace compute {

Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n,
ExecContext* ctx) {
PartitionOptions options(/*pivot=*/n);
ARROW_ASSIGN_OR_RAISE(
Datum result, CallFunction("partition_indices", {Datum(values)}, &options, ctx));
PartitionNthOptions options(/*pivot=*/n);
ARROW_ASSIGN_OR_RAISE(Datum result, CallFunction("partition_nth_indices",
{Datum(values)}, &options, ctx));
return result.make_array();
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ struct ARROW_EXPORT TakeOptions : public FunctionOptions {
};

/// \brief Partitioning options for NthToIndices
struct PartitionOptions : public FunctionOptions {
explicit PartitionOptions(int64_t pivot) : pivot(pivot) {}
struct PartitionNthOptions : public FunctionOptions {
explicit PartitionNthOptions(int64_t pivot) : pivot(pivot) {}

/// The index into the equivalent sorted array of the partition pivot element.
int64_t pivot;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/aggregate_basic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunction(std::move(func)));

static auto default_minmax_options = MinMaxOptions::Defaults();
func = std::make_shared<ScalarAggregateFunction>("minmax", Arity::Unary(),
func = std::make_shared<ScalarAggregateFunction>("min_max", Arity::Unary(),
&default_minmax_options);
aggregate::AddMinMaxKernels(aggregate::MinMaxInit, {boolean()}, func.get());
aggregate::AddMinMaxKernels(aggregate::MinMaxInit, NumericTypes(), func.get());
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/aggregate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -583,11 +583,11 @@ TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
TYPED_TEST(TestFloatingMinMaxKernel, DefaultOptions) {
auto values = ArrayFromJSON(this->type_singleton(), "[0, 1, 2, 3, 4]");

ASSERT_OK_AND_ASSIGN(auto no_options_provided, CallFunction("minmax", {values}));
ASSERT_OK_AND_ASSIGN(auto no_options_provided, CallFunction("min_max", {values}));

auto default_options = MinMaxOptions::Defaults();
ASSERT_OK_AND_ASSIGN(auto explicit_defaults,
CallFunction("minmax", {values}, &default_options));
CallFunction("min_max", {values}, &default_options));

AssertDatumsEqual(explicit_defaults, no_options_provided);
}
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/arrow/compute/kernels/scalar_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace internal {
namespace {

template <typename Type, typename offset_type = typename Type::offset_type>
void ListValueLengths(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
void ListValueLength(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
using ScalarType = typename TypeTraits<Type>::ScalarType;
using OffsetScalarType = typename TypeTraits<Type>::OffsetScalarType;

Expand All @@ -55,13 +55,13 @@ void ListValueLengths(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
} // namespace

void RegisterScalarNested(FunctionRegistry* registry) {
auto list_value_lengths =
std::make_shared<ScalarFunction>("list_value_lengths", Arity::Unary());
DCHECK_OK(list_value_lengths->AddKernel({InputType(Type::LIST)}, int32(),
ListValueLengths<ListType>));
DCHECK_OK(list_value_lengths->AddKernel({InputType(Type::LARGE_LIST)}, int64(),
ListValueLengths<LargeListType>));
DCHECK_OK(registry->AddFunction(std::move(list_value_lengths)));
auto list_value_length =
std::make_shared<ScalarFunction>("list_value_length", Arity::Unary());
DCHECK_OK(list_value_length->AddKernel({InputType(Type::LIST)}, int32(),
ListValueLength<ListType>));
DCHECK_OK(list_value_length->AddKernel({InputType(Type::LARGE_LIST)}, int64(),
ListValueLength<LargeListType>));
DCHECK_OK(registry->AddFunction(std::move(list_value_length)));
}

} // namespace internal
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/scalar_nested_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ static std::shared_ptr<DataType> GetOffsetType(const DataType& type) {
return type.id() == Type::LIST ? int32() : int64();
}

TEST(TestScalarNested, ListValueLengths) {
TEST(TestScalarNested, ListValueLength) {
for (auto ty : {list(int32()), large_list(int32())}) {
CheckScalarUnary("list_value_lengths", ty, "[[0, null, 1], null, [2, 3], []]",
CheckScalarUnary("list_value_length", ty, "[[0, null, 1], null, [2, 3], []]",
GetOffsetType(*ty), "[3, null, 2, 0]");
}
}
Expand Down
47 changes: 24 additions & 23 deletions cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,13 @@ std::unique_ptr<KernelState> InitSetLookup(KernelContext* ctx,
return result;
}

struct MatchVisitor {
struct IndexInVisitor {
KernelContext* ctx;
const ArrayData& data;
Datum* out;
Int32Builder builder;

MatchVisitor(KernelContext* ctx, const ArrayData& data, Datum* out)
IndexInVisitor(KernelContext* ctx, const ArrayData& data, Datum* out)
: ctx(ctx), data(data), out(out), builder(ctx->exec_context()->memory_pool()) {}

Status Visit(const DataType&) {
Expand All @@ -190,7 +190,7 @@ struct MatchVisitor {
}

template <typename Type>
Status ProcessMatch() {
Status ProcessIndexIn() {
using T = typename GetViewType<Type>::T;

const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
Expand Down Expand Up @@ -223,23 +223,24 @@ struct MatchVisitor {

template <typename Type>
enable_if_boolean<Type, Status> Visit(const Type&) {
return ProcessMatch<BooleanType>();
return ProcessIndexIn<BooleanType>();
}

template <typename Type>
enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
const Type&) {
return ProcessMatch<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
return ProcessIndexIn<
typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
}

template <typename Type>
enable_if_base_binary<Type, Status> Visit(const Type&) {
return ProcessMatch<typename Type::PhysicalType>();
return ProcessIndexIn<typename Type::PhysicalType>();
}

// Handle Decimal128Type, FixedSizeBinaryType
Status Visit(const FixedSizeBinaryType& type) {
return ProcessMatch<FixedSizeBinaryType>();
return ProcessIndexIn<FixedSizeBinaryType>();
}

Status Execute() {
Expand Down Expand Up @@ -269,9 +270,9 @@ void ExecArrayOrScalar(KernelContext* ctx, const Datum& in, Datum* out,
*out = std::move(out_scalar);
}

void ExecMatch(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
void ExecIndexIn(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
ExecArrayOrScalar(ctx, batch[0], out, [&](const ArrayData& in) {
return MatchVisitor(ctx, in, out).Execute();
return IndexInVisitor(ctx, in, out).Execute();
});
}

Expand Down Expand Up @@ -395,10 +396,10 @@ void AddBasicSetLookupKernels(ScalarKernel kernel,
}
}

// Enables calling isin with CallFunction as though it were binary.
// Enables calling is_in with CallFunction as though it were binary.
class IsInMetaBinary : public MetaFunction {
public:
IsInMetaBinary() : MetaFunction("isin_meta_binary", Arity::Binary()) {}
IsInMetaBinary() : MetaFunction("is_in_meta_binary", Arity::Binary()) {}

Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
Expand All @@ -408,16 +409,16 @@ class IsInMetaBinary : public MetaFunction {
}
};

// Enables calling match with CallFunction as though it were binary.
class MatchMetaBinary : public MetaFunction {
// Enables calling index_in with CallFunction as though it were binary.
class IndexInMetaBinary : public MetaFunction {
public:
MatchMetaBinary() : MetaFunction("match_meta_binary", Arity::Binary()) {}
IndexInMetaBinary() : MetaFunction("index_in_meta_binary", Arity::Binary()) {}

Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
ExecContext* ctx) const override {
DCHECK_EQ(options, nullptr);
return Match(args[0], args[1], ctx);
return IndexIn(args[0], args[1], ctx);
}
};

Expand All @@ -429,33 +430,33 @@ void RegisterScalarSetLookup(FunctionRegistry* registry) {
ScalarKernel isin_base;
isin_base.init = InitSetLookup;
isin_base.exec = ExecIsIn;
auto isin = std::make_shared<ScalarFunction>("isin", Arity::Unary());
auto is_in = std::make_shared<ScalarFunction>("is_in", Arity::Unary());

AddBasicSetLookupKernels(isin_base, /*output_type=*/boolean(), isin.get());
AddBasicSetLookupKernels(isin_base, /*output_type=*/boolean(), is_in.get());

isin_base.signature = KernelSignature::Make({null()}, boolean());
isin_base.null_handling = NullHandling::COMPUTED_PREALLOCATE;
DCHECK_OK(isin->AddKernel(isin_base));
DCHECK_OK(registry->AddFunction(isin));
DCHECK_OK(is_in->AddKernel(isin_base));
DCHECK_OK(registry->AddFunction(is_in));

DCHECK_OK(registry->AddFunction(std::make_shared<IsInMetaBinary>()));
}

// Match uses Int32Builder and so is responsible for all its own allocation
// IndexIn uses Int32Builder and so is responsible for all its own allocation
{
ScalarKernel match_base;
match_base.init = InitSetLookup;
match_base.exec = ExecMatch;
match_base.exec = ExecIndexIn;
match_base.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
match_base.mem_allocation = MemAllocation::NO_PREALLOCATE;
auto match = std::make_shared<ScalarFunction>("match", Arity::Unary());
auto match = std::make_shared<ScalarFunction>("index_in", Arity::Unary());
AddBasicSetLookupKernels(match_base, /*output_type=*/int32(), match.get());

match_base.signature = KernelSignature::Make({null()}, int32());
DCHECK_OK(match->AddKernel(match_base));
DCHECK_OK(registry->AddFunction(match));

DCHECK_OK(registry->AddFunction(std::make_shared<MatchMetaBinary>()));
DCHECK_OK(registry->AddFunction(std::make_shared<IndexInMetaBinary>()));
}
}

Expand Down
Loading