From 78a14ba0d64dddd7c23d8e964e7e3f50ca59fa97 Mon Sep 17 00:00:00 2001 From: Rui Mo Date: Wed, 20 Jul 2022 13:11:53 +0800 Subject: [PATCH] [OPPRO-207] Remove cpp example code (#273) --- cpp/gazelle-cpp/compute/substrait_arrow.h | 4 +- cpp/gazelle-cpp/jni/jni_wrapper.cc | 1 - cpp/src/CMakeLists.txt | 2 - cpp/src/compute/kernels_ext.cc | 71 ----- cpp/src/compute/kernels_ext.h | 63 ---- cpp/src/compute/substrait_utils.cc | 366 ---------------------- cpp/src/compute/substrait_utils.h | 66 ---- cpp/src/jni/exec_backend.cc | 10 +- cpp/src/jni/jni_common.h | 1 - 9 files changed, 4 insertions(+), 580 deletions(-) delete mode 100644 cpp/src/compute/kernels_ext.cc delete mode 100644 cpp/src/compute/kernels_ext.h delete mode 100644 cpp/src/compute/substrait_utils.cc delete mode 100644 cpp/src/compute/substrait_utils.h diff --git a/cpp/gazelle-cpp/compute/substrait_arrow.h b/cpp/gazelle-cpp/compute/substrait_arrow.h index f12277980d6d..fa03313ae2ab 100644 --- a/cpp/gazelle-cpp/compute/substrait_arrow.h +++ b/cpp/gazelle-cpp/compute/substrait_arrow.h @@ -19,9 +19,9 @@ #include -#include +#include "jni/exec_backend.h" -#include "compute/substrait_utils.h" +#include namespace gazellecpp { namespace compute { diff --git a/cpp/gazelle-cpp/jni/jni_wrapper.cc b/cpp/gazelle-cpp/jni/jni_wrapper.cc index f8f1c6791687..c2d9b654deec 100644 --- a/cpp/gazelle-cpp/jni/jni_wrapper.cc +++ b/cpp/gazelle-cpp/jni/jni_wrapper.cc @@ -18,7 +18,6 @@ #include #include "compute/substrait_arrow.h" -#include "compute/substrait_utils.h" #include "jni/jni_errors.h" #ifdef __cplusplus diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 7c947caa5b7c..04bef8e1ce59 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -233,8 +233,6 @@ set(SPARK_COLUMNAR_PLUGIN_SRCS memory/allocator.cc ${PROTO_SRCS} compute/protobuf_utils.cc - compute/substrait_utils.cc - compute/kernels_ext.cc operators/c2r/arrow_columnar_to_row_converter.cc operators/c2r/columnar_to_row_base.cc operators/shuffle/splitter.cc diff --git a/cpp/src/compute/kernels_ext.cc b/cpp/src/compute/kernels_ext.cc deleted file mode 100644 index a11fc53bcb4b..000000000000 --- a/cpp/src/compute/kernels_ext.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernels_ext.h" - -#include -#include -#include -#include -#include - -#include - -#include "utils/exception.h" - -namespace gluten { -namespace compute { - -bool LazyReadIterator::HasNext() { - if (no_next_) { - return false; - } - if (need_process_) { - // If valid batch is still not processed, no need to get a new batch. - return true; - } - next_array_ = array_iter_->Next().ValueOrDie(); - if (next_array_ == nullptr) { - no_next_ = true; - return false; - } - std::cout << "Input batch from the Java iter:" << std::endl; - // arrow::PrettyPrint(*next_array_.get(), 2, &std::cout); - need_process_ = true; - return true; -} - -arrow::Status LazyReadIterator::Next(std::shared_ptr* out) { - double res = 900000; - builder_->Append(res); - std::shared_ptr array; - auto status = builder_->Finish(&array); - // std::vector> ret_types = { - // arrow::field("res", arrow::float64())}; - ArrowArray cArray; - GLUTEN_THROW_NOT_OK(arrow::ExportArray(*array, &cArray)); - *out = std::make_shared(cArray); - if (need_process_) { - need_process_ = false; - } - // Will return result for only once. - no_next_ = true; - return arrow::Status::OK(); -} - -} // namespace compute -} // namespace gluten diff --git a/cpp/src/compute/kernels_ext.h b/cpp/src/compute/kernels_ext.h deleted file mode 100644 index 15e6eff59f85..000000000000 --- a/cpp/src/compute/kernels_ext.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "memory/allocator.h" -#include "utils/result_iterator.h" - -namespace gluten { -namespace compute { - -using ArrowArrayIterator = arrow::Iterator>; -// This class is an example shows how to get input from the iter. -// In real computing, the output of the iter should be used as the -// input for the following computing. -class LazyReadIterator { - public: - LazyReadIterator(std::shared_ptr array_iter) { - array_iter_ = std::move(array_iter); - std::unique_ptr array_builder; - arrow::MakeBuilder(pool_, arrow::float64(), &array_builder); - builder_.reset(arrow::internal::checked_cast( - array_builder.release())); - } - - bool HasNext(); - - arrow::Status Next(std::shared_ptr* out); - - private: - arrow::MemoryPool* pool_ = - gluten::memory::GetDefaultWrappedArrowMemoryPool().get(); - std::shared_ptr array_iter_; - bool need_process_ = false; - bool no_next_ = false; - std::shared_ptr next_array_; - std::unique_ptr builder_; -}; - -} // namespace compute -} // namespace gluten diff --git a/cpp/src/compute/substrait_utils.cc b/cpp/src/compute/substrait_utils.cc deleted file mode 100644 index fea48cbe9f85..000000000000 --- a/cpp/src/compute/substrait_utils.cc +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "substrait_utils.h" - -#include -#include -#include - -#include "arrow/array/builder_base.h" -#include "arrow/array/builder_primitive.h" -#include "arrow/util/checked_cast.h" -#include "kernels_ext.h" -#include "memory/allocator.h" -#include "protobuf_utils.h" -#include "utils/exception.h" - -namespace gluten { -namespace compute { - -SubstraitParser::SubstraitParser() { - std::cout << "construct SubstraitParser" << std::endl; -} - -std::shared_ptr SubstraitParser::GetResultIterator() { - auto res_iter = std::make_shared(); - return std::make_shared(std::move(res_iter)); -} - -std::shared_ptr SubstraitParser::GetResultIterator( - std::vector> inputs) { - auto res_iter = - std::make_shared(std::move(inputs)); - return std::make_shared(std::move(res_iter)); -} - -void SubstraitParser::ParseLiteral(const substrait::Expression::Literal& slit) { - switch (slit.literal_type_case()) { - case substrait::Expression_Literal::LiteralTypeCase::kFp64: { - double val = slit.fp64(); - std::cout << "double lit: " << val << std::endl; - break; - } - case substrait::Expression_Literal::LiteralTypeCase::kBoolean: { - bool val = slit.boolean(); - break; - } - default: - std::cout << "not supported" << std::endl; - break; - } -} - -void SubstraitParser::ParseScalarFunction( - const substrait::Expression::ScalarFunction& sfunc) { - for (auto& sarg : sfunc.args()) { - ParseExpression(sarg); - } - auto function_id = sfunc.function_reference(); - auto function_name = FindFunction(function_id); - std::cout << "function_name: " << function_name << std::endl; - auto out_type = sfunc.output_type(); - ParseType(out_type); -} - -void SubstraitParser::ParseReferenceSegment( - const ::substrait::Expression::ReferenceSegment& sref) { - switch (sref.reference_type_case()) { - case substrait::Expression::ReferenceSegment::ReferenceTypeCase:: - kStructField: { - auto sfield = sref.struct_field(); - auto field_id = sfield.field(); - std::cout << "field_id: " << field_id << std::endl; - break; - } - default: - std::cout << "not supported" << std::endl; - break; - } -} - -void SubstraitParser::ParseFieldReference( - const substrait::Expression::FieldReference& sfield) { - switch (sfield.reference_type_case()) { - case substrait::Expression::FieldReference::ReferenceTypeCase:: - kDirectReference: { - auto dref = sfield.direct_reference(); - ParseReferenceSegment(dref); - break; - } - case substrait::Expression::FieldReference::ReferenceTypeCase:: - kMaskedReference: { - std::cout << "not supported" << std::endl; - break; - } - default: - std::cout << "not supported" << std::endl; - break; - } -} - -void SubstraitParser::ParseExpression(const substrait::Expression& sexpr) { - switch (sexpr.rex_type_case()) { - case substrait::Expression::RexTypeCase::kLiteral: { - auto slit = sexpr.literal(); - ParseLiteral(slit); - break; - } - case substrait::Expression::RexTypeCase::kScalarFunction: { - auto sfunc = sexpr.scalar_function(); - ParseScalarFunction(sfunc); - break; - } - case substrait::Expression::RexTypeCase::kSelection: { - auto sel = sexpr.selection(); - ParseFieldReference(sel); - break; - } - default: - std::cout << "Expression not supported" << std::endl; - break; - } -} - -void SubstraitParser::ParseType(const substrait::Type& stype) { - switch (stype.kind_case()) { - case substrait::Type::KindCase::kBool: { - auto sbool = stype.bool_(); - auto nullable = sbool.nullability(); - auto type_id = sbool.type_variation_reference(); - break; - } - case substrait::Type::KindCase::kI32: { - auto nullable = stype.i32().nullability(); - break; - } - case substrait::Type::KindCase::kI64: { - auto nullability = stype.i64().nullability(); - break; - } - case substrait::Type::KindCase::kFp64: { - auto sfp64 = stype.fp64(); - auto nullable = sfp64.nullability(); - auto type_id = sfp64.type_variation_reference(); - break; - } - case substrait::Type::KindCase::kStruct: { - auto sstruct = stype.struct_(); - auto stypes = sstruct.types(); - for (auto& type : stypes) { - ParseType(type); - } - break; - } - case substrait::Type::KindCase::kString: { - auto sstring = stype.string(); - auto nullable = sstring.nullability(); - auto type_id = sstring.type_variation_reference(); - break; - } - default: - std::cout << "Type not supported: " << stype.kind_case() << std::endl; - break; - } -} - -void SubstraitParser::ParseNamedStruct( - const substrait::NamedStruct& named_struct) { - auto& snames = named_struct.names(); - for (auto& sname : snames) { - std::cout << "NamedStruct name: " << sname << std::endl; - } - // Parse Struct - auto& sstruct = named_struct.struct_(); - auto& stypes = sstruct.types(); - for (auto& type : stypes) { - ParseType(type); - } -} - -void SubstraitParser::ParseAggregateRel(const substrait::AggregateRel& sagg) { - if (sagg.has_input()) { - ParseRel(sagg.input()); - } - // Parse groupings - auto& groupings = sagg.groupings(); - for (auto& grouping : groupings) { - auto& grouping_exprs = grouping.grouping_expressions(); - for (auto& grouping_expr : grouping_exprs) { - ParseExpression(grouping_expr); - } - } - // Parse measures - for (auto& smea : sagg.measures()) { - auto agg_function = smea.measure(); - auto phase = agg_function.phase(); - auto function_id = agg_function.function_reference(); - auto args = agg_function.args(); - for (auto arg : args) { - ParseExpression(arg); - } - auto out_type = agg_function.output_type(); - } -} - -void SubstraitParser::ParseProjectRel(const substrait::ProjectRel& sproject) { - if (sproject.has_input()) { - ParseRel(sproject.input()); - } - for (auto& expr : sproject.expressions()) { - ParseExpression(expr); - } -} - -void SubstraitParser::ParseFilterRel(const substrait::FilterRel& sfilter) { - if (sfilter.has_input()) { - ParseRel(sfilter.input()); - } - if (sfilter.has_condition()) { - ParseExpression(sfilter.condition()); - } -} - -void SubstraitParser::ParseReadRel(const substrait::ReadRel& sread) { - if (sread.has_base_schema()) { - auto& base_schema = sread.base_schema(); - ParseNamedStruct(base_schema); - } - // Parse local files - if (sread.has_local_files()) { - auto& local_files = sread.local_files(); - auto& files_list = local_files.items(); - for (auto& file : files_list) { - auto& uri_file = file.uri_file(); - auto index = file.partition_index(); - auto start = file.start(); - auto length = file.length(); - } - } - auto& sfilter = sread.filter(); - ParseExpression(sfilter); -} - -void SubstraitParser::ParseRel(const substrait::Rel& srel) { - if (srel.has_aggregate()) { - ParseAggregateRel(srel.aggregate()); - } else if (srel.has_project()) { - ParseProjectRel(srel.project()); - } else if (srel.has_filter()) { - ParseFilterRel(srel.filter()); - } else if (srel.has_read()) { - ParseReadRel(srel.read()); - } else { - std::cout << "not supported" << std::endl; - } -} - -void SubstraitParser::ParseRelRoot(const substrait::RelRoot& sroot) { - if (sroot.has_input()) { - auto& srel = sroot.input(); - ParseRel(srel); - } - auto& snames = sroot.names(); -} - -void SubstraitParser::ParsePlan(const substrait::Plan& splan) { - for (auto& sextension : splan.extensions()) { - if (!sextension.has_extension_function()) { - continue; - } - auto& sfmap = sextension.extension_function(); - auto id = sfmap.function_anchor(); - auto name = sfmap.name(); - functions_map_[id] = name; - std::cout << "Function id: " << id << ", name: " << name << std::endl; - } - for (auto& srel : splan.relations()) { - if (srel.has_root()) { - ParseRelRoot(srel.root()); - } - if (srel.has_rel()) { - ParseRel(srel.rel()); - } - } -} - -std::string SubstraitParser::FindFunction(uint64_t id) { - if (functions_map_.find(id) == functions_map_.end()) { - throw std::runtime_error("Could not find function " + std::to_string(id)); - } - return functions_map_[id]; -} - -class SubstraitParser::FirstStageResultIterator { - public: - FirstStageResultIterator() { - std::unique_ptr array_builder; - arrow::MakeBuilder(pool_, arrow::float64(), &array_builder); - builder_.reset(arrow::internal::checked_cast( - array_builder.release())); - } - - arrow::Result> Next() { - if (!has_next_) { - return nullptr; - } - double res = 10000; - builder_->Append(res); - std::shared_ptr array; - auto status = builder_->Finish(&array); - // res_arrays.push_back(array); - // std::vector> ret_types = { - // arrow::field("res", arrow::float64())}; - has_next_ = false; - ArrowArray arrow_array; - GLUTEN_THROW_NOT_OK(arrow::ExportArray(*array, &arrow_array)); - return std::make_shared(arrow_array); - } - - private: - arrow::MemoryPool* pool_ = - gluten::memory::GetDefaultWrappedArrowMemoryPool().get(); - std::unique_ptr builder_; - bool has_next_ = true; - // std::vector> res_arrays; -}; - -class SubstraitParser::MiddleStageResultIterator { - public: - MiddleStageResultIterator( - std::vector> inputs) { - // TODO: the iter index should be acquired from Substrait Plan. - int iter_idx = 0; - lazy_iter_ = std::make_shared( - std::move(inputs[iter_idx]->ToArrowArrayIterator())); - } - - arrow::Result> Next() { - if (!lazy_iter_->HasNext()) { - return nullptr; - } - std::shared_ptr array; - lazy_iter_->Next(&array); - return array; - } - - private: - std::shared_ptr lazy_iter_; -}; - -} // namespace compute -} // namespace gluten diff --git a/cpp/src/compute/substrait_utils.h b/cpp/src/compute/substrait_utils.h deleted file mode 100644 index a1a7497829aa..000000000000 --- a/cpp/src/compute/substrait_utils.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "jni/exec_backend.h" -#include "substrait/algebra.pb.h" -#include "substrait/capabilities.pb.h" -#include "substrait/extensions/extensions.pb.h" -#include "substrait/function.pb.h" -#include "substrait/parameterized_types.pb.h" -#include "substrait/plan.pb.h" -#include "substrait/type.pb.h" -#include "substrait/type_expressions.pb.h" -#include "utils/result_iterator.h" - -namespace gluten { -namespace compute { - -class SubstraitParser : public ExecBackendBase { - public: - SubstraitParser(); - void ParseLiteral(const ::substrait::Expression::Literal& slit); - void ParseScalarFunction( - const ::substrait::Expression::ScalarFunction& sfunc); - void ParseReferenceSegment( - const ::substrait::Expression::ReferenceSegment& sref); - void ParseFieldReference( - const ::substrait::Expression::FieldReference& sfield); - void ParseExpression(const ::substrait::Expression& sexpr); - void ParseType(const ::substrait::Type& stype); - void ParseNamedStruct(const ::substrait::NamedStruct& named_struct); - void ParseAggregateRel(const ::substrait::AggregateRel& sagg); - void ParseProjectRel(const ::substrait::ProjectRel& sproject); - void ParseFilterRel(const ::substrait::FilterRel& sfilter); - void ParseReadRel(const ::substrait::ReadRel& sread); - void ParseRelRoot(const ::substrait::RelRoot& sroot); - void ParseRel(const ::substrait::Rel& srel); - void ParsePlan(const ::substrait::Plan& splan); - std::shared_ptr GetResultIterator() override; - std::shared_ptr GetResultIterator( - std::vector> inputs) override; - - private: - std::string FindFunction(uint64_t id); - std::unordered_map functions_map_; - class FirstStageResultIterator; - class MiddleStageResultIterator; -}; - -} // namespace compute -} // namespace gluten diff --git a/cpp/src/jni/exec_backend.cc b/cpp/src/jni/exec_backend.cc index a6638faac64c..27d8237fee10 100644 --- a/cpp/src/jni/exec_backend.cc +++ b/cpp/src/jni/exec_backend.cc @@ -20,8 +20,6 @@ #include #include -#include "compute/substrait_utils.h" - namespace gluten { static std::function()> backend_factory; @@ -36,12 +34,8 @@ void SetBackendFactory( std::shared_ptr CreateBackend() { if (backend_factory == nullptr) { - std::cout - << "Execution backend not set. This may due to the backend library not " - "loaded, or SetBackendFactory() is not called in nativeInitNative() JNI call." - " Will create default backend, which is only served as an example." - << std::endl; - return std::make_shared(); + throw std::runtime_error( + "Execution backend not set. This may due to the backend library not loaded, or SetBackendFactory() is not called in nativeInitNative() JNI call."); } return backend_factory(); } diff --git a/cpp/src/jni/jni_common.h b/cpp/src/jni/jni_common.h index 75217b3807ad..78a84bda414a 100644 --- a/cpp/src/jni/jni_common.h +++ b/cpp/src/jni/jni_common.h @@ -41,7 +41,6 @@ #include #include "compute/protobuf_utils.h" -#include "compute/substrait_utils.h" #include "memory/allocator.h" static jint JNI_VERSION = JNI_VERSION_1_8;