From 436ff7ab24816e6587dde4dbdb01ed239b8dfc22 Mon Sep 17 00:00:00 2001 From: yma11 Date: Sat, 14 Jan 2023 17:01:29 +0800 Subject: [PATCH] feat: add extended expression for expression only evaluation --- proto/substrait/extended_expression.proto | 51 ++++++++++++++++++++ site/docs/expressions/extended_expression.md | 19 ++++++++ 2 files changed, 70 insertions(+) create mode 100644 proto/substrait/extended_expression.proto create mode 100644 site/docs/expressions/extended_expression.md diff --git a/proto/substrait/extended_expression.proto b/proto/substrait/extended_expression.proto new file mode 100644 index 000000000..5d1152055 --- /dev/null +++ b/proto/substrait/extended_expression.proto @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: Apache-2.0 +syntax = "proto3"; + +package substrait; + +import "substrait/algebra.proto"; +import "substrait/extensions/extensions.proto"; +import "substrait/plan.proto"; +import "substrait/type.proto"; + +option csharp_namespace = "Substrait.Protobuf"; +option go_package = "github.com/substrait-io/substrait-go/proto"; +option java_multiple_files = true; +option java_package = "io.substrait.proto"; + +message ExpressionReference { // One entry of ExtendedExpression.referred_expr: an expression plus its output names. + oneof expr_type { // The referenced expression: either an Expression or an AggregateFunction. + Expression expression = 1; + AggregateFunction measure = 2; + } + // Names of the output fields of this expression, in depth-first order + repeated string output_names = 3; +} + +// Describes one or more expressions to evaluate outside of a plan rel. +// For compactness' sake, identifiers are normalized at the top level of this message. +message ExtendedExpression { + // Substrait version of the expression. Optional up to 0.17.0, required for later + // versions. 
+ Version version = 7; + + // A list of YAML specifications this expression may depend on + repeated substrait.extensions.SimpleExtensionURI extension_uris = 1; + + // A list of extensions this expression may depend on + repeated substrait.extensions.SimpleExtensionDeclaration extensions = 2; + + // One or more expression trees, in the same order as in the plan rel + repeated ExpressionReference referred_expr = 3; + + NamedStruct base_schema = 4; // Schema of the input data the expressions are evaluated against. + // Additional extensions associated with this expression. + substrait.extensions.AdvancedExtension advanced_extensions = 5; + + // A list of google.protobuf.Any entities that this expression may use. Can be used to + // warn if some embedded message types are unknown. Note that this list may + // include message types that are ignorable (optimizations) or that are + // unused. In many cases, a consumer may be able to work with an expression even if + // one or more message types defined here are unknown. + repeated string expected_type_urls = 6; +} diff --git a/site/docs/expressions/extended_expression.md b/site/docs/expressions/extended_expression.md new file mode 100644 index 000000000..4baa313ce --- /dev/null +++ b/site/docs/expressions/extended_expression.md @@ -0,0 +1,19 @@ +# Extended expression + +Extended expression is provided for expression level protocol instead of plan rels. It mainly targets expression-only evaluation, such as those computed in Filter/Project/Aggregation rels. Different from the original expression defined in the Substrait protocol, it requires more information to completely describe the computation context, including input data schema, referred function signatures and output schema. + +Besides, as it will be used separately from the plan rel representation, it needs to include basic fields like Version. 
+ + ## Input and output data schema + + Similar to `base_schema` defined in [ReadRel](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto#L58), the input data schema describes the name/type/nullability and layout info of the input data for the target expression evaluation. Each referred expression also has a field `output_names` to define the names of its output data. + + ## Referred expression + + It will have one or more referred expressions in this message, and the referred expressions can be [Expression](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto) or [AggregateFunction](https://github.com/substrait-io/substrait/blob/7f272f13f22cd5f5842baea42bcf7961e6251881/proto/substrait/algebra.proto#L1170). More types of expression can be added for more scenarios. + + For multiple expressions, users can translate them in the same order as they occur in the original plan rel. But it does NOT require the consumer side to handle them strictly in that order. It only needs to make sure that the columns in the final output are organized in the same order as defined in the extended expression message. + + ## Function extensions + + As in the expression message, functions are referenced by function anchor, so the related extensions are needed to determine the detailed function signature.