diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 5e71ac93c..bb51965b5 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -365,6 +365,10 @@ message Rel { ExtensionMultiRel extension_multi = 10; ExtensionLeafRel extension_leaf = 11; CrossRel cross = 12; + + //Physical relations + HashJoinRel hash_join = 13; + MergeJoinRel merge_join = 14; } } @@ -479,6 +483,60 @@ message WriteRel { } } +// The hash equijoin join operator will build a hash table out of the right input based on a set of join keys. +// It will then probe that hash table for incoming inputs, finding matches. +message HashJoinRel { + RelCommon common = 1; + Rel left = 2; + Rel right = 3; + repeated Expression.FieldReference left_keys = 4; + repeated Expression.FieldReference right_keys = 5; + Expression post_join_filter = 6; + + JoinType type = 7; + + enum JoinType { + JOIN_TYPE_UNSPECIFIED = 0; + JOIN_TYPE_INNER = 1; + JOIN_TYPE_OUTER = 2; + JOIN_TYPE_LEFT = 3; + JOIN_TYPE_RIGHT = 4; + JOIN_TYPE_LEFT_SEMI = 5; + JOIN_TYPE_RIGHT_SEMI = 6; + JOIN_TYPE_LEFT_ANTI = 7; + JOIN_TYPE_RIGHT_ANTI = 8; + } + + substrait.extensions.AdvancedExtension advanced_extension = 10; +} + +// The merge equijoin does a join by taking advantage of two sets that are sorted on the join keys. +// This allows the join operation to be done in a streaming fashion. +message MergeJoinRel { + RelCommon common = 1; + Rel left = 2; + Rel right = 3; + repeated Expression.FieldReference left_keys = 4; + repeated Expression.FieldReference right_keys = 5; + Expression post_join_filter = 6; + + JoinType type = 7; + + enum JoinType { + JOIN_TYPE_UNSPECIFIED = 0; + JOIN_TYPE_INNER = 1; + JOIN_TYPE_OUTER = 2; + JOIN_TYPE_LEFT = 3; + JOIN_TYPE_RIGHT = 4; + JOIN_TYPE_LEFT_SEMI = 5; + JOIN_TYPE_RIGHT_SEMI = 6; + JOIN_TYPE_LEFT_ANTI = 7; + JOIN_TYPE_RIGHT_ANTI = 8; + } + + substrait.extensions.AdvancedExtension advanced_extension = 10; +} + // The argument of a function message FunctionArgument { oneof arg_type { diff --git a/site/docs/relations/physical_relations.md b/site/docs/relations/physical_relations.md index cbd295df1..c3d8da102 100644 --- a/site/docs/relations/physical_relations.md +++ b/site/docs/relations/physical_relations.md @@ -17,14 +17,14 @@ The hash equijoin join operator will build a hash table out of the right input b ### Hash Equijoin Properties -| Property | Description | Required | -| ------------------- | ------------------------------------------------------------ | ------------------------ | -| Left Input | A relational input. | Required | -| Right Input | A relational input. | Required | -| Join Expression | A boolean condition that describes whether each record from the left set "match" the record from the right set. The condition must only include the following operations: AND, ==, field references, is not distinct from. Field references correspond to the direct output order of the data. | Required. | +| Property | Description | Required | +|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------| +| Left Input | A relational input.(Probe-side) | Required | +| Right Input | A relational input.(Build-side) | Required | +| Left Keys | References to the fields to join on in the left input. | Required | +| Right Keys | References to the fields to join on in the right input. | Required | | Post Join Predicate | An additional expression that can be used to reduce the output of the join operation post the equality condition. Minimizes the overhead of secondary join conditions that cannot be evaluated using the equijoin keys. | Optional, defaults true. | -| Join Type | One of the join types defined in the Join operator. | Required | - +| Join Type | One of the join types defined in the Join operator. | Required | ## NLJ Operator @@ -62,13 +62,14 @@ The merge equijoin does a join by taking advantage of two sets that are sorted o ### Merge Join Properties -| Property | Description | Required | -| ------------------- | ------------------------------------------------------------ | --------------------------------------------- | -| Left Input | A relational input. | Required | -| Right Input | A relational input. | Required | -| Join Expression | A boolean condition that describes whether each record from the left set "match" the record from the right set. The condition must only include the following operations: AND, ==, field references, is not distinct from. Field references correspond to the direct output order of the data. | Optional. Defaults to true (a Cartesian join). | -| Post Join Predicate | An additional expression that can be used to reduce the output of the join operation post the equality condition. Minimizes the overhead of secondary join conditions that cannot be evaluated using the equijoin keys. | Optional, defaults true. | -| Join Type | One of the join types defined in the Join operator. | Required | +| Property | Description | Required | +|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------| +| Left Input | A relational input. | Required | +| Right Input | A relational input. | Required | +| Left Keys | References to the fields to join on in the left input. | Required | +| Right Keys | References to the fields to join on in the right input. | Reauired | +| Post Join Predicate | An additional expression that can be used to reduce the output of the join operation post the equality condition. Minimizes the overhead of secondary join conditions that cannot be evaluated using the equijoin keys. | Optional, defaults true. | +| Join Type | One of the join types defined in the Join operator. | Required | ## Exchange Operator