Skip to content

Commit

Permalink
Merge branch 'main' into extend_expression
Browse files Browse the repository at this point in the history
  • Loading branch information
yma11 authored Jan 18, 2023
2 parents f541a92 + 431651e commit 9ed2cdd
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 14 deletions.
58 changes: 58 additions & 0 deletions proto/substrait/algebra.proto
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,10 @@ message Rel {
ExtensionMultiRel extension_multi = 10;
ExtensionLeafRel extension_leaf = 11;
CrossRel cross = 12;

//Physical relations
HashJoinRel hash_join = 13;
MergeJoinRel merge_join = 14;
}
}

Expand Down Expand Up @@ -479,6 +483,60 @@ message WriteRel {
}
}

// The hash equijoin join operator will build a hash table out of the right input based on a set of join keys.
// It will then probe that hash table for incoming inputs, finding matches.
message HashJoinRel {
RelCommon common = 1;
Rel left = 2;
Rel right = 3;
repeated Expression.FieldReference left_keys = 4;
repeated Expression.FieldReference right_keys = 5;
Expression post_join_filter = 6;

JoinType type = 7;

enum JoinType {
JOIN_TYPE_UNSPECIFIED = 0;
JOIN_TYPE_INNER = 1;
JOIN_TYPE_OUTER = 2;
JOIN_TYPE_LEFT = 3;
JOIN_TYPE_RIGHT = 4;
JOIN_TYPE_LEFT_SEMI = 5;
JOIN_TYPE_RIGHT_SEMI = 6;
JOIN_TYPE_LEFT_ANTI = 7;
JOIN_TYPE_RIGHT_ANTI = 8;
}

substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The merge equijoin does a join by taking advantage of two sets that are sorted on the join keys.
// This allows the join operation to be done in a streaming fashion.
message MergeJoinRel {
RelCommon common = 1;
Rel left = 2;
Rel right = 3;
repeated Expression.FieldReference left_keys = 4;
repeated Expression.FieldReference right_keys = 5;
Expression post_join_filter = 6;

JoinType type = 7;

enum JoinType {
JOIN_TYPE_UNSPECIFIED = 0;
JOIN_TYPE_INNER = 1;
JOIN_TYPE_OUTER = 2;
JOIN_TYPE_LEFT = 3;
JOIN_TYPE_RIGHT = 4;
JOIN_TYPE_LEFT_SEMI = 5;
JOIN_TYPE_RIGHT_SEMI = 6;
JOIN_TYPE_LEFT_ANTI = 7;
JOIN_TYPE_RIGHT_ANTI = 8;
}

substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The argument of a function
message FunctionArgument {
oneof arg_type {
Expand Down
29 changes: 15 additions & 14 deletions site/docs/relations/physical_relations.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ The hash equijoin join operator will build a hash table out of the right input b

### Hash Equijoin Properties

| Property | Description | Required |
| ------------------- | ------------------------------------------------------------ | ------------------------ |
| Left Input | A relational input. | Required |
| Right Input | A relational input. | Required |
| Join Expression | A boolean condition that describes whether each record from the left set "match" the record from the right set. The condition must only include the following operations: AND, ==, field references, is not distinct from. Field references correspond to the direct output order of the data. | Required. |
| Property | Description | Required |
|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|
| Left Input | A relational input.(Probe-side) | Required |
| Right Input | A relational input.(Build-side) | Required |
| Left Keys | References to the fields to join on in the left input. | Required |
| Right Keys | References to the fields to join on in the right input. | Required |
| Post Join Predicate | An additional expression that can be used to reduce the output of the join operation post the equality condition. Minimizes the overhead of secondary join conditions that cannot be evaluated using the equijoin keys. | Optional, defaults true. |
| Join Type | One of the join types defined in the Join operator. | Required |

| Join Type | One of the join types defined in the Join operator. | Required |


## NLJ Operator
Expand Down Expand Up @@ -62,13 +62,14 @@ The merge equijoin does a join by taking advantage of two sets that are sorted o

### Merge Join Properties

| Property | Description | Required |
| ------------------- | ------------------------------------------------------------ | --------------------------------------------- |
| Left Input | A relational input. | Required |
| Right Input | A relational input. | Required |
| Join Expression | A boolean condition that describes whether each record from the left set "match" the record from the right set. The condition must only include the following operations: AND, ==, field references, is not distinct from. Field references correspond to the direct output order of the data. | Optional. Defaults to true (a Cartesian join). |
| Post Join Predicate | An additional expression that can be used to reduce the output of the join operation post the equality condition. Minimizes the overhead of secondary join conditions that cannot be evaluated using the equijoin keys. | Optional, defaults true. |
| Join Type | One of the join types defined in the Join operator. | Required |
| Property | Description | Required |
|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------|
| Left Input | A relational input. | Required |
| Right Input | A relational input. | Required |
| Left Keys | References to the fields to join on in the left input. | Required |
| Right Keys | References to the fields to join on in the right input. | Reauired |
| Post Join Predicate | An additional expression that can be used to reduce the output of the join operation post the equality condition. Minimizes the overhead of secondary join conditions that cannot be evaluated using the equijoin keys. | Optional, defaults true. |
| Join Type | One of the join types defined in the Join operator. | Required |

## Exchange Operator

Expand Down

0 comments on commit 9ed2cdd

Please sign in to comment.