Skip to content

Commit

Permalink
Merge branch 'apache:dev' into dev-fix-sink-multi-table-path
Browse files Browse the repository at this point in the history
  • Loading branch information
hailin0 authored Jul 10, 2024
2 parents 6676ed9 + 57e5627 commit 94c6ed7
Show file tree
Hide file tree
Showing 206 changed files with 13,967 additions and 2,375 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1052,7 +1052,7 @@ jobs:

kafka-connector-it:
needs: [ changes, sanity-check ]
if: needs.changes.outputs.api == 'true'
if: needs.changes.outputs.api == 'true' || contains(needs.changes.outputs.it-modules, 'connector-kafka-e2e')
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand All @@ -1068,15 +1068,14 @@ jobs:
distribution: 'temurin'
cache: 'maven'
- name: run kafka connector integration test
if: needs.changes.outputs.api == 'true'
run: |
./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-kafka-e2e -am -Pci
env:
MAVEN_OPTS: -Xmx4096m

rocketmq-connector-it:
needs: [ changes, sanity-check ]
if: needs.changes.outputs.api == 'true'
if: needs.changes.outputs.api == 'true' || contains(needs.changes.outputs.it-modules, 'connector-rocketmq-e2e')
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand All @@ -1092,7 +1091,6 @@ jobs:
distribution: 'temurin'
cache: 'maven'
- name: run rocket connector integration test
if: needs.changes.outputs.api == 'true'
run: |
./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-rocketmq-e2e -am -Pci
env:
Expand Down
1 change: 1 addition & 0 deletions .licenserc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,6 @@ header:
- 'seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/protocol/task/AbstractSeaTunnelMessageTask.java'
- 'seatunnel-engine/seatunnel-engine-common/src/main/java/org/apache/seatunnel/engine/common/utils/PassiveCompletableFuture.java'
- 'seatunnel-connectors-v2/connector-cdc/connector-cdc-postgres/src/main/java/io/debezium/connector/postgresql/connection/PostgresReplicationConnection.java'
- 'seatunnel-shade/seatunnel-hazelcast/seatunnel-hazelcast-shade/src/main/java/com/hazelcast/**'

comment: on-failure
4 changes: 3 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,9 @@ seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine
seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/StandaloneCheckpointIDCounter.java from https://github.com/apache/flink
seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/metrics from https://github.com/hazelcast/hazelcast
seatunnel-api/src/main/java/org/apache/seatunnel/api/common/metrics from https://github.com/hazelcast/hazelcast
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sqlengine/zeta/ZetaSQLEngine.java from https://github.com/JSQLParser/JSqlParser
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sqlengine/zeta/ZetaSQLEngine.java from https://github.com/JSQLParser/JSqlParser
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sqlengine/zeta/ZetaSQLType.java from https://github.com/JSQLParser/JSqlParser
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sqlengine/zeta/ZetaSQLFilter.java from https://github.com/JSQLParser/JSqlParser
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sqlengine/zeta/ZetaSQLFunction.java from https://github.com/JSQLParser/JSqlParser
seatunnel-shade/seatunnel-hazelcast/seatunnel-hazelcast-shade/src/main/java/com/hazelcast/** from https://github.com/hazelcast/hazelcast
seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/LiteNodeDropOutTcpIpJoiner.java from https://github.com/hazelcast/hazelcast
48 changes: 48 additions & 0 deletions config/hazelcast-master.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

hazelcast:
cluster-name: seatunnel
network:
rest-api:
enabled: true
endpoint-groups:
CLUSTER_WRITE:
enabled: true
DATA:
enabled: true
join:
tcp-ip:
enabled: true
member-list:
- localhost:5801
- localhost:5802
port:
auto-increment: false
port: 5801
properties:
hazelcast.invocation.max.retry.count: 20
hazelcast.tcp.join.port.try.count: 30
hazelcast.logging.type: log4j2
hazelcast.operation.generic.thread.count: 50
hazelcast.heartbeat.failuredetector.type: phi-accrual
hazelcast.heartbeat.interval.seconds: 2
hazelcast.max.no.heartbeat.seconds: 180
hazelcast.heartbeat.phiaccrual.failuredetector.threshold: 10
hazelcast.heartbeat.phiaccrual.failuredetector.sample.size: 200
hazelcast.heartbeat.phiaccrual.failuredetector.min.std.dev.millis: 100

41 changes: 41 additions & 0 deletions config/hazelcast-worker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

hazelcast:
cluster-name: seatunnel
network:
join:
tcp-ip:
enabled: true
member-list:
- localhost:5801
- localhost:5802
port:
auto-increment: false
port: 5802
properties:
hazelcast.invocation.max.retry.count: 20
hazelcast.tcp.join.port.try.count: 30
hazelcast.logging.type: log4j2
hazelcast.operation.generic.thread.count: 50
hazelcast.heartbeat.failuredetector.type: phi-accrual
hazelcast.heartbeat.interval.seconds: 2
hazelcast.max.no.heartbeat.seconds: 180
hazelcast.heartbeat.phiaccrual.failuredetector.threshold: 10
hazelcast.heartbeat.phiaccrual.failuredetector.sample.size: 200
hazelcast.heartbeat.phiaccrual.failuredetector.min.std.dev.millis: 100

6 changes: 6 additions & 0 deletions config/hazelcast.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,10 @@ hazelcast:
hazelcast.tcp.join.port.try.count: 30
hazelcast.logging.type: log4j2
hazelcast.operation.generic.thread.count: 50
hazelcast.heartbeat.failuredetector.type: phi-accrual
hazelcast.heartbeat.interval.seconds: 2
hazelcast.max.no.heartbeat.seconds: 180
hazelcast.heartbeat.phiaccrual.failuredetector.threshold: 10
hazelcast.heartbeat.phiaccrual.failuredetector.sample.size: 200
hazelcast.heartbeat.phiaccrual.failuredetector.min.std.dev.millis: 100

30 changes: 30 additions & 0 deletions config/jvm_master_options
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# JVM Heap
-Xms2g
-Xmx2g

# JVM Dump
-XX:+HeapDumpOnOutOfMemoryError
-XX:HeapDumpPath=/tmp/seatunnel/dump/zeta-server

# Metaspace
-XX:MaxMetaspaceSize=2g

# G1GC
-XX:+UseG1GC
30 changes: 30 additions & 0 deletions config/jvm_worker_options
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# JVM Heap
-Xms2g
-Xmx2g

# JVM Dump
-XX:+HeapDumpOnOutOfMemoryError
-XX:HeapDumpPath=/tmp/seatunnel/dump/zeta-server

# Metaspace
-XX:MaxMetaspaceSize=2g

# G1GC
-XX:+UseG1GC
1 change: 0 additions & 1 deletion docs/en/Connector-v2-release-state.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ SeaTunnel uses a grading system for connectors to help you understand what to ex
| [Hive](connector-v2/source/Hive.md) | Source | GA | 2.2.0-beta |
| [Http](connector-v2/sink/Http.md) | Sink | Beta | 2.2.0-beta |
| [Http](connector-v2/source/Http.md) | Source | Beta | 2.2.0-beta |
| [Hudi](connector-v2/source/Hudi.md) | Source | Beta | 2.2.0-beta |
| [Iceberg](connector-v2/source/Iceberg.md) | Source | Beta | 2.2.0-beta |
| [InfluxDB](connector-v2/sink/InfluxDB.md) | Sink | Beta | 2.3.0 |
| [InfluxDB](connector-v2/source/InfluxDB.md) | Source | Beta | 2.3.0-beta |
Expand Down
4 changes: 4 additions & 0 deletions docs/en/concept/JobEnvConfig.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ This parameter configures the parallelism of source and sink.

Used to control the default retry times when a job fails. The default value is 3, and it only works in the Zeta engine.

### job.retry.interval.seconds

Used to control the default retry interval when a job fails. The default value is 3 seconds, and it only works in the Zeta engine.

### savemode.execute.location

This parameter is used to specify the location of the savemode when the job is executed in the Zeta engine.
Expand Down
1 change: 1 addition & 0 deletions docs/en/concept/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ Some Notes:
- quota with `'` if the value has special character (like `(`)
- if the replacement variables is in `"` or `'`, like `resName` and `nameVal`, you need add `"`
- the value can't have space `' '`, like `-i jobName='this is a job name' `, this will be replaced to `job.name = "this"`
- If you want to use dynamic parameters,you can use the following format: -i date=$(date +"%Y%m%d").

## What's More

Expand Down
2 changes: 1 addition & 1 deletion docs/en/connector-v2/formats/avro.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ source {
bootstrap.servers = "kafkaCluster:9092"
topic = "test_avro_topic"
result_table_name = "kafka_table"
kafka.auto.offset.reset = "earliest"
start_mode = "earliest"
format = avro
format_error_handle_way = skip
schema = {
Expand Down
13 changes: 13 additions & 0 deletions docs/en/connector-v2/sink/Hbase.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,20 @@ Hbase {
all_columns = seatunnel
}
}
```

## Writes To The Specified Column Family

```hocon
Hbase {
zookeeper_quorum = "hbase_e2e:2181"
table = "assign_cf_table"
rowkey_column = ["id"]
family_name {
c_double = "cf1"
c_bigint = "cf2"
}
}
```

## Changelog
Expand Down
98 changes: 98 additions & 0 deletions docs/en/connector-v2/sink/Hudi.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Hudi

> Hudi sink connector
## Description

Used to write data to Hudi.

## Key features

- [x] [exactly-once](../../concept/connector-v2-features.md)
- [x] [cdc](../../concept/connector-v2-features.md)

## Options

| name | type | required | default value |
|----------------------------|--------|----------|---------------|
| table_name | string | yes | - |
| table_dfs_path | string | yes | - |
| conf_files_path | string | no | - |
| record_key_fields | string | no | - |
| partition_fields | string | no | - |
| table_type | enum | no | copy_on_write |
| op_type | enum | no | insert |
| batch_interval_ms | Int | no | 1000 |
| insert_shuffle_parallelism | Int | no | 2 |
| upsert_shuffle_parallelism | Int | no | 2 |
| min_commits_to_keep | Int | no | 20 |
| max_commits_to_keep | Int | no | 30 |
| common-options | config | no | - |

### table_name [string]

`table_name` The name of hudi table.

### table_dfs_path [string]

`table_dfs_path` The dfs root path of hudi table,such as 'hdfs://nameserivce/data/hudi/hudi_table/'.

### table_type [enum]

`table_type` The type of hudi table. The value is 'copy_on_write' or 'merge_on_read'.

### conf_files_path [string]

`conf_files_path` The environment conf file path list(local path), which used to init hdfs client to read hudi table file. The example is '/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml'.

### op_type [enum]

`op_type` The operation type of hudi table. The value is 'insert' or 'upsert' or 'bulk_insert'.

### batch_interval_ms [Int]

`batch_interval_ms` The interval time of batch write to hudi table.

### insert_shuffle_parallelism [Int]

`insert_shuffle_parallelism` The parallelism of insert data to hudi table.

### upsert_shuffle_parallelism [Int]

`upsert_shuffle_parallelism` The parallelism of upsert data to hudi table.

### min_commits_to_keep [Int]

`min_commits_to_keep` The min commits to keep of hudi table.

### max_commits_to_keep [Int]

`max_commits_to_keep` The max commits to keep of hudi table.

### common options

Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details.

## Examples

```hocon
source {
Hudi {
table_dfs_path = "hdfs://nameserivce/data/hudi/hudi_table/"
table_type = "copy_on_write"
conf_files_path = "/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml"
use.kerberos = true
kerberos.principal = "test_user@xxx"
kerberos.principal.file = "/home/test/test_user.keytab"
}
}
```

## Changelog

### 2.2.0-beta 2022-09-26

- Add Hudi Source Connector

Loading

0 comments on commit 94c6ed7

Please sign in to comment.