From 099c896deda85b7b692f8888573f361de1ea1bf8 Mon Sep 17 00:00:00 2001 From: Iryna Grankova <87977540+igrankova@users.noreply.github.com> Date: Thu, 30 Dec 2021 16:21:35 +0200 Subject: [PATCH 001/215] Update fields in destination-connectors specifications: gcs, jdbc, kafka, keen, kinesis, kvdb, local-json, mariadb-columnstore, mongodb (#8809) * Files title/description update for issue # 8747 Co-authored-by: Serhii Chvaliuk --- .../seed/destination_definitions.yaml | 14 +- .../resources/seed/destination_specs.yaml | 125 ++++++++++-------- .../connectors/destination-gcs/Dockerfile | 2 +- .../src/main/resources/spec.json | 16 +-- .../connectors/destination-jdbc/Dockerfile | 2 +- .../src/main/resources/spec.json | 9 +- .../connectors/destination-kafka/Dockerfile | 2 +- .../src/main/resources/spec.json | 48 +++---- .../connectors/destination-keen/Dockerfile | 2 +- .../src/main/resources/spec.json | 7 +- .../connectors/destination-kinesis/Dockerfile | 2 +- .../src/main/resources/spec.json | 16 +-- .../destination_kvdb/spec.json | 6 +- .../destination-local-json/Dockerfile | 2 +- .../src/main/resources/spec.json | 1 + .../Dockerfile | 2 +- .../src/main/resources/spec.json | 8 +- .../connectors/destination-mongodb/Dockerfile | 2 +- .../src/main/resources/spec.json | 8 +- docs/integrations/destinations/gcs.md | 1 + docs/integrations/destinations/kafka.md | 1 + docs/integrations/destinations/keen.md | 1 + .../destinations/mariadb-columnstore.md | 1 + docs/integrations/destinations/mongodb.md | 1 + 24 files changed, 149 insertions(+), 130 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index eaae8e1430c88..720a4e043b0f3 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -31,7 +31,7 @@ - name: Chargify (Keen) destinationDefinitionId: 81740ce8-d764-4ea7-94df-16bb41de36ae dockerRepository: airbyte/destination-keen - dockerImageTag: 0.2.0 + dockerImageTag: 0.2.1 documentationUrl: https://docs.airbyte.io/integrations/destinations/keen icon: chargify.svg - name: Clickhouse @@ -60,7 +60,7 @@ - name: Google Cloud Storage (GCS) destinationDefinitionId: ca8f6566-e555-4b40-943a-545bf123117a dockerRepository: airbyte/destination-gcs - dockerImageTag: 0.1.17 + dockerImageTag: 0.1.18 documentationUrl: https://docs.airbyte.io/integrations/destinations/gcs icon: googlecloudstorage.svg - name: Google PubSub @@ -72,13 +72,13 @@ - name: Kafka destinationDefinitionId: 9f760101-60ae-462f-9ee6-b7a9dafd454d dockerRepository: airbyte/destination-kafka - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/destinations/kafka icon: kafka.svg - name: Kinesis destinationDefinitionId: 6d1d66d4-26ab-4602-8d32-f85894b04955 dockerRepository: airbyte/destination-kinesis - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.io/integrations/destinations/kinesis icon: kinesis.svg - name: Local CSV @@ -90,7 +90,7 @@ - name: Local JSON destinationDefinitionId: a625d593-bba5-4a1c-a53d-2d246268a816 dockerRepository: airbyte/destination-local-json - dockerImageTag: 0.2.8 + dockerImageTag: 0.2.9 documentationUrl: https://docs.airbyte.io/integrations/destinations/local-json icon: file.svg - name: MQTT @@ -114,7 +114,7 @@ - name: MongoDB destinationDefinitionId: 8b746512-8c2e-6ac1-4adc-b59faafd473c dockerRepository: 
airbyte/destination-mongodb - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/destinations/mongodb icon: mongodb.svg - name: MySQL @@ -185,6 +185,6 @@ - name: MariaDB ColumnStore destinationDefinitionId: 294a4790-429b-40ae-9516-49826b9702e1 dockerRepository: airbyte/destination-mariadb-columnstore - dockerImageTag: 0.1.1 + dockerImageTag: 0.1.2 documentationUrl: https://docs.airbyte.io/integrations/destinations/mariadb-columnstore icon: mariadb.svg diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 938177c9485ae..311d747898828 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -607,7 +607,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-keen:0.2.0" +- dockerImage: "airbyte/destination-keen:0.2.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/keen" connectionSpecification: @@ -620,13 +620,16 @@ additionalProperties: false properties: project_id: - description: "Keen Project ID" + description: "To get Keen Project ID, navigate to the Access tab from the\ + \ left-hand, side panel and check the Project Details section." + title: "Project ID" type: "string" examples: - "58b4acc22ba938934e888322e" api_key: title: "API Key" - description: "Keen Master API key" + description: "To get Keen Master API Key, navigate to the Access tab from\ + \ the left-hand, side panel and check the Project Details section." type: "string" examples: - "ABCDEFGHIJKLMNOPRSTUWXYZ" @@ -634,7 +637,7 @@ infer_timestamp: title: "Infer Timestamp" description: "Allow connector to guess keen.timestamp value based on the\ - \ streamed data" + \ streamed data." type: "boolean" default: true supportsIncremental: true @@ -1128,7 +1131,7 @@ - "overwrite" - "append" supportsNamespaces: true -- dockerImage: "airbyte/destination-gcs:0.1.17" +- dockerImage: "airbyte/destination-gcs:0.1.18" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs" connectionSpecification: @@ -1146,11 +1149,13 @@ gcs_bucket_name: title: "GCS Bucket Name" type: "string" - description: "The name of the GCS bucket." + description: "You can find the bucket name in the App Engine Admin console\ + \ Application Settings page, under the label Google Cloud Storage Bucket." examples: - "airbyte_sync" gcs_bucket_path: - description: "Directory under the GCS bucket where data will be written." + description: "GCS Bucket Path string Subdirectory under the above bucket\ + \ to sync the data into." type: "string" examples: - "data_sync/test" @@ -1158,7 +1163,7 @@ title: "GCS Bucket Region" type: "string" default: "" - description: "The region of the GCS bucket." + description: "Select a Region of the GCS Bucket." enum: - "" - "-- North America --" @@ -1207,7 +1212,7 @@ title: "Credential" type: "object" oneOf: - - title: "HMAC key" + - title: "HMAC Key" required: - "credential_type" - "hmac_key_access_id" @@ -1256,7 +1261,7 @@ \ to no compression." type: "object" oneOf: - - title: "no compression" + - title: "No Compression" required: - "codec" properties: @@ -1303,7 +1308,7 @@ - "xz" default: "xz" compression_level: - title: "Compression level" + title: "Compression Level" description: "See here for details." 
type: "integer" @@ -1321,7 +1326,7 @@ - "zstandard" default: "zstandard" compression_level: - title: "Compression level" + title: "Compression Level" description: "Negative levels are 'fast' modes akin to lz4 or\ \ snappy, levels above 9 are generally for archival purposes,\ \ and levels above 18 use a lot of memory." @@ -1330,7 +1335,7 @@ minimum: -5 maximum: 22 include_checksum: - title: "Include checksum" + title: "Include Checksum" description: "If true, include a checksum with each data block." type: "boolean" default: false @@ -1505,7 +1510,7 @@ supportsDBT: false supported_destination_sync_modes: - "append" -- dockerImage: "airbyte/destination-kafka:0.1.2" +- dockerImage: "airbyte/destination-kafka:0.1.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/kafka" connectionSpecification: @@ -1536,7 +1541,7 @@ additionalProperties: true properties: bootstrap_servers: - title: "Bootstrap servers" + title: "Bootstrap Servers" description: "A list of host/port pairs to use for establishing the initial\ \ connection to the Kafka cluster. The client will make use of all servers\ \ irrespective of which servers are specified here for bootstrapping—this\ @@ -1550,7 +1555,7 @@ examples: - "kafka-broker1:9092,kafka-broker2:9092" topic_pattern: - title: "Topic pattern" + title: "Topic Pattern" description: "Topic pattern in which the records will be sent. You can use\ \ patterns like '{namespace}' and/or '{stream}' to send the message to\ \ a specific topic based on these values. Notice that the topic name will\ @@ -1560,13 +1565,13 @@ - "sample.topic" - "{namespace}.{stream}.sample" test_topic: - title: "Test topic" + title: "Test Topic" description: "Topic to test if Airbyte can produce messages." type: "string" examples: - "test.topic" sync_producer: - title: "Sync producer" + title: "Sync Producer" description: "Wait synchronously until the record has been sent to Kafka." type: "boolean" default: false @@ -1596,7 +1601,7 @@ - "SASL_PLAINTEXT" default: "SASL_PLAINTEXT" sasl_mechanism: - title: "SASL mechanism" + title: "SASL Mechanism" description: "SASL mechanism used for client connections. This may\ \ be any mechanism for which a security provider is available." type: "string" @@ -1604,7 +1609,7 @@ enum: - "PLAIN" sasl_jaas_config: - title: "SASL JAAS config" + title: "SASL JAAS Config" description: "JAAS login context parameters for SASL connections in\ \ the format used by JAAS configuration files." type: "string" @@ -1622,7 +1627,7 @@ - "SASL_SSL" default: "SASL_SSL" sasl_mechanism: - title: "SASL mechanism" + title: "SASL Mechanism" description: "SASL mechanism used for client connections. This may\ \ be any mechanism for which a security provider is available." type: "string" @@ -1632,7 +1637,7 @@ - "OAUTHBEARER" - "SCRAM-SHA-256" sasl_jaas_config: - title: "SASL JAAS config" + title: "SASL JAAS Config" description: "JAAS login context parameters for SASL connections in\ \ the format used by JAAS configuration files." type: "string" @@ -1640,7 +1645,7 @@ airbyte_secret: true client_id: title: "Client ID" - description: "An id string to pass to the server when making requests. The\ + description: "An ID string to pass to the server when making requests. The\ \ purpose of this is to be able to track the source of requests beyond\ \ just ip/port by allowing a logical application name to be included in\ \ server-side request logging." 
@@ -1659,7 +1664,7 @@ - "1" - "all" enable_idempotence: - title: "Enable idempotence" + title: "Enable Idempotence" description: "When set to 'true', the producer will ensure that exactly\ \ one copy of each message is written in the stream. If 'false', producer\ \ retries due to broker failures, etc., may write duplicates of the retried\ @@ -1667,7 +1672,7 @@ type: "boolean" default: false compression_type: - title: "Compression type" + title: "Compression Type" description: "The compression type for all data generated by the producer." type: "string" default: "none" @@ -1678,7 +1683,7 @@ - "lz4" - "zstd" batch_size: - title: "Batch size" + title: "Batch Size" description: "The producer will attempt to batch records together into fewer\ \ requests whenever multiple records are being sent to the same partition." type: "integer" @@ -1692,14 +1697,15 @@ examples: - 0 max_in_flight_requests_per_connection: - title: "Max in flight requests per connection" + title: "Max in Flight Requests per Connection" description: "The maximum number of unacknowledged requests the client will\ - \ send on a single connection before blocking." + \ send on a single connection before blocking. Can be greater than 1,\ + \ and the maximum value supported with idempotency is 5." type: "integer" examples: - 5 client_dns_lookup: - title: "Client DNS lookup" + title: "Client DNS Lookup" description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ \ connect to each returned IP address in sequence until a successful connection\ \ is established. After a disconnection, the next IP is used. Once all\ @@ -1717,13 +1723,13 @@ - "resolve_canonical_bootstrap_servers_only" - "use_all_dns_ips" buffer_memory: - title: "Buffer memory" + title: "Buffer Memory" description: "The total bytes of memory the producer can use to buffer records\ \ waiting to be sent to the server." type: "string" examples: 33554432 max_request_size: - title: "Max request size" + title: "Max Request Size" description: "The maximum size of a request in bytes." type: "integer" examples: @@ -1736,14 +1742,14 @@ examples: - 2147483647 socket_connection_setup_timeout_ms: - title: "Socket connection setup timeout" + title: "Socket Connection Setup Timeout" description: "The amount of time the client will wait for the socket connection\ \ to be established." type: "string" examples: - 10000 socket_connection_setup_timeout_max_ms: - title: "Socket connection setup max timeout" + title: "Socket Connection Setup Max Timeout" description: "The maximum amount of time the client will wait for the socket\ \ connection to be established. The connection setup timeout will increase\ \ exponentially for each consecutive connection failure up to this maximum." @@ -1751,7 +1757,7 @@ examples: - 30000 max_block_ms: - title: "Max block ms" + title: "Max Block ms" description: "The configuration controls how long the KafkaProducer's send(),\ \ partitionsFor(), initTransactions(), sendOffsetsToTransaction(), commitTransaction()\ \ and abortTransaction() methods will block." @@ -1759,7 +1765,7 @@ examples: - 60000 request_timeout_ms: - title: "Request timeout" + title: "Request Timeout" description: "The configuration controls the maximum amount of time the\ \ client will wait for the response of a request. 
If the response is not\ \ received before the timeout elapses the client will resend the request\ @@ -1768,21 +1774,21 @@ examples: - 30000 delivery_timeout_ms: - title: "Delivery timeout" + title: "Delivery Timeout" description: "An upper bound on the time to report success or failure after\ \ a call to 'send()' returns." type: "integer" examples: - 120000 send_buffer_bytes: - title: "Send buffer bytes" + title: "Send Buffer bytes" description: "The size of the TCP send buffer (SO_SNDBUF) to use when sending\ \ data. If the value is -1, the OS default will be used." type: "integer" examples: - 131072 receive_buffer_bytes: - title: "Receive buffer bytes" + title: "Receive Buffer bytes" description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ \ reading data. If the value is -1, the OS default will be used." type: "integer" @@ -1793,7 +1799,7 @@ supportsDBT: false supported_destination_sync_modes: - "append" -- dockerImage: "airbyte/destination-kinesis:0.1.0" +- dockerImage: "airbyte/destination-kinesis:0.1.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/kinesis" connectionSpecification: @@ -1809,34 +1815,36 @@ properties: endpoint: title: "Endpoint" - description: "Aws Kinesis endpoint." + description: "AWS Kinesis endpoint." type: "string" order: 0 region: title: "Region" - description: "Aws region." + description: "AWS region. Your account determines the Regions that are available\ + \ to you." type: "string" order: 1 shardCount: - title: "shardCount" + title: "Shard Count" description: "Number of shards to which the data should be streamed." type: "integer" default: 5 order: 2 accessKey: - title: "accessKey" - description: "Aws access key." + title: "Access Key" + description: "Generate the AWS Access Key for current user." airbyte_secret: true type: "string" order: 3 privateKey: - title: "privateKey" - description: "Aws private key." + title: "Private Key" + description: "The AWS Private Key - a string of numbers and letters that\ + \ are unique for each account, also known as a \"recovery phrase\"." airbyte_secret: true type: "string" order: 4 bufferSize: - title: "bufferSize" + title: "Buffer Size" description: "Buffer size for storing kinesis records before being batch\ \ streamed." type: "integer" @@ -1875,7 +1883,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-local-json:0.2.8" +- dockerImage: "airbyte/destination-local-json:0.2.9" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-json" connectionSpecification: @@ -1891,6 +1899,7 @@ \ files will be placed inside that local mount. For more information check\ \ out our docs" + title: "Destination Path" type: "string" examples: - "/json_data" @@ -2250,7 +2259,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-mongodb:0.1.2" +- dockerImage: "airbyte/destination-mongodb:0.1.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/mongodb" connectionSpecification: @@ -2265,7 +2274,7 @@ instance_type: description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ \ Set TLS connection is used by default." - title: "MongoDb instance type" + title: "MongoDb Instance Type" type: "object" order: 0 oneOf: @@ -2283,12 +2292,12 @@ host: title: "Host" type: "string" - description: "Host of a Mongo database to be replicated." + description: "The Host of a Mongo database to be replicated." 
order: 0 port: title: "Port" type: "integer" - description: "Port of a Mongo database to be replicated." + description: "The Port of a Mongo database to be replicated." minimum: 0 maximum: 65536 default: 27017 @@ -2296,7 +2305,7 @@ - "27017" order: 1 tls: - title: "TLS connection" + title: "TLS Connection" type: "boolean" description: "Indicates whether TLS encryption protocol will be used\ \ to connect to MongoDB. It is recommended to use TLS connection\ @@ -3987,7 +3996,7 @@ - "overwrite" - "append" - "append_dedup" -- dockerImage: "airbyte/destination-mariadb-columnstore:0.1.1" +- dockerImage: "airbyte/destination-mariadb-columnstore:0.1.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/mariadb-columnstore" connectionSpecification: @@ -4003,12 +4012,12 @@ properties: host: title: "Host" - description: "Hostname of the database." + description: "The Hostname of the database." type: "string" order: 0 port: title: "Port" - description: "Port of the database." + description: "The Port of the database." type: "integer" minimum: 0 maximum: 65536 @@ -4023,12 +4032,12 @@ order: 2 username: title: "Username" - description: "Username to use to access the database." + description: "The Username which is used to access the database." type: "string" order: 3 password: title: "Password" - description: "Password associated with the username." + description: "The Password associated with the username." type: "string" airbyte_secret: true order: 4 diff --git a/airbyte-integrations/connectors/destination-gcs/Dockerfile b/airbyte-integrations/connectors/destination-gcs/Dockerfile index 127870936e94b..f92f19ab11915 100644 --- a/airbyte-integrations/connectors/destination-gcs/Dockerfile +++ b/airbyte-integrations/connectors/destination-gcs/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-gcs COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.17 +LABEL io.airbyte.version=0.1.18 LABEL io.airbyte.name=airbyte/destination-gcs diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-gcs/src/main/resources/spec.json index 2da97717b99f7..f88f3961dcd5d 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-gcs/src/main/resources/spec.json @@ -21,11 +21,11 @@ "gcs_bucket_name": { "title": "GCS Bucket Name", "type": "string", - "description": "The name of the GCS bucket.", + "description": "You can find the bucket name in the App Engine Admin console Application Settings page, under the label Google Cloud Storage Bucket.", "examples": ["airbyte_sync"] }, "gcs_bucket_path": { - "description": "Directory under the GCS bucket where data will be written.", + "description": "GCS Bucket Path string Subdirectory under the above bucket to sync the data into.", "type": "string", "examples": ["data_sync/test"] }, @@ -33,7 +33,7 @@ "title": "GCS Bucket Region", "type": "string", "default": "", - "description": "The region of the GCS bucket.", + "description": "Select a Region of the GCS Bucket.", "enum": [ "", "-- North America --", @@ -85,7 +85,7 @@ "type": "object", "oneOf": [ { - "title": "HMAC key", + "title": "HMAC Key", "required": [ "credential_type", "hmac_key_access_id", @@ -135,7 +135,7 @@ "type": "object", "oneOf": [ { - "title": "no compression", + "title": "No Compression", "required": ["codec"], "properties": { "codec": { @@ -185,7 +185,7 @@ "default": "xz" }, "compression_level": { - "title": 
"Compression level", + "title": "Compression Level", "description": "See here for details.", "type": "integer", "default": 6, @@ -204,7 +204,7 @@ "default": "zstandard" }, "compression_level": { - "title": "Compression level", + "title": "Compression Level", "description": "Negative levels are 'fast' modes akin to lz4 or snappy, levels above 9 are generally for archival purposes, and levels above 18 use a lot of memory.", "type": "integer", "default": 3, @@ -212,7 +212,7 @@ "maximum": 22 }, "include_checksum": { - "title": "Include checksum", + "title": "Include Checksum", "description": "If true, include a checksum with each data block.", "type": "boolean", "default": false diff --git a/airbyte-integrations/connectors/destination-jdbc/Dockerfile b/airbyte-integrations/connectors/destination-jdbc/Dockerfile index ef697fbe05ad6..ddf5ca5aa0f33 100644 --- a/airbyte-integrations/connectors/destination-jdbc/Dockerfile +++ b/airbyte-integrations/connectors/destination-jdbc/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-jdbc COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.3.7 +LABEL io.airbyte.version=0.3.8 LABEL io.airbyte.name=airbyte/destination-jdbc diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-jdbc/src/main/resources/spec.json index d411fd5476171..d3b352bc5146e 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/resources/spec.json @@ -12,20 +12,23 @@ "additionalProperties": true, "properties": { "username": { - "description": "Username to use to access the database.", + "description": "The username which is used to access the database.", + "title": "Username", "type": "string" }, "password": { - "description": "Password associated with the username.", + "description": "The password associated with this username.", + "title": "Password", "type": "string", "airbyte_secret": true }, "jdbc_url": { "description": "JDBC formatted url. See the standard here.", + "title": "JDBC URL", "type": "string" }, "schema": { - "description": "The default schema tables are written to if the source does not specify a namespace. 
The usual value for this field is \"public\".", + "description": "If you leave the schema unspecified, JDBC defaults to a schema named \"public\".", "type": "string", "examples": ["public"], "default": "public", diff --git a/airbyte-integrations/connectors/destination-kafka/Dockerfile b/airbyte-integrations/connectors/destination-kafka/Dockerfile index 700fdee0e16d5..e82d36c9fa398 100644 --- a/airbyte-integrations/connectors/destination-kafka/Dockerfile +++ b/airbyte-integrations/connectors/destination-kafka/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-kafka COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/destination-kafka diff --git a/airbyte-integrations/connectors/destination-kafka/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-kafka/src/main/resources/spec.json index 2eb35a5b22cd9..0b77d659f55d8 100644 --- a/airbyte-integrations/connectors/destination-kafka/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-kafka/src/main/resources/spec.json @@ -33,25 +33,25 @@ "additionalProperties": true, "properties": { "bootstrap_servers": { - "title": "Bootstrap servers", + "title": "Bootstrap Servers", "description": "A list of host/port pairs to use for establishing the initial connection to the Kafka cluster. The client will make use of all servers irrespective of which servers are specified here for bootstrapping—this list only impacts the initial hosts used to discover the full set of servers. This list should be in the form host1:port1,host2:port2,.... Since these servers are just used for the initial connection to discover the full cluster membership (which may change dynamically), this list need not contain the full set of servers (you may want more than one, though, in case a server is down).", "type": "string", "examples": ["kafka-broker1:9092,kafka-broker2:9092"] }, "topic_pattern": { - "title": "Topic pattern", + "title": "Topic Pattern", "description": "Topic pattern in which the records will be sent. You can use patterns like '{namespace}' and/or '{stream}' to send the message to a specific topic based on these values. Notice that the topic name will be transformed to a standard naming convention.", "type": "string", "examples": ["sample.topic", "{namespace}.{stream}.sample"] }, "test_topic": { - "title": "Test topic", + "title": "Test Topic", "description": "Topic to test if Airbyte can produce messages.", "type": "string", "examples": ["test.topic"] }, "sync_producer": { - "title": "Sync producer", + "title": "Sync Producer", "description": "Wait synchronously until the record has been sent to Kafka.", "type": "boolean", "default": false @@ -86,14 +86,14 @@ "default": "SASL_PLAINTEXT" }, "sasl_mechanism": { - "title": "SASL mechanism", + "title": "SASL Mechanism", "description": "SASL mechanism used for client connections. This may be any mechanism for which a security provider is available.", "type": "string", "default": "PLAIN", "enum": ["PLAIN"] }, "sasl_jaas_config": { - "title": "SASL JAAS config", + "title": "SASL JAAS Config", "description": "JAAS login context parameters for SASL connections in the format used by JAAS configuration files.", "type": "string", "default": "", @@ -115,14 +115,14 @@ "default": "SASL_SSL" }, "sasl_mechanism": { - "title": "SASL mechanism", + "title": "SASL Mechanism", "description": "SASL mechanism used for client connections. 
This may be any mechanism for which a security provider is available.", "type": "string", "default": "GSSAPI", "enum": ["GSSAPI", "OAUTHBEARER", "SCRAM-SHA-256"] }, "sasl_jaas_config": { - "title": "SASL JAAS config", + "title": "SASL JAAS Config", "description": "JAAS login context parameters for SASL connections in the format used by JAAS configuration files.", "type": "string", "default": "", @@ -134,7 +134,7 @@ }, "client_id": { "title": "Client ID", - "description": "An id string to pass to the server when making requests. The purpose of this is to be able to track the source of requests beyond just ip/port by allowing a logical application name to be included in server-side request logging.", + "description": "An ID string to pass to the server when making requests. The purpose of this is to be able to track the source of requests beyond just ip/port by allowing a logical application name to be included in server-side request logging.", "type": "string", "examples": ["airbyte-producer"] }, @@ -146,20 +146,20 @@ "enum": ["0", "1", "all"] }, "enable_idempotence": { - "title": "Enable idempotence", + "title": "Enable Idempotence", "description": "When set to 'true', the producer will ensure that exactly one copy of each message is written in the stream. If 'false', producer retries due to broker failures, etc., may write duplicates of the retried message in the stream.", "type": "boolean", "default": false }, "compression_type": { - "title": "Compression type", + "title": "Compression Type", "description": "The compression type for all data generated by the producer.", "type": "string", "default": "none", "enum": ["none", "gzip", "snappy", "lz4", "zstd"] }, "batch_size": { - "title": "Batch size", + "title": "Batch Size", "description": "The producer will attempt to batch records together into fewer requests whenever multiple records are being sent to the same partition.", "type": "integer", "examples": [16384] @@ -171,13 +171,13 @@ "examples": [0] }, "max_in_flight_requests_per_connection": { - "title": "Max in flight requests per connection", - "description": "The maximum number of unacknowledged requests the client will send on a single connection before blocking.", + "title": "Max in Flight Requests per Connection", + "description": "The maximum number of unacknowledged requests the client will send on a single connection before blocking. Can be greater than 1, and the maximum value supported with idempotency is 5.", "type": "integer", "examples": [5] }, "client_dns_lookup": { - "title": "Client DNS lookup", + "title": "Client DNS Lookup", "description": "Controls how the client uses DNS lookups. If set to use_all_dns_ips, connect to each returned IP address in sequence until a successful connection is established. After a disconnection, the next IP is used. Once all IPs have been used once, the client resolves the IP(s) from the hostname again. If set to resolve_canonical_bootstrap_servers_only, resolve each bootstrap address into a list of canonical names. After the bootstrap phase, this behaves the same as use_all_dns_ips. 
If set to default (deprecated), attempt to connect to the first IP address returned by the lookup, even if the lookup returns multiple IP addresses.", "type": "string", "default": "use_all_dns_ips", @@ -189,13 +189,13 @@ ] }, "buffer_memory": { - "title": "Buffer memory", + "title": "Buffer Memory", "description": "The total bytes of memory the producer can use to buffer records waiting to be sent to the server.", "type": "string", "examples": 33554432 }, "max_request_size": { - "title": "Max request size", + "title": "Max Request Size", "description": "The maximum size of a request in bytes.", "type": "integer", "examples": [1048576] @@ -207,43 +207,43 @@ "examples": [2147483647] }, "socket_connection_setup_timeout_ms": { - "title": "Socket connection setup timeout", + "title": "Socket Connection Setup Timeout", "description": "The amount of time the client will wait for the socket connection to be established.", "type": "string", "examples": [10000] }, "socket_connection_setup_timeout_max_ms": { - "title": "Socket connection setup max timeout", + "title": "Socket Connection Setup Max Timeout", "description": "The maximum amount of time the client will wait for the socket connection to be established. The connection setup timeout will increase exponentially for each consecutive connection failure up to this maximum.", "type": "string", "examples": [30000] }, "max_block_ms": { - "title": "Max block ms", + "title": "Max Block ms", "description": "The configuration controls how long the KafkaProducer's send(), partitionsFor(), initTransactions(), sendOffsetsToTransaction(), commitTransaction() and abortTransaction() methods will block.", "type": "string", "examples": [60000] }, "request_timeout_ms": { - "title": "Request timeout", + "title": "Request Timeout", "description": "The configuration controls the maximum amount of time the client will wait for the response of a request. If the response is not received before the timeout elapses the client will resend the request if necessary or fail the request if retries are exhausted.", "type": "integer", "examples": [30000] }, "delivery_timeout_ms": { - "title": "Delivery timeout", + "title": "Delivery Timeout", "description": "An upper bound on the time to report success or failure after a call to 'send()' returns.", "type": "integer", "examples": [120000] }, "send_buffer_bytes": { - "title": "Send buffer bytes", + "title": "Send Buffer bytes", "description": "The size of the TCP send buffer (SO_SNDBUF) to use when sending data. If the value is -1, the OS default will be used.", "type": "integer", "examples": [131072] }, "receive_buffer_bytes": { - "title": "Receive buffer bytes", + "title": "Receive Buffer bytes", "description": "The size of the TCP receive buffer (SO_RCVBUF) to use when reading data. 
If the value is -1, the OS default will be used.", "type": "integer", "examples": [32768] diff --git a/airbyte-integrations/connectors/destination-keen/Dockerfile b/airbyte-integrations/connectors/destination-keen/Dockerfile index e5aa71b5273c7..e256d34a316a1 100644 --- a/airbyte-integrations/connectors/destination-keen/Dockerfile +++ b/airbyte-integrations/connectors/destination-keen/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-keen COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.0 +LABEL io.airbyte.version=0.2.1 LABEL io.airbyte.name=airbyte/destination-keen diff --git a/airbyte-integrations/connectors/destination-keen/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-keen/src/main/resources/spec.json index 44090de4bbad5..56abae7b719b6 100644 --- a/airbyte-integrations/connectors/destination-keen/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-keen/src/main/resources/spec.json @@ -12,20 +12,21 @@ "additionalProperties": false, "properties": { "project_id": { - "description": "Keen Project ID", + "description": "To get Keen Project ID, navigate to the Access tab from the left-hand, side panel and check the Project Details section.", + "title": "Project ID", "type": "string", "examples": ["58b4acc22ba938934e888322e"] }, "api_key": { "title": "API Key", - "description": "Keen Master API key", + "description": "To get Keen Master API Key, navigate to the Access tab from the left-hand, side panel and check the Project Details section.", "type": "string", "examples": ["ABCDEFGHIJKLMNOPRSTUWXYZ"], "airbyte_secret": true }, "infer_timestamp": { "title": "Infer Timestamp", - "description": "Allow connector to guess keen.timestamp value based on the streamed data", + "description": "Allow connector to guess keen.timestamp value based on the streamed data.", "type": "boolean", "default": true } diff --git a/airbyte-integrations/connectors/destination-kinesis/Dockerfile b/airbyte-integrations/connectors/destination-kinesis/Dockerfile index 40d927ee4df30..d74736a6cc6ab 100644 --- a/airbyte-integrations/connectors/destination-kinesis/Dockerfile +++ b/airbyte-integrations/connectors/destination-kinesis/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-kinesis COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/destination-kinesis diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-kinesis/src/main/resources/spec.json index aa61301bc5bc8..65aea2da4e20d 100644 --- a/airbyte-integrations/connectors/destination-kinesis/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/resources/spec.json @@ -13,39 +13,39 @@ "properties": { "endpoint": { "title": "Endpoint", - "description": "Aws Kinesis endpoint.", + "description": "AWS Kinesis endpoint.", "type": "string", "order": 0 }, "region": { "title": "Region", - "description": "Aws region.", + "description": "AWS region. 
Your account determines the Regions that are available to you.", "type": "string", "order": 1 }, "shardCount": { - "title": "shardCount", + "title": "Shard Count", "description": "Number of shards to which the data should be streamed.", "type": "integer", "default": 5, "order": 2 }, "accessKey": { - "title": "accessKey", - "description": "Aws access key.", + "title": "Access Key", + "description": "Generate the AWS Access Key for current user.", "airbyte_secret": true, "type": "string", "order": 3 }, "privateKey": { - "title": "privateKey", - "description": "Aws private key.", + "title": "Private Key", + "description": "The AWS Private Key - a string of numbers and letters that are unique for each account, also known as a \"recovery phrase\".", "airbyte_secret": true, "type": "string", "order": 4 }, "bufferSize": { - "title": "bufferSize", + "title": "Buffer Size", "description": "Buffer size for storing kinesis records before being batch streamed.", "type": "integer", "minimum": 1, diff --git a/airbyte-integrations/connectors/destination-kvdb/destination_kvdb/spec.json b/airbyte-integrations/connectors/destination-kvdb/destination_kvdb/spec.json index 45db708aa0c3c..e394c1e53ebe6 100644 --- a/airbyte-integrations/connectors/destination-kvdb/destination_kvdb/spec.json +++ b/airbyte-integrations/connectors/destination-kvdb/destination_kvdb/spec.json @@ -6,7 +6,7 @@ "supportsNormalization": false, "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Destination Kvdb", + "title": "Destination KVdb", "type": "object", "required": ["bucket_id", "secret_key"], "additionalProperties": false, @@ -14,13 +14,13 @@ "bucket_id": { "title": "Bucket ID", "type": "string", - "description": "The ID of your KVDB bucket", + "description": "The ID of your KVdb bucket.", "order": 1 }, "secret_key": { "title": "Secret Key", "type": "string", - "description": "Your bucket's secret key", + "description": "Your bucket Secret Key.", "order": 2 } } diff --git a/airbyte-integrations/connectors/destination-local-json/Dockerfile b/airbyte-integrations/connectors/destination-local-json/Dockerfile index 2e046b93988ed..0eaba73891301 100644 --- a/airbyte-integrations/connectors/destination-local-json/Dockerfile +++ b/airbyte-integrations/connectors/destination-local-json/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-local-json COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.8 +LABEL io.airbyte.version=0.2.9 LABEL io.airbyte.name=airbyte/destination-local-json diff --git a/airbyte-integrations/connectors/destination-local-json/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-local-json/src/main/resources/spec.json index 3acd992f190a5..d6fc2f30f6b33 100644 --- a/airbyte-integrations/connectors/destination-local-json/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-local-json/src/main/resources/spec.json @@ -13,6 +13,7 @@ "properties": { "destination_path": { "description": "Path to the directory where json files will be written. The files will be placed inside that local mount. 
For more information check out our docs", + "title": "Destination Path", "type": "string", "examples": ["/json_data"] } diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/Dockerfile b/airbyte-integrations/connectors/destination-mariadb-columnstore/Dockerfile index 10345807e4933..7e5385da9af40 100644 --- a/airbyte-integrations/connectors/destination-mariadb-columnstore/Dockerfile +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-mariadb-columnstore COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/destination-mariadb-columnstore diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/resources/spec.json index 43b5311e97d3a..0c847796389f2 100644 --- a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/resources/spec.json @@ -13,13 +13,13 @@ "properties": { "host": { "title": "Host", - "description": "Hostname of the database.", + "description": "The Hostname of the database.", "type": "string", "order": 0 }, "port": { "title": "Port", - "description": "Port of the database.", + "description": "The Port of the database.", "type": "integer", "minimum": 0, "maximum": 65536, @@ -35,13 +35,13 @@ }, "username": { "title": "Username", - "description": "Username to use to access the database.", + "description": "The Username which is used to access the database.", "type": "string", "order": 3 }, "password": { "title": "Password", - "description": "Password associated with the username.", + "description": "The Password associated with the username.", "type": "string", "airbyte_secret": true, "order": 4 diff --git a/airbyte-integrations/connectors/destination-mongodb/Dockerfile b/airbyte-integrations/connectors/destination-mongodb/Dockerfile index e081daff9a934..6d5e67ab1b15d 100644 --- a/airbyte-integrations/connectors/destination-mongodb/Dockerfile +++ b/airbyte-integrations/connectors/destination-mongodb/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-mongodb COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/destination-mongodb diff --git a/airbyte-integrations/connectors/destination-mongodb/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-mongodb/src/main/resources/spec.json index eb5e6e33e5102..f5d5f41feaecb 100644 --- a/airbyte-integrations/connectors/destination-mongodb/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-mongodb/src/main/resources/spec.json @@ -13,7 +13,7 @@ "properties": { "instance_type": { "description": "MongoDb instance to connect to. 
For MongoDB Atlas and Replica Set TLS connection is used by default.", - "title": "MongoDb instance type", + "title": "MongoDb Instance Type", "type": "object", "order": 0, "oneOf": [ @@ -29,13 +29,13 @@ "host": { "title": "Host", "type": "string", - "description": "Host of a Mongo database to be replicated.", + "description": "The Host of a Mongo database to be replicated.", "order": 0 }, "port": { "title": "Port", "type": "integer", - "description": "Port of a Mongo database to be replicated.", + "description": "The Port of a Mongo database to be replicated.", "minimum": 0, "maximum": 65536, "default": 27017, @@ -43,7 +43,7 @@ "order": 1 }, "tls": { - "title": "TLS connection", + "title": "TLS Connection", "type": "boolean", "description": "Indicates whether TLS encryption protocol will be used to connect to MongoDB. It is recommended to use TLS connection if possible. For more information see documentation.", "default": false, diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index 1157c2aed658f..b030fafe64a5e 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -222,6 +222,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.18 | 2021-12-30 | [\#8809](https://github.com/airbytehq/airbyte/pull/8809) | Update connector fields title/description | | 0.1.17 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | | 0.1.16 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. | | 0.1.15 | 2021-12-03 | [\#8386](https://github.com/airbytehq/airbyte/pull/8386) | Add new GCP regions | diff --git a/docs/integrations/destinations/kafka.md b/docs/integrations/destinations/kafka.md index d7f3fa98ae16f..98539e5a908cc 100644 --- a/docs/integrations/destinations/kafka.md +++ b/docs/integrations/destinations/kafka.md @@ -98,6 +98,7 @@ _NOTE_: Some configurations for SSL are not available yet. | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2021-12-30 | [\#8809](https://github.com/airbytehq/airbyte/pull/8809) | Update connector fields title/description | | 0.1.2 | 2021-09-14 | [\#6040](https://github.com/airbytehq/airbyte/pull/6040) | Change spec.json and config parser | | 0.1.1 | 2021-07-30 | [\#5125](https://github.com/airbytehq/airbyte/pull/5125) | Enable `additionalPropertities` in spec.json | | 0.1.0 | 2021-07-21 | [\#3746](https://github.com/airbytehq/airbyte/pull/3746) | Initial Release | diff --git a/docs/integrations/destinations/keen.md b/docs/integrations/destinations/keen.md index b8c4f84588a9f..598a0eff51688 100644 --- a/docs/integrations/destinations/keen.md +++ b/docs/integrations/destinations/keen.md @@ -78,6 +78,7 @@ If you have any questions, please reach out to us at team@keen.io and we’ll be | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.1 | 2021-12-30 | [\#8809](https://github.com/airbytehq/airbyte/pull/8809) | Update connector fields title/description | | 0.2.0 | 2021-09-10 | [\#5973](https://github.com/airbytehq/airbyte/pull/5973) | Fix timestamp inference for complex schemas | | 0.1.0 | 2021-08-18 | [\#5339](https://github.com/airbytehq/airbyte/pull/5339) | Keen Destination Release! 
| diff --git a/docs/integrations/destinations/mariadb-columnstore.md b/docs/integrations/destinations/mariadb-columnstore.md index 5bb75254e8595..e05ed4e4ce95e 100644 --- a/docs/integrations/destinations/mariadb-columnstore.md +++ b/docs/integrations/destinations/mariadb-columnstore.md @@ -76,6 +76,7 @@ Using this feature requires additional configuration, when creating the destinat | Version | Date | Pull Request | Subject | |:--------| :--- |:---------------------------------------------------------|:------------------------------------------| +| 0.1.2 | 2021-12-30 | [\#8809](https://github.com/airbytehq/airbyte/pull/8809) | Update connector fields title/description | | 0.1.1 | 2021-12-01 | [\#8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key. | | 0.1.0 | 2021-11-15 | [\#7961](https://github.com/airbytehq/airbyte/pull/7961) | Added MariaDB ColumnStore destination. | diff --git a/docs/integrations/destinations/mongodb.md b/docs/integrations/destinations/mongodb.md index c7b2905920499..6dcb2222551df 100644 --- a/docs/integrations/destinations/mongodb.md +++ b/docs/integrations/destinations/mongodb.md @@ -93,6 +93,7 @@ Collection names should begin with an underscore or a letter character, and cann | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2021-12-30 | [8809](https://github.com/airbytehq/airbyte/pull/8809) | Update connector fields title/description | | 0.1.2 | 2021-10-18 | [6945](https://github.com/airbytehq/airbyte/pull/6945) | Create a secure-only MongoDb destination | | 0.1.1 | 2021-09-29 | [6536](https://github.com/airbytehq/airbyte/pull/6536) | Destination MongoDb: added support via TLS/SSL | From 7d6a5ef30ba1c4db84e26550d30da4cca1d9e100 Mon Sep 17 00:00:00 2001 From: Baz Date: Thu, 30 Dec 2021 17:13:56 +0200 Subject: [PATCH 002/215] edited manage.sh (#9205) --- tools/integrations/manage.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/integrations/manage.sh b/tools/integrations/manage.sh index 62776fbb3e40c..38395b060e068 100755 --- a/tools/integrations/manage.sh +++ b/tools/integrations/manage.sh @@ -80,21 +80,23 @@ cmd_publish() { publish_spec_to_cache=false fi - # before we start working sanity check that this version has not been published yet, so that we do not spend a lot of - # time building, running tests to realize this version is a duplicate. - _error_if_tag_exists "$versioned_image" - - cmd_build "$path" "$run_tests" - + # setting local variables for docker image versioning local image_name; image_name=$(_get_docker_image_name "$path"/Dockerfile) local image_version; image_version=$(_get_docker_image_version "$path"/Dockerfile) local versioned_image=$image_name:$image_version local latest_image=$image_name:latest echo "image_name $image_name" - echo "$versioned_image $versioned_image" + echo "versioned_image $versioned_image" echo "latest_image $latest_image" + # before we start working sanity check that this version has not been published yet, so that we do not spend a lot of + # time building, running tests to realize this version is a duplicate. + _error_if_tag_exists "$versioned_image" + + # building the connector + cmd_build "$path" "$run_tests" + # in case curing the build / tests someone this version has been published. 
_error_if_tag_exists "$versioned_image" From b8c33bdfed54c63406f86614fe9c8f0492760d97 Mon Sep 17 00:00:00 2001 From: Iryna Grankova <87977540+igrankova@users.noreply.github.com> Date: Thu, 30 Dec 2021 17:30:26 +0200 Subject: [PATCH 003/215] Update fields in source-connectors specifications: mongodb-strict-encrypt, mongodb-v2 (#9202) --- .../b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 35 ++++++++++--------- .../source-mongodb-strict-encrypt/Dockerfile | 2 +- .../resources/expected_spec.json | 28 +++++++-------- .../connectors/source-mongodb-v2/Dockerfile | 2 +- .../src/main/resources/spec.json | 30 ++++++++-------- docs/integrations/sources/mongodb-v2.md | 19 +++++----- 8 files changed, 61 insertions(+), 59 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json index 9ea997bd86f20..ca312b3e32319 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e", "name": "MongoDb", "dockerRepository": "airbyte/source-mongodb-v2", - "dockerImageTag": "0.1.9", + "dockerImageTag": "0.1.10", "documentationUrl": "https://docs.airbyte.io/integrations/sources/mongodb-v2", "icon": "mongodb.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 0fc7630fc1c8f..6bad35657522a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -426,7 +426,7 @@ - name: MongoDb sourceDefinitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e dockerRepository: airbyte/source-mongodb-v2 - dockerImageTag: 0.1.9 + dockerImageTag: 0.1.10 documentationUrl: https://docs.airbyte.io/integrations/sources/mongodb-v2 icon: mongodb.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index c125f9d0f5d13..138431f88b884 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4369,7 +4369,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-mongodb-v2:0.1.9" +- dockerImage: "airbyte/source-mongodb-v2:0.1.10" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" @@ -4383,9 +4383,9 @@ properties: instance_type: type: "object" - title: "MongoDb instance type" - description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ - \ Set TLS connection is used by default." + title: "MongoDb Instance Type" + description: "The MongoDb instance to connect to. For MongoDB Atlas and\ + \ Replica Set TLS connection is used by default." order: 0 oneOf: - title: "Standalone MongoDb Instance" @@ -4402,12 +4402,12 @@ host: title: "Host" type: "string" - description: "Host of a Mongo database to be replicated." 
+ description: "The host name of the Mongo database." order: 0 port: title: "Port" type: "integer" - description: "Port of a Mongo database to be replicated." + description: "The port of the Mongo database." minimum: 0 maximum: 65536 default: 27017 @@ -4415,7 +4415,7 @@ - "27017" order: 1 tls: - title: "TLS connection" + title: "TLS Connection" type: "boolean" description: "Indicates whether TLS encryption protocol will be used\ \ to connect to MongoDB. It is recommended to use TLS connection\ @@ -4434,17 +4434,18 @@ - "replica" default: "replica" server_addresses: - title: "Server addresses" + title: "Server Addresses" type: "string" description: "The members of a replica set. Please specify `host`:`port`\ - \ of each member seperated by comma." + \ of each member separated by comma." examples: - "host1:27017,host2:27017,host3:27017" order: 0 replica_set: title: "Replica Set" type: "string" - description: "A replica set name." + description: "A replica set in MongoDB is a group of mongod processes\ + \ that maintain the same data set." order: 1 - title: "MongoDB Atlas" additionalProperties: false @@ -4460,28 +4461,28 @@ cluster_url: title: "Cluster URL" type: "string" - description: "URL of a cluster to connect to." + description: "The URL of a cluster to connect to." order: 0 database: - title: "Database name" + title: "Database Name" type: "string" - description: "Database to be replicated." + description: "The database you want to replicate." order: 1 user: title: "User" type: "string" - description: "User" + description: "The username which is used to access the database." order: 2 password: title: "Password" type: "string" - description: "Password" + description: "The password associated with this username." airbyte_secret: true order: 3 auth_source: - title: "Authentication source" + title: "Authentication Source" type: "string" - description: "Authentication source where user information is stored" + description: "The authentication source where the user information is stored." default: "admin" examples: - "admin" diff --git a/airbyte-integrations/connectors/source-mongodb-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-mongodb-strict-encrypt/Dockerfile index c93f9cc60b37a..cf8f82ac06b52 100644 --- a/airbyte-integrations/connectors/source-mongodb-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-mongodb-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-mongodb-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.6 +LABEL io.airbyte.version=0.1.7 LABEL io.airbyte.name=airbyte/source-mongodb-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mongodb-strict-encrypt/src/test-integration/resources/expected_spec.json b/airbyte-integrations/connectors/source-mongodb-strict-encrypt/src/test-integration/resources/expected_spec.json index 51c455c75c6eb..26dadbda7bb67 100644 --- a/airbyte-integrations/connectors/source-mongodb-strict-encrypt/src/test-integration/resources/expected_spec.json +++ b/airbyte-integrations/connectors/source-mongodb-strict-encrypt/src/test-integration/resources/expected_spec.json @@ -10,8 +10,8 @@ "properties": { "instance_type": { "type": "object", - "title": "MongoDb instance type", - "description": "MongoDb instance to connect to. For MongoDB Atlas and Replica Set TLS connection is used by default.", + "title": "MongoDb Instance Type", + "description": "The MongoDb instance to connect to. 
For MongoDB Atlas and Replica Set TLS connection is used by default.", "order": 0, "oneOf": [ { @@ -26,13 +26,13 @@ "host": { "title": "Host", "type": "string", - "description": "Host of a Mongo database to be replicated.", + "description": "The host name of the Mongo database.", "order": 0 }, "port": { "title": "Port", "type": "integer", - "description": "Port of a Mongo database to be replicated.", + "description": "The port of the Mongo database.", "minimum": 0, "maximum": 65536, "default": 27017, @@ -51,16 +51,16 @@ "default": "replica" }, "server_addresses": { - "title": "Server addresses", + "title": "Server Addresses", "type": "string", - "description": "The members of a replica set. Please specify `host`:`port` of each member seperated by comma.", + "description": "The members of a replica set. Please specify `host`:`port` of each member separated by comma.", "examples": ["host1:27017,host2:27017,host3:27017"], "order": 0 }, "replica_set": { "title": "Replica Set", "type": "string", - "description": "A replica set name.", + "description": "A replica set in MongoDB is a group of mongod processes that maintain the same data set.", "order": 1 } } @@ -78,7 +78,7 @@ "cluster_url": { "title": "Cluster URL", "type": "string", - "description": "URL of a cluster to connect to.", + "description": "The URL of a cluster to connect to.", "order": 0 } } @@ -86,28 +86,28 @@ ] }, "database": { - "title": "Database name", + "title": "Database Name", "type": "string", - "description": "Database to be replicated.", + "description": "The database you want to replicate.", "order": 1 }, "user": { "title": "User", "type": "string", - "description": "User", + "description": "The username which is used to access the database.", "order": 2 }, "password": { "title": "Password", "type": "string", - "description": "Password", + "description": "The password associated with this username.", "airbyte_secret": true, "order": 3 }, "auth_source": { - "title": "Authentication source", + "title": "Authentication Source", "type": "string", - "description": "Authentication source where user information is stored", + "description": "The authentication source where the user information is stored.", "default": "admin", "examples": ["admin"], "order": 4 diff --git a/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile b/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile index 5330652b6bf29..de0380fd37d97 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile +++ b/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-mongodb-v2 COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.9 +LABEL io.airbyte.version=0.1.10 LABEL io.airbyte.name=airbyte/source-mongodb-v2 diff --git a/airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json b/airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json index d90d4846548e3..e78ca96d82d89 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json @@ -10,8 +10,8 @@ "properties": { "instance_type": { "type": "object", - "title": "MongoDb instance type", - "description": "MongoDb instance to connect to. For MongoDB Atlas and Replica Set TLS connection is used by default.", + "title": "MongoDb Instance Type", + "description": "The MongoDb instance to connect to. 
For MongoDB Atlas and Replica Set TLS connection is used by default.", "order": 0, "oneOf": [ { @@ -26,13 +26,13 @@ "host": { "title": "Host", "type": "string", - "description": "Host of a Mongo database to be replicated.", + "description": "The host name of the Mongo database.", "order": 0 }, "port": { "title": "Port", "type": "integer", - "description": "Port of a Mongo database to be replicated.", + "description": "The port of the Mongo database.", "minimum": 0, "maximum": 65536, "default": 27017, @@ -40,7 +40,7 @@ "order": 1 }, "tls": { - "title": "TLS connection", + "title": "TLS Connection", "type": "boolean", "description": "Indicates whether TLS encryption protocol will be used to connect to MongoDB. It is recommended to use TLS connection if possible. For more information see documentation.", "default": false, @@ -58,16 +58,16 @@ "default": "replica" }, "server_addresses": { - "title": "Server addresses", + "title": "Server Addresses", "type": "string", - "description": "The members of a replica set. Please specify `host`:`port` of each member seperated by comma.", + "description": "The members of a replica set. Please specify `host`:`port` of each member separated by comma.", "examples": ["host1:27017,host2:27017,host3:27017"], "order": 0 }, "replica_set": { "title": "Replica Set", "type": "string", - "description": "A replica set name.", + "description": "A replica set in MongoDB is a group of mongod processes that maintain the same data set.", "order": 1 } } @@ -85,7 +85,7 @@ "cluster_url": { "title": "Cluster URL", "type": "string", - "description": "URL of a cluster to connect to.", + "description": "The URL of a cluster to connect to.", "order": 0 } } @@ -93,28 +93,28 @@ ] }, "database": { - "title": "Database name", + "title": "Database Name", "type": "string", - "description": "Database to be replicated.", + "description": "The database you want to replicate.", "order": 1 }, "user": { "title": "User", "type": "string", - "description": "User", + "description": "The username which is used to access the database.", "order": 2 }, "password": { "title": "Password", "type": "string", - "description": "Password", + "description": "The password associated with this username.", "airbyte_secret": true, "order": 3 }, "auth_source": { - "title": "Authentication source", + "title": "Authentication Source", "type": "string", - "description": "Authentication source where user information is stored", + "description": "The authentication source where the user information is stored.", "default": "admin", "examples": ["admin"], "order": 4 diff --git a/docs/integrations/sources/mongodb-v2.md b/docs/integrations/sources/mongodb-v2.md index 9345e4475256b..0db57aa9c996c 100644 --- a/docs/integrations/sources/mongodb-v2.md +++ b/docs/integrations/sources/mongodb-v2.md @@ -102,12 +102,13 @@ For more information regarding configuration parameters, please see [MongoDb Doc | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.1.9 | 2021-12-07 | [8491](https://github.com/airbytehq/airbyte/pull/8491) | Configure 10000 limit doc reading during Discovery step | -| 0.1.8 | 2021-11-29 | [8306](https://github.com/airbytehq/airbyte/pull/8306) | Added milliseconds for date format for cursor | -| 0.1.7 | 2021-11-22 | [8161](https://github.com/airbytehq/airbyte/pull/8161) | Updated Performance and updated cursor for timestamp type | -| 0.1.5 | 2021-11-17 | [8046](https://github.com/airbytehq/airbyte/pull/8046) | Added milliseconds to convert timestamp to datetime format | 
-| 0.1.4 | 2021-11-15 | [7982](https://github.com/airbytehq/airbyte/pull/7982) | Updated Performance | -| 0.1.3 | 2021-10-19 | [7160](https://github.com/airbytehq/airbyte/pull/7160) | Fixed nested document parsing | -| 0.1.2 | 2021-10-07 | [6860](https://github.com/airbytehq/airbyte/pull/6860) | Added filter to avoid MongoDb system collections | -| 0.1.1 | 2021-09-21 | [6364](https://github.com/airbytehq/airbyte/pull/6364) | Source MongoDb: added support via TLS/SSL | -| 0.1.0 | 2021-08-30 | [5530](https://github.com/airbytehq/airbyte/pull/5530) | New source: MongoDb ported to java | +| 0.1.10 | 2021-12-30 | [9202](https://github.com/airbytehq/airbyte/pull/9202) | Update connector fields title/description | +| 0.1.9 | 2021-12-07 | [8491](https://github.com/airbytehq/airbyte/pull/8491) | Configure 10000 limit doc reading during Discovery step | +| 0.1.8 | 2021-11-29 | [8306](https://github.com/airbytehq/airbyte/pull/8306) | Added milliseconds for date format for cursor | +| 0.1.7 | 2021-11-22 | [8161](https://github.com/airbytehq/airbyte/pull/8161) | Updated Performance and updated cursor for timestamp type | +| 0.1.5 | 2021-11-17 | [8046](https://github.com/airbytehq/airbyte/pull/8046) | Added milliseconds to convert timestamp to datetime format | +| 0.1.4 | 2021-11-15 | [7982](https://github.com/airbytehq/airbyte/pull/7982) | Updated Performance | +| 0.1.3 | 2021-10-19 | [7160](https://github.com/airbytehq/airbyte/pull/7160) | Fixed nested document parsing | +| 0.1.2 | 2021-10-07 | [6860](https://github.com/airbytehq/airbyte/pull/6860) | Added filter to avoid MongoDb system collections | +| 0.1.1 | 2021-09-21 | [6364](https://github.com/airbytehq/airbyte/pull/6364) | Source MongoDb: added support via TLS/SSL | +| 0.1.0 | 2021-08-30 | [5530](https://github.com/airbytehq/airbyte/pull/5530) | New source: MongoDb ported to java | From dd120a2349039fb85915937ac3a5785eee379fce Mon Sep 17 00:00:00 2001 From: Karina Kuznietsova Date: Thu, 30 Dec 2021 18:15:39 +0200 Subject: [PATCH 004/215] Update fields in source-connectors specifications: python-http-tutorial (#9152) * Changed Titles and Descriptions * fix spec.json Co-authored-by: Karina Co-authored-by: ykurochkin --- .../connectors/source-python-http-tutorial/Dockerfile | 2 +- .../source_python_http_tutorial/spec.json | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/Dockerfile b/airbyte-integrations/connectors/source-python-http-tutorial/Dockerfile index d0337e7511ee6..d27f2e3884faa 100644 --- a/airbyte-integrations/connectors/source-python-http-tutorial/Dockerfile +++ b/airbyte-integrations/connectors/source-python-http-tutorial/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-python-http-tutorial diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/spec.json b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/spec.json index aa0e2c9e09806..c62b4b93f0142 100644 --- a/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/spec.json +++ b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/spec.json @@ -8,12 +8,14 @@ "additionalProperties": false, "properties": { "start_date": { + "title": "Start Date", "type": "string", - "description": "Start getting data from that date.", + "description": "UTC date and time in the format 2017-01-25. Any data before this date will not be replicated.", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", "examples": ["YYYY-MM-DD"] }, "base": { + "title": "Currency", "type": "string", "description": "ISO reference currency. See here." } From ff45a31eacf16aabac1e8600fbb614aca5ec8481 Mon Sep 17 00:00:00 2001 From: Vadym Date: Thu, 30 Dec 2021 19:37:56 +0200 Subject: [PATCH 005/215] SonarQube QA GitHub jobs (#8668) * Add detect changed modules and build reports workflow. Add python scripts for given workflow. Add unit tests for detect_changed_modules.py. * Update build_static_checkers_reports.py. Update task.py to work within any python module. Fixed report generation. * Update Detect Changed Modules action. * Fix action. Update unit tests. Code cleanup and fixes. * Wrap ci_static_check_reports into bash command. * Detect Changed Modules add install requirements command. * Update ci_detect_changed_modules package. * Rename ci_build_checkers_reports -> ci_build_python_checkers_reports. * Return json as ci_detect_changed_modules output. * Update Detect Changed Modules set output. * Update unit tests. Add changed module filter. 
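For reference, a minimal sketch of how the two new console scripts fit together
(the invocation mirrors the workflow added below; the output shape is taken from the
unit tests, so the concrete module list is illustrative only):

    # list changed modules (currently python only) on the current branch
    ci_detect_changed_modules $(git diff --name-only $(git merge-base HEAD origin/master))
    # prints JSON such as: [{"dir": "airbyte-cdk/python", "lang": "py"}]

    # pass that JSON to the report builder, which writes checker reports
    # (black, flake, isort, mypy, coverage, pytest) under static_checker_reports/
    ci_build_python_checkers_reports '[{"dir": "airbyte-cdk/python", "lang": "py"}]'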
--- ...tect-changed-modules-and-build-reports.yml | 42 ++++++++ .gitignore | 1 + airbyte-integrations/connectors/tasks.py | 28 ++++- tools/ci_static_check_reports/__init__.py | 3 + .../__init__.py | 3 + .../main.py | 100 ++++++++++++++++++ .../ci_detect_changed_modules/__init__.py | 0 .../ci_detect_changed_modules/main.py | 52 +++++++++ tools/ci_static_check_reports/setup.py | 29 +++++ .../unit_tests/__init__.py | 3 + .../test_build_static_checkers_reports.py | 42 ++++++++ .../unit_tests/test_detect_changed_modules.py | 58 ++++++++++ 12 files changed, 356 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/detect-changed-modules-and-build-reports.yml create mode 100644 tools/ci_static_check_reports/__init__.py create mode 100644 tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py create mode 100644 tools/ci_static_check_reports/ci_build_python_static_checkers_reports/main.py create mode 100644 tools/ci_static_check_reports/ci_detect_changed_modules/__init__.py create mode 100644 tools/ci_static_check_reports/ci_detect_changed_modules/main.py create mode 100644 tools/ci_static_check_reports/setup.py create mode 100644 tools/ci_static_check_reports/unit_tests/__init__.py create mode 100644 tools/ci_static_check_reports/unit_tests/test_build_static_checkers_reports.py create mode 100644 tools/ci_static_check_reports/unit_tests/test_detect_changed_modules.py diff --git a/.github/workflows/detect-changed-modules-and-build-reports.yml b/.github/workflows/detect-changed-modules-and-build-reports.yml new file mode 100644 index 0000000000000..ca5d9f9e730d2 --- /dev/null +++ b/.github/workflows/detect-changed-modules-and-build-reports.yml @@ -0,0 +1,42 @@ +name: Detect Changed Modules and Build Reports +on: + push: +jobs: + detect-changed-modules: + name: Detect Changed Modules + timeout-minutes: 5 + runs-on: ubuntu-latest + outputs: + changed-modules: ${{ steps.detect-changed-modules.outputs.changed-modules }} + steps: + - name: Checkout Airbyte + uses: actions/checkout@v2 + with: + fetch-depth: 1000 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Intall Requirements + run: pip install ./tools/ci_static_check_reports/. + - name: Detect Changed Modules + id: detect-changed-modules + run: | + git fetch + echo "::set-output name=changed-modules::'$(ci_detect_changed_modules $(git diff --name-only $(git merge-base HEAD origin/master)))'" + build-reports: + name: Build Python Static Checkers Reports + needs: + - detect-changed-modules + runs-on: ubuntu-latest + steps: + - name: Checkout Airbyte + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Intall Requirements + run: pip install ./tools/ci_static_check_reports/. 
+ - name: Build Reports + run: ci_build_python_checkers_reports ${{needs.detect-changed-modules.outputs.changed-modules}} diff --git a/.gitignore b/.gitignore index a226c06883014..17b544c1d7e26 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ data .project .settings **/gmon.out +static_checker_reports/ # Logs acceptance_tests_logs/ diff --git a/airbyte-integrations/connectors/tasks.py b/airbyte-integrations/connectors/tasks.py index 2082e9900e917..a210d283a0416 100644 --- a/airbyte-integrations/connectors/tasks.py +++ b/airbyte-integrations/connectors/tasks.py @@ -6,7 +6,7 @@ import tempfile from glob import glob from multiprocessing import Pool -from typing import Any, Dict, Iterable, List, Set +from typing import Any, Dict, Iterable, List, Optional, Set import virtualenv from invoke import Context, Exit, task @@ -22,9 +22,11 @@ "colorama": "0.4.4", "coverage": "6.2", "flake": "0.0.1a2", + "flake_junit": "2.1", "isort": "5.10.1", "mccabe": "0.6.1", "mypy": "0.910", + "lxml": "4.7", } @@ -94,13 +96,26 @@ def _run_single_connector_task(args: Iterable) -> int: return _run_task(*args) -def _run_task(ctx: Context, connector_string: str, task_name: str, multi_envs: bool = True, **kwargs: Any) -> int: +def _run_task( + ctx: Context, + connector_string: str, + task_name: str, + multi_envs: bool = True, + module_path: Optional[str] = None, + task_commands: Dict = TASK_COMMANDS, + **kwargs: Any, +) -> int: """ Run task in its own environment. """ + cur_dir = os.getcwd() if multi_envs: - source_path = f"source_{connector_string.replace('-', '_')}" - os.chdir(os.path.join(CONNECTORS_DIR, f"source-{connector_string}")) + if module_path: + os.chdir(module_path) + source_path = connector_string + else: + os.chdir(os.path.join(CONNECTORS_DIR, f"source-{connector_string}")) + source_path = f"source_{connector_string.replace('-', '_')}" else: source_path = connector_string @@ -111,7 +126,7 @@ def _run_task(ctx: Context, connector_string: str, task_name: str, multi_envs: b commands = [] - commands.extend([cmd.format(source_path=source_path, venv=venv_name, **kwargs) for cmd in TASK_COMMANDS[task_name]]) + commands.extend([cmd.format(source_path=source_path, venv=venv_name, **kwargs) for cmd in task_commands[task_name]]) exit_code: int = 0 @@ -125,6 +140,9 @@ def _run_task(ctx: Context, connector_string: str, task_name: str, multi_envs: b finally: shutil.rmtree(venv_name, ignore_errors=True) + if module_path: + os.chdir(cur_dir) + return exit_code diff --git a/tools/ci_static_check_reports/__init__.py b/tools/ci_static_check_reports/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/tools/ci_static_check_reports/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py b/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# diff --git a/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/main.py b/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/main.py new file mode 100644 index 0000000000000..6e779b3e4e1f2 --- /dev/null +++ b/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/main.py @@ -0,0 +1,100 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# +import argparse +import json +import os +import sys +from typing import Dict, List + +from invoke import Context + +sys.path.insert(0, "airbyte-integrations/connectors") +from tasks import CONFIG_FILE, TOOLS_VERSIONS, _run_task # noqa + +TASK_COMMANDS: Dict[str, List[str]] = { + "black": [ + f"pip install black~={TOOLS_VERSIONS['black']}", + f"XDG_CACHE_HOME={os.devnull} black -v {{check_option}} --diff {{source_path}}/. > {{reports_path}}/black.txt", + ], + "coverage": [ + "pip install .", + f"pip install coverage[toml]~={TOOLS_VERSIONS['coverage']}", + "coverage xml --rcfile={toml_config_file} -o {reports_path}/coverage.xml", + ], + "flake": [ + f"pip install mccabe~={TOOLS_VERSIONS['mccabe']}", + f"pip install pyproject-flake8~={TOOLS_VERSIONS['flake']}", + f"pip install flake8-junit-report~={TOOLS_VERSIONS['flake_junit']}", + "pflake8 -v {source_path} --output-file={reports_path}/flake.txt --bug-report", + "flake8_junit {reports_path}/flake.txt {reports_path}/flake.xml", + "rm -f {reports_path}/flake.txt", + ], + "isort": [ + f"pip install colorama~={TOOLS_VERSIONS['colorama']}", + f"pip install isort~={TOOLS_VERSIONS['isort']}", + "isort -v {check_option} {source_path}/. > {reports_path}/isort.txt", + ], + "mypy": [ + "pip install .", + f"pip install lxml~={TOOLS_VERSIONS['lxml']}", + f"pip install mypy~={TOOLS_VERSIONS['mypy']}", + "mypy {source_path} --config-file={toml_config_file} --cobertura-xml-report={reports_path}", + ], + "test": [ + "mkdir {venv}/source-acceptance-test", + "cp -f $(git ls-tree -r HEAD --name-only {source_acceptance_test_path} | tr '\n' ' ') {venv}/source-acceptance-test", + "pip install build", + f"python -m build {os.path.join('{venv}', 'source-acceptance-test')}", + f"pip install {os.path.join('{venv}', 'source-acceptance-test', 'dist', 'source_acceptance_test-*.whl')}", + "[ -f requirements.txt ] && pip install -r requirements.txt 2> /dev/null", + "pip install .", + "pip install .[tests]", + "pip install pytest-cov", + "pytest -v --cov={source_path} --cov-report xml:{reports_path}/pytest.xml {source_path}/unit_tests", + ], +} + + +def build_static_checkers_reports(modules: list, static_checker_reports_path: str) -> int: + ctx = Context() + toml_config_file = os.path.join(os.getcwd(), "pyproject.toml") + + for module_path in modules: + reports_path = f"{os.getcwd()}/{static_checker_reports_path}/{module_path}" + if not os.path.exists(reports_path): + os.makedirs(reports_path) + + for checker in TASK_COMMANDS: + _run_task( + ctx, + f"{os.getcwd()}/{module_path}", + checker, + module_path=module_path, + multi_envs=True, + check_option="", + task_commands=TASK_COMMANDS, + toml_config_file=toml_config_file, + reports_path=reports_path, + source_acceptance_test_path=os.path.join(os.getcwd(), "airbyte-integrations/bases/source-acceptance-test"), + ) + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description="Working with Python Static Report Builder.") + parser.add_argument("changed_modules", nargs="*") + parser.add_argument("--static-checker-reports-path", help="SonarQube host", required=False, type=str, 
default="static_checker_reports") + + args = parser.parse_args() + changed_python_module_paths = [ + module["dir"] + for module in json.loads(args.changed_modules[0]) + if module["lang"] == "py" and os.path.exists(module["dir"]) and "setup.py" in os.listdir(module["dir"]) + ] + print("Changed python modules: ", changed_python_module_paths) + return build_static_checkers_reports(changed_python_module_paths, static_checker_reports_path=args.static_checker_reports_path) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/ci_static_check_reports/ci_detect_changed_modules/__init__.py b/tools/ci_static_check_reports/ci_detect_changed_modules/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tools/ci_static_check_reports/ci_detect_changed_modules/main.py b/tools/ci_static_check_reports/ci_detect_changed_modules/main.py new file mode 100644 index 0000000000000..a2a68c3be06f1 --- /dev/null +++ b/tools/ci_static_check_reports/ci_detect_changed_modules/main.py @@ -0,0 +1,52 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# +import json +import os +import sys +from typing import Dict, List, Set + +# Filenames used to detect whether the dir is a module +LANGUAGE_MODULE_ID_FILE = { + ".py": "setup.py", + # TODO: Add ID files for other languages +} + + +def find_base_path(path: str, modules: List[Dict[str, str]], unique_modules: Set[str], file_ext: str = "", lookup_file: str = None) -> None: + filename, file_extension = os.path.splitext(path) + lookup_file = lookup_file or LANGUAGE_MODULE_ID_FILE.get(file_extension) + + dir_path = os.path.dirname(filename) + if dir_path and os.path.exists(dir_path): + is_module_root = lookup_file in os.listdir(dir_path) + if is_module_root: + if dir_path not in unique_modules: + modules.append({"dir": dir_path, "lang": file_ext[1:]}) + unique_modules.add(dir_path) + else: + find_base_path(dir_path, modules, unique_modules, file_ext=file_extension, lookup_file=lookup_file) + + +def list_changed_modules(changed_files: List[str]) -> List[Dict[str, str]]: + """ + changed_filed are the list of files which were modified in current branch. + E.g. changed_files = ["tools/ci_static_check_reports/__init__.py", "tools/ci_static_check_reports/setup.py", ...] + """ + + modules: List[Dict[str, str]] = [] + unique_modules: set = set() + for file_path in changed_files: + _, file_extension = os.path.splitext(file_path) + find_base_path(file_path, modules, file_ext=file_extension, unique_modules=unique_modules) + return modules + + +def main() -> int: + changed_modules = list_changed_modules(sys.argv[1:]) + print(json.dumps(changed_modules)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/ci_static_check_reports/setup.py b/tools/ci_static_check_reports/setup.py new file mode 100644 index 0000000000000..4241142328843 --- /dev/null +++ b/tools/ci_static_check_reports/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +TEST_REQUIREMENTS = [ + "pytest~=6.1", +] + +setup( + name="ci_static_check_reports", + description="CI tool to detect changed modules and then generate static check reports.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=["invoke~=1.6.0", "virtualenv~=20.10.0"], + package_data={"": ["*.json", "schemas/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, + entry_points={ + "console_scripts": [ + "ci_detect_changed_modules = ci_detect_changed_modules.main:main", + "ci_build_python_checkers_reports = ci_build_python_static_checkers_reports.main:main", + ], + }, +) diff --git a/tools/ci_static_check_reports/unit_tests/__init__.py b/tools/ci_static_check_reports/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/tools/ci_static_check_reports/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/tools/ci_static_check_reports/unit_tests/test_build_static_checkers_reports.py b/tools/ci_static_check_reports/unit_tests/test_build_static_checkers_reports.py new file mode 100644 index 0000000000000..77b9437d4a727 --- /dev/null +++ b/tools/ci_static_check_reports/unit_tests/test_build_static_checkers_reports.py @@ -0,0 +1,42 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# +import os +import subprocess + +import pytest + + +@pytest.mark.parametrize( + "changed_module,should_build_reports", + [ + ('[{"dir": "tools/ci_static_check_reports", "lang": "py"}]', True), + ('[{"dir": "airbyte-integrations/connectors/destination-bigquery", "lang": "java"}]', False), + ('[{"dir": "airbyte-integrations/connectors/not-existing-module", "lang": "other"}]', False), + ], +) +def test_build_static_checkers_reports(changed_module: str, should_build_reports: bool) -> None: + subprocess.call(["ci_build_python_checkers_reports", changed_module], shell=False) + static_checker_reports_path = f"static_checker_reports/{changed_module}" + + static_checker_reports_path_exists = os.path.exists(static_checker_reports_path) + black_exists = os.path.exists(os.path.join(static_checker_reports_path, "black.txt")) + coverage_exists = os.path.exists(os.path.join(static_checker_reports_path, "coverage.xml")) + flake_exists = os.path.exists(os.path.join(static_checker_reports_path, "flake.xml")) + isort_exists = os.path.exists(os.path.join(static_checker_reports_path, "isort.txt")) + cobertura_exists = os.path.exists(os.path.join(static_checker_reports_path, "cobertura.xml")) + pytest_exists = os.path.exists(os.path.join(static_checker_reports_path, "pytest.xml")) + report_paths_exist = [ + static_checker_reports_path_exists, + black_exists, + coverage_exists, + flake_exists, + isort_exists, + cobertura_exists, + pytest_exists, + ] + + if should_build_reports: + assert all(report_paths_exist) + else: + assert not all(report_paths_exist) diff --git a/tools/ci_static_check_reports/unit_tests/test_detect_changed_modules.py b/tools/ci_static_check_reports/unit_tests/test_detect_changed_modules.py new file mode 100644 index 0000000000000..468e7dc21ac09 --- /dev/null +++ b/tools/ci_static_check_reports/unit_tests/test_detect_changed_modules.py @@ -0,0 +1,58 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# +from typing import List, Set + +import pytest +from ci_detect_changed_modules.main import list_changed_modules + + +@pytest.mark.parametrize( + "changed_files,changed_modules", + [ + (["path/to/file1", "file2.txt", "path/to/file3.txt"], []), + ( + [ + "airbyte-cdk/python/airbyte_cdk/entrypoint.py", + "airbyte-cdk/python/airbyte_cdk/file1", + "airbyte-cdk/python/airbyte_cdk/file2.py", + ], + [{"dir": "airbyte-cdk/python", "lang": "py"}], + ), + ( + [ + "airbyte-cdk/python/airbyte_cdk/entrypoint.py", + "airbyte-integrations/connectors/source-asana/source_asana/streams.py", + "airbyte-integrations/connectors/source-asana/source_asana/source.py", + "airbyte-integrations/connectors/source-braintree/integration_tests/abnormal_state.json", + ], + [{"dir": "airbyte-cdk/python", "lang": "py"}, {"dir": "airbyte-integrations/connectors/source-asana", "lang": "py"}], + ), + ( + [], + [], + ), + # TODO: update test after non-python modules are supported + ( + [ + "airbyte-integrations/connectors/source-clickhouse-strict-encrypt/src/main/" + "java/io/airbyte/integrations/source/clickhouse/ClickHouseStrictEncryptSource.java" + ], + [], + ), + ( + ["airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json"], + [], + ), + ( + ["airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py"], + [ + {"dir": "airbyte-integrations/connectors/destination-amazon-sqs", "lang": "py"}, + ], + ), + ], +) +def test_list_changed_modules(changed_files: List[str], changed_modules: Set[str]) -> None: + calculated_changed_modules = list_changed_modules(changed_files) + + assert calculated_changed_modules == changed_modules From e3b727beba18ce9b0c17c577f75d0bdcd5b5dc9f Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Thu, 30 Dec 2021 23:31:40 -0800 Subject: [PATCH 006/215] Remove json avro schema converter hack (#9232) * Remove hack code * Format code * Update log message * Limit change to gcs formatter --- ...ltBigQueryDenormalizedRecordFormatter.java | 34 +++++++-------- ...csBigQueryDenormalizedRecordFormatter.java | 28 +++++++------ .../uploader/BigQueryUploaderFactory.java | 42 +++++++++---------- .../destination/gcs/avro/GcsAvroWriter.java | 16 +++---- .../s3/avro/JsonToAvroSchemaConverter.java | 1 - 5 files changed, 63 insertions(+), 58 deletions(-) diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/DefaultBigQueryDenormalizedRecordFormatter.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/DefaultBigQueryDenormalizedRecordFormatter.java index 7632680638470..a08751b25164f 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/DefaultBigQueryDenormalizedRecordFormatter.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/DefaultBigQueryDenormalizedRecordFormatter.java @@ -50,26 +50,26 @@ public class DefaultBigQueryDenormalizedRecordFormatter extends DefaultBigQueryR private final Set fieldsContainRefDefinitionValue = new HashSet<>(); - public DefaultBigQueryDenormalizedRecordFormatter(JsonNode jsonSchema, StandardNameTransformer namingResolver) { + public DefaultBigQueryDenormalizedRecordFormatter(final JsonNode jsonSchema, final StandardNameTransformer namingResolver) { 
super(jsonSchema, namingResolver); } @Override - protected JsonNode formatJsonSchema(JsonNode jsonSchema) { + protected JsonNode formatJsonSchema(final JsonNode jsonSchema) { populateEmptyArrays(jsonSchema); surroundArraysByObjects(jsonSchema); return jsonSchema; } - private List findArrays(JsonNode node) { + private List findArrays(final JsonNode node) { if (node != null) { return node.findParents(TYPE_FIELD).stream() .filter( jsonNode -> { - JsonNode type = jsonNode.get(TYPE_FIELD); + final JsonNode type = jsonNode.get(TYPE_FIELD); if (type.isArray()) { - ArrayNode typeNode = (ArrayNode) type; - for (JsonNode arrayTypeNode : typeNode) { + final ArrayNode typeNode = (ArrayNode) type; + for (final JsonNode arrayTypeNode : typeNode) { if (arrayTypeNode.isTextual() && arrayTypeNode.textValue().equals("array")) return true; } @@ -84,21 +84,21 @@ private List findArrays(JsonNode node) { } } - private void populateEmptyArrays(JsonNode node) { + private void populateEmptyArrays(final JsonNode node) { findArrays(node).forEach(jsonNode -> { if (!jsonNode.has(ARRAY_ITEMS_FIELD)) { - ObjectNode nodeToChange = (ObjectNode) jsonNode; + final ObjectNode nodeToChange = (ObjectNode) jsonNode; nodeToChange.putObject(ARRAY_ITEMS_FIELD).putArray(TYPE_FIELD).add("string"); } }); } - private void surroundArraysByObjects(JsonNode node) { + private void surroundArraysByObjects(final JsonNode node) { findArrays(node).forEach( jsonNode -> { - JsonNode arrayNode = jsonNode.deepCopy(); + final JsonNode arrayNode = jsonNode.deepCopy(); - ObjectNode newNode = (ObjectNode) jsonNode; + final ObjectNode newNode = (ObjectNode) jsonNode; newNode.removeAll(); newNode.putArray(TYPE_FIELD).add("object"); newNode.putObject(PROPERTIES_FIELD).set(NESTED_ARRAY_FIELD, arrayNode); @@ -127,7 +127,7 @@ public JsonNode formatRecord(final AirbyteRecordMessage recordMessage) { return data; } - protected void addAirbyteColumns(ObjectNode data, final AirbyteRecordMessage recordMessage) { + protected void addAirbyteColumns(final ObjectNode data, final AirbyteRecordMessage recordMessage) { final long emittedAtMicroseconds = recordMessage.getEmittedAt(); final String formattedEmittedAt = QueryParameterValue.timestamp(emittedAtMicroseconds).getValue(); @@ -157,7 +157,7 @@ protected void formatDateTimeFields(final FieldList fields, final JsonNode root) } } - private JsonNode getArrayNode(FieldList fields, JsonNode root) { + private JsonNode getArrayNode(final FieldList fields, final JsonNode root) { // Arrays can have only one field final Field arrayField = fields.get(0); // If an array of records, we should use subfields @@ -174,7 +174,7 @@ private JsonNode getArrayNode(FieldList fields, JsonNode root) { return Jsons.jsonNode(ImmutableMap.of(NESTED_ARRAY_FIELD, items)); } - private JsonNode getObjectNode(FieldList fields, JsonNode root) { + private JsonNode getObjectNode(final FieldList fields, final JsonNode root) { final List fieldNames = fields.stream().map(Field::getName).collect(Collectors.toList()); return Jsons.jsonNode(Jsons.keys(root).stream() .filter(key -> { @@ -191,7 +191,7 @@ private JsonNode getObjectNode(FieldList fields, JsonNode root) { @Override public Schema getBigQuerySchema(final JsonNode jsonSchema) { - final List fieldList = getSchemaFields(namingResolver, jsonSchema); + final List fieldList = getSchemaFields(namingResolver, jsonSchema); if (fieldList.stream().noneMatch(f -> f.getName().equals(JavaBaseConstants.COLUMN_NAME_AB_ID))) { fieldList.add(Field.of(JavaBaseConstants.COLUMN_NAME_AB_ID, 
StandardSQLTypeName.STRING)); } @@ -206,7 +206,7 @@ private List getSchemaFields(final StandardNameTransformer namingResolver LOGGER.info("getSchemaFields : " + jsonSchema + " namingResolver " + namingResolver); Preconditions.checkArgument(jsonSchema.isObject() && jsonSchema.has(PROPERTIES_FIELD)); final ObjectNode properties = (ObjectNode) jsonSchema.get(PROPERTIES_FIELD); - List tmpFields = Jsons.keys(properties).stream() + final List tmpFields = Jsons.keys(properties).stream() .peek(addToRefList(properties)) .map(key -> getField(namingResolver, key, properties.get(key)) .build()) @@ -227,7 +227,7 @@ private List getSchemaFields(final StandardNameTransformer namingResolver * Currently, AirByte doesn't support parsing value by $ref key definition. The issue to * track this 7725 */ - private Consumer addToRefList(ObjectNode properties) { + private Consumer addToRefList(final ObjectNode properties) { return key -> { if (properties.get(key).has(REF_DEFINITION_KEY)) { fieldsContainRefDefinitionValue.add(key); diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/GcsBigQueryDenormalizedRecordFormatter.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/GcsBigQueryDenormalizedRecordFormatter.java index 123e8488b9dfb..55adb42164636 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/GcsBigQueryDenormalizedRecordFormatter.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/GcsBigQueryDenormalizedRecordFormatter.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.cloud.bigquery.Schema; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.StandardNameTransformer; @@ -16,28 +17,31 @@ public class GcsBigQueryDenormalizedRecordFormatter extends DefaultBigQueryDenormalizedRecordFormatter { public GcsBigQueryDenormalizedRecordFormatter( - JsonNode jsonSchema, - StandardNameTransformer namingResolver) { + final JsonNode jsonSchema, + final StandardNameTransformer namingResolver) { super(jsonSchema, namingResolver); } @Override - protected JsonNode formatJsonSchema(JsonNode jsonSchema) { + protected JsonNode formatJsonSchema(final JsonNode jsonSchema) { var textJson = Jsons.serialize(jsonSchema); - /* - * BigQuery avro file loader doesn't support DatTime transformation - * https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#logical_types Replace - * date-time by timestamp - */ - textJson = textJson.replace("\"format\":\"date-time\"", "\"format\":\"timestamp-micros\""); - // Add string type for Refs - // Avro header convertor requires types for all fields textJson = textJson.replace("{\"$ref\":\"", "{\"type\":[\"string\"], \"$ref\":\""); return super.formatJsonSchema(Jsons.deserialize(textJson)); } @Override - protected void addAirbyteColumns(ObjectNode data, AirbyteRecordMessage recordMessage) { + public Schema getBigQuerySchema(final JsonNode jsonSchema) { + final String schemaString = Jsons.serialize(jsonSchema) + // BigQuery avro file loader doesn't support date-time + // 
https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#logical_types + // So we use timestamp for date-time + .replace("\"format\":\"date-time\"", "\"format\":\"timestamp-micros\""); + final JsonNode bigQuerySchema = Jsons.deserialize(schemaString); + return super.getBigQuerySchema(bigQuerySchema); + } + + @Override + protected void addAirbyteColumns(final ObjectNode data, final AirbyteRecordMessage recordMessage) { final long emittedAtMicroseconds = TimeUnit.MILLISECONDS.convert(recordMessage.getEmittedAt(), TimeUnit.MILLISECONDS); data.put(JavaBaseConstants.COLUMN_NAME_AB_ID, UUID.randomUUID().toString()); diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/uploader/BigQueryUploaderFactory.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/uploader/BigQueryUploaderFactory.java index 71e995d24339e..39d45f796f964 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/uploader/BigQueryUploaderFactory.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/uploader/BigQueryUploaderFactory.java @@ -36,7 +36,7 @@ public class BigQueryUploaderFactory { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryUploaderFactory.class); - public static AbstractBigQueryUploader getUploader(UploaderConfig uploaderConfig) + public static AbstractBigQueryUploader getUploader(final UploaderConfig uploaderConfig) throws IOException { final String schemaName = BigQueryUtils.getSchema(uploaderConfig.getConfig(), uploaderConfig.getConfigStream()); @@ -45,11 +45,11 @@ public static AbstractBigQueryUploader getUploader(UploaderConfig uploaderCon final boolean isGcsUploadingMode = UploadingMethod.GCS.equals(BigQueryUtils.getLoadingMethod(uploaderConfig.getConfig())); - BigQueryRecordFormatter recordFormatter = + final BigQueryRecordFormatter recordFormatter = (isGcsUploadingMode ? uploaderConfig.getFormatterMap().get(UploaderType.AVRO) : uploaderConfig.getFormatterMap().get(UploaderType.STANDARD)); - Schema bigQuerySchema = recordFormatter.getBigQuerySchema(); + final Schema bigQuerySchema = recordFormatter.getBigQuerySchema(); BigQueryUtils.createSchemaAndTableIfNeeded( uploaderConfig.getBigQuery(), @@ -86,20 +86,20 @@ public static AbstractBigQueryUploader getUploader(UploaderConfig uploaderCon } private static AbstractGscBigQueryUploader getGcsBigQueryUploader( - JsonNode config, - ConfiguredAirbyteStream configStream, - TableId targetTable, - TableId tmpTable, - BigQuery bigQuery, - JobInfo.WriteDisposition syncMode, - BigQueryRecordFormatter formatter, - boolean isDefaultAirbyteTmpSchema) + final JsonNode config, + final ConfiguredAirbyteStream configStream, + final TableId targetTable, + final TableId tmpTable, + final BigQuery bigQuery, + final JobInfo.WriteDisposition syncMode, + final BigQueryRecordFormatter formatter, + final boolean isDefaultAirbyteTmpSchema) throws IOException { final GcsDestinationConfig gcsDestinationConfig = GcsDestinationConfig.getGcsDestinationConfig( BigQueryUtils.getGcsAvroJsonNodeConfig(config)); - JsonNode tmpTableSchema = + final JsonNode tmpTableSchema = (isDefaultAirbyteTmpSchema ? 
null : formatter.getJsonSchema()); final GcsAvroWriter gcsCsvWriter = initGcsWriter(gcsDestinationConfig, configStream, tmpTableSchema); @@ -119,7 +119,7 @@ private static AbstractGscBigQueryUploader getGcsBigQueryUploader( private static GcsAvroWriter initGcsWriter( final GcsDestinationConfig gcsDestinationConfig, final ConfiguredAirbyteStream configuredStream, - final JsonNode bigQuerySchema) + final JsonNode jsonSchema) throws IOException { final Timestamp uploadTimestamp = new Timestamp(System.currentTimeMillis()); @@ -130,17 +130,17 @@ private static GcsAvroWriter initGcsWriter( configuredStream, uploadTimestamp, JSON_CONVERTER, - bigQuerySchema); + jsonSchema); } private static BigQueryDirectUploader getBigQueryDirectUploader( - JsonNode config, - TableId targetTable, - TableId tmpTable, - BigQuery bigQuery, - JobInfo.WriteDisposition syncMode, - String datasetLocation, - BigQueryRecordFormatter formatter) { + final JsonNode config, + final TableId targetTable, + final TableId tmpTable, + final BigQuery bigQuery, + final JobInfo.WriteDisposition syncMode, + final String datasetLocation, + final BigQueryRecordFormatter formatter) { // https://cloud.google.com/bigquery/docs/loading-data-local#loading_data_from_a_local_data_source final WriteChannelConfiguration writeChannelConfiguration = WriteChannelConfiguration.newBuilder(tmpTable) diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java index 441a14e46ea83..775a2e64af415 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java @@ -58,14 +58,16 @@ public GcsAvroWriter(final GcsDestinationConfig config, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final JsonAvroConverter converter, - final JsonNode airbyteSchema) + final JsonNode jsonSchema) throws IOException { super(config, s3Client, configuredStream); - Schema schema = (airbyteSchema == null ? GcsUtils.getDefaultAvroSchema(stream.getName(), stream.getNamespace(), true) - : new JsonToAvroSchemaConverter().getAvroSchema(airbyteSchema, stream.getName(), - stream.getNamespace(), true, false, false, true)); - LOGGER.info("Avro schema : {}", schema); + final Schema schema = jsonSchema == null + ? 
GcsUtils.getDefaultAvroSchema(stream.getName(), stream.getNamespace(), true) + : new JsonToAvroSchemaConverter().getAvroSchema(jsonSchema, stream.getName(), + stream.getNamespace(), true, false, false, true); + LOGGER.info("Avro schema for stream {}: {}", stream.getName(), schema.toString(false)); + final String outputFilename = BaseGcsWriter.getOutputFilename(uploadTimestamp, S3Format.AVRO); objectKey = String.join("/", outputPrefix, outputFilename); gcsFileLocation = String.format("gs://%s/%s", config.getBucketName(), objectKey); @@ -93,8 +95,8 @@ public void write(final UUID id, final AirbyteRecordMessage recordMessage) throw } @Override - public void write(JsonNode formattedData) throws IOException { - GenericData.Record record = avroRecordFactory.getAvroRecord(formattedData); + public void write(final JsonNode formattedData) throws IOException { + final GenericData.Record record = avroRecordFactory.getAvroRecord(formattedData); dataFileWriter.append(record); } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java index 01c207d5e2987..77547aefafb23 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java @@ -201,7 +201,6 @@ Schema getSingleFieldType(final String fieldName, if (fieldDefinition.has("format")) { final String format = fieldDefinition.get("format").asText(); fieldSchema = switch (format) { - case "timestamp-micros" -> LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); case "date-time" -> LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); case "date" -> LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)); case "time" -> LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG)); From 373b440292475f4d952589cae5aeeb84bd4ea32f Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Fri, 31 Dec 2021 11:46:57 +0200 Subject: [PATCH 007/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20Stripe:=20Fix?= =?UTF-8?q?=20"date",=20"arrival=5Fdate"=20fields:=20type:=20number=20(#91?= =?UTF-8?q?48)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix "date", "arrival_date" fields: type: number Signed-off-by: Sergey Chvalyuk --- .../e094cb9a-26de-4645-8761-65c0c425d1de.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/connectors/source-stripe/Dockerfile | 2 +- .../source-stripe/source_stripe/schemas/payouts.json | 5 ++--- .../source-stripe/source_stripe/schemas/transfers.json | 6 ++---- docs/integrations/sources/stripe.md | 1 + 7 files changed, 9 insertions(+), 11 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json index 5915c812520fe..565cb0f98f411 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json +++ 
b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e094cb9a-26de-4645-8761-65c0c425d1de", "name": "Stripe", "dockerRepository": "airbyte/source-stripe", - "dockerImageTag": "0.1.26", + "dockerImageTag": "0.1.27", "documentationUrl": "https://docs.airbyte.io/integrations/sources/stripe", "icon": "stripe.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 6bad35657522a..c094e496eb8c3 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -683,7 +683,7 @@ - name: Stripe sourceDefinitionId: e094cb9a-26de-4645-8761-65c0c425d1de dockerRepository: airbyte/source-stripe - dockerImageTag: 0.1.26 + dockerImageTag: 0.1.27 documentationUrl: https://docs.airbyte.io/integrations/sources/stripe icon: stripe.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 138431f88b884..90496a8b860f5 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -7021,7 +7021,7 @@ type: "string" path_in_connector_config: - "client_secret" -- dockerImage: "airbyte/source-stripe:0.1.26" +- dockerImage: "airbyte/source-stripe:0.1.27" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/stripe" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-stripe/Dockerfile b/airbyte-integrations/connectors/source-stripe/Dockerfile index e1afdfd8ba1c5..3aa22b50ac116 100644 --- a/airbyte-integrations/connectors/source-stripe/Dockerfile +++ b/airbyte-integrations/connectors/source-stripe/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.26 +LABEL io.airbyte.version=0.1.27 LABEL io.airbyte.name=airbyte/source-stripe diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/payouts.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/payouts.json index b21b1844b87c7..49806969dc1bb 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/payouts.json +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/payouts.json @@ -74,8 +74,7 @@ "type": ["null", "object"] }, "date": { - "type": ["null", "string"], - "format": "date-time" + "type": ["null", "integer"] }, "method": { "type": ["null", "string"] @@ -117,7 +116,7 @@ "type": ["null", "string"] }, "arrival_date": { - "type": ["null", "number"] + "type": ["null", "integer"] }, "description": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/transfers.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/transfers.json index 243bfeab93ce9..45ace8c56e4f4 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/transfers.json +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/transfers.json @@ -52,8 +52,7 @@ "type": ["null", "string"] }, "date": { - "type": ["null", "string"], - "format": "date-time" + "type": ["null", "integer"] }, "livemode": { "type": ["null", "boolean"] @@ -83,8 +82,7 @@ "type": ["null", "string"] }, "arrival_date": { - "type": ["null", "string"], - "format": "date-time" + "type": ["null", "integer"] }, "description": { "type": ["null", "string"] diff --git a/docs/integrations/sources/stripe.md b/docs/integrations/sources/stripe.md index 56eb2a6b7825a..5146a768bc0c8 100644 --- a/docs/integrations/sources/stripe.md +++ b/docs/integrations/sources/stripe.md @@ -74,6 +74,7 @@ If you would like to test Airbyte using test data on Stripe, `sk_test_` and `rk_ | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.27 | 2021-12-28 | [9148](https://github.com/airbytehq/airbyte/pull/9148) | Fix `date`, `arrival\_date` fields | | 0.1.26 | 2021-12-21 | [8992](https://github.com/airbytehq/airbyte/pull/8992) | Fix type `events.request` in schema | | 0.1.25 | 2021-11-25 | [8250](https://github.com/airbytehq/airbyte/pull/8250) | Rearrange setup fields | | 0.1.24 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Include tax data in `checkout_sessions_line_items` stream | From 0cc80eeaa940f732c02f8d6c46da32066f2db79c Mon Sep 17 00:00:00 2001 From: Sergei Solonitcyn <11441558+sergei-solonitcyn@users.noreply.github.com> Date: Fri, 31 Dec 2021 12:39:22 +0200 Subject: [PATCH 008/215] fix editorconfig for non-python (#9235) --- .editorconfig | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/.editorconfig b/.editorconfig index 99607eaf727c9..73b614047e4f6 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,20 +1,5 @@ root = true -[*] -charset = utf-8 -indent_size = 4 -indent_style = space -insert_final_newline = true -max_line_length = 140 -tab_width = 4 -ij_continuation_indent_size = 8 -ij_formatter_off_tag = @formatter:off -ij_formatter_on_tag = @formatter:on -ij_formatter_tags_enabled = false -ij_smart_tabs = false -ij_visual_guides = none -ij_wrap_on_typing = false - [{*.bash,*.sh,*.zsh}] indent_size = 2 
tab_width = 2 @@ -52,6 +37,20 @@ ij_markdown_min_lines_around_header = 1 ij_markdown_min_lines_between_paragraphs = 1 [{*.py,*.pyw,Tiltfile}] +charset = utf-8 +indent_size = 4 +indent_style = space +insert_final_newline = true +max_line_length = 140 +tab_width = 4 +ij_continuation_indent_size = 8 +ij_formatter_off_tag = @formatter:off +ij_formatter_on_tag = @formatter:on +ij_formatter_tags_enabled = false +ij_smart_tabs = false +ij_visual_guides = none +ij_wrap_on_typing = false + ij_python_align_collections_and_comprehensions = true ij_python_align_multiline_imports = true ij_python_align_multiline_parameters = true From 99a2ae60805ef824c7a9c184cc96b6bda600d988 Mon Sep 17 00:00:00 2001 From: Yurii Bidiuk <35812734+yurii-bidiuk@users.noreply.github.com> Date: Fri, 31 Dec 2021 17:45:20 +0200 Subject: [PATCH 009/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20DB2:=20fixed=20?= =?UTF-8?q?Db2SourceCertificateAcceptanceTest=20(#9187)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix tests * bump version --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-db2-strict-encrypt/Dockerfile | 2 +- .../Db2StrictEncryptSourceCertificateAcceptanceTest.java | 1 + .../source/db2_strict_encrypt/Db2JdbcSourceAcceptanceTest.java | 1 + airbyte-integrations/connectors/source-db2/Dockerfile | 2 +- .../sources/Db2SourceCertificateAcceptanceTest.java | 1 + docs/integrations/sources/db2.md | 1 + 8 files changed, 8 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index c094e496eb8c3..caf7def7d14e1 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -294,7 +294,7 @@ - name: IBM Db2 sourceDefinitionId: 447e0381-3780-4b46-bb62-00a4e3c8b8e2 dockerRepository: airbyte/source-db2 - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/db2 icon: db2.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 90496a8b860f5..9ae9afede6f3c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2942,7 +2942,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-db2:0.1.3" +- dockerImage: "airbyte/source-db2:0.1.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/db2" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-db2-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-db2-strict-encrypt/Dockerfile index 55080f3ba8ad0..7bbeac405187b 100644 --- a/airbyte-integrations/connectors/source-db2-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-db2-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-db2-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-db2-strict-encrypt diff --git 
a/airbyte-integrations/connectors/source-db2-strict-encrypt/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/Db2StrictEncryptSourceCertificateAcceptanceTest.java b/airbyte-integrations/connectors/source-db2-strict-encrypt/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/Db2StrictEncryptSourceCertificateAcceptanceTest.java index 1d25d4784f08b..eeac2e6814599 100644 --- a/airbyte-integrations/connectors/source-db2-strict-encrypt/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/Db2StrictEncryptSourceCertificateAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-db2-strict-encrypt/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/Db2StrictEncryptSourceCertificateAcceptanceTest.java @@ -178,6 +178,7 @@ private String getCertificate() throws IOException, InterruptedException { db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_KEYDB /database/config/db2inst1/server.kdb"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_STASH /database/config/db2inst1/server.sth"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_LABEL mylabel"); + db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_VERSIONS TLSV12"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVCENAME 50000"); db.execInContainer("su", "-", "db2inst1", "-c", "db2set -i db2inst1 DB2COMM=SSL"); db.execInContainer("su", "-", "db2inst1", "-c", "db2stop force"); diff --git a/airbyte-integrations/connectors/source-db2-strict-encrypt/src/test/java/io/airbyte/integrations/source/db2_strict_encrypt/Db2JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-db2-strict-encrypt/src/test/java/io/airbyte/integrations/source/db2_strict_encrypt/Db2JdbcSourceAcceptanceTest.java index ccb45b9eb44c0..0627551bfeb2a 100644 --- a/airbyte-integrations/connectors/source-db2-strict-encrypt/src/test/java/io/airbyte/integrations/source/db2_strict_encrypt/Db2JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-db2-strict-encrypt/src/test/java/io/airbyte/integrations/source/db2_strict_encrypt/Db2JdbcSourceAcceptanceTest.java @@ -176,6 +176,7 @@ private static String getCertificate() throws IOException, InterruptedException db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_KEYDB /database/config/db2inst1/server.kdb"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_STASH /database/config/db2inst1/server.sth"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_LABEL mylabel"); + db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_VERSIONS TLSV12"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVCENAME 50000"); db.execInContainer("su", "-", "db2inst1", "-c", "db2set -i db2inst1 DB2COMM=SSL"); db.execInContainer("su", "-", "db2inst1", "-c", "db2stop force"); diff --git a/airbyte-integrations/connectors/source-db2/Dockerfile b/airbyte-integrations/connectors/source-db2/Dockerfile index db55be397a48d..29e67afb938df 100644 --- a/airbyte-integrations/connectors/source-db2/Dockerfile +++ b/airbyte-integrations/connectors/source-db2/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-db2 COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.3 +LABEL 
io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-db2 diff --git a/airbyte-integrations/connectors/source-db2/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/Db2SourceCertificateAcceptanceTest.java b/airbyte-integrations/connectors/source-db2/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/Db2SourceCertificateAcceptanceTest.java index 1b985f1a0fc1a..bbcb8735be1bf 100644 --- a/airbyte-integrations/connectors/source-db2/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/Db2SourceCertificateAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-db2/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/Db2SourceCertificateAcceptanceTest.java @@ -172,6 +172,7 @@ private String getCertificate() throws IOException, InterruptedException { db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_KEYDB /database/config/db2inst1/server.kdb"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_STASH /database/config/db2inst1/server.sth"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVR_LABEL mylabel"); + db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_VERSIONS TLSV12"); db.execInContainer("su", "-", "db2inst1", "-c", "db2 update dbm cfg using SSL_SVCENAME 50000"); db.execInContainer("su", "-", "db2inst1", "-c", "db2set -i db2inst1 DB2COMM=SSL"); db.execInContainer("su", "-", "db2inst1", "-c", "db2stop force"); diff --git a/docs/integrations/sources/db2.md b/docs/integrations/sources/db2.md index 8e4eb0cf2ec88..951069ad796cb 100644 --- a/docs/integrations/sources/db2.md +++ b/docs/integrations/sources/db2.md @@ -62,6 +62,7 @@ You can also enter your own password for the keystore, but if you don't, the pas | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.4 | 2021-12-30 | [9187](https://github.com/airbytehq/airbyte/pull/9187) [8749](https://github.com/airbytehq/airbyte/pull/8749) | Add support of JdbcType.ARRAY to JdbcSourceOperations. 
| | 0.1.3 | 2021-11-05 | [7670](https://github.com/airbytehq/airbyte/pull/7670) | Updated unique DB2 types transformation | | 0.1.2 | 2021-10-25 | [7355](https://github.com/airbytehq/airbyte/pull/7355) | Added ssl support | | 0.1.1 | 2021-08-13 | [4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | From a36a8608ca9a26a6e969d87bf42de9f166c64394 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Fri, 31 Dec 2021 18:28:14 -0800 Subject: [PATCH 010/215] add container orchestrator for normalization (#9020) * initial commit * improve labeling * make more generic * refactor constants * move port constants * remove flag * some final config fixes * clean up * oops, didn't check this in * add dbt orchestrator (#9114) * respond to pr feedback --- .../ContainerOrchestratorApp.java | 139 +++++------------- .../DbtJobOrchestrator.java | 69 +++++++++ .../JobOrchestrator.java | 45 ++++++ .../NormalizationJobOrchestrator.java | 68 +++++++++ .../ReplicationJobOrchestrator.java | 107 ++++++++++++++ .../java/io/airbyte/workers/WorkerApp.java | 30 +++- .../temporal/sync/DbtLauncherWorker.java | 137 +++++++++++++++++ .../sync/DbtTransformationActivityImpl.java | 70 +++++---- .../sync/NormalizationActivityImpl.java | 73 ++++----- .../sync/NormalizationLauncherWorker.java | 135 +++++++++++++++++ .../temporal/sync/OrchestratorConstants.java | 45 ++++++ .../sync/ReplicationLauncherWorker.java | 60 ++------ 12 files changed, 757 insertions(+), 221 deletions(-) create mode 100644 airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java create mode 100644 airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/JobOrchestrator.java create mode 100644 airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java create mode 100644 airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtLauncherWorker.java create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationLauncherWorker.java create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java index 8710b21416ddd..833233c0d315a 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java @@ -7,30 +7,16 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.config.Configs; import io.airbyte.config.EnvConfigs; -import io.airbyte.config.ReplicationOutput; -import io.airbyte.config.StandardSyncInput; -import io.airbyte.scheduler.models.IntegrationLauncherConfig; -import io.airbyte.scheduler.models.JobRunConfig; -import io.airbyte.workers.DefaultReplicationWorker; -import io.airbyte.workers.ReplicationWorker; import io.airbyte.workers.WorkerApp; import io.airbyte.workers.WorkerConfigs; -import io.airbyte.workers.WorkerConstants; -import io.airbyte.workers.WorkerException; -import io.airbyte.workers.WorkerUtils; -import io.airbyte.workers.process.AirbyteIntegrationLauncher; import 
io.airbyte.workers.process.DockerProcessFactory; -import io.airbyte.workers.process.IntegrationLauncher; import io.airbyte.workers.process.KubePortManagerSingleton; import io.airbyte.workers.process.KubeProcessFactory; import io.airbyte.workers.process.ProcessFactory; import io.airbyte.workers.process.WorkerHeartbeatServer; -import io.airbyte.workers.protocols.airbyte.AirbyteMessageTracker; -import io.airbyte.workers.protocols.airbyte.AirbyteSource; -import io.airbyte.workers.protocols.airbyte.DefaultAirbyteDestination; -import io.airbyte.workers.protocols.airbyte.DefaultAirbyteSource; -import io.airbyte.workers.protocols.airbyte.EmptyAirbyteSource; -import io.airbyte.workers.protocols.airbyte.NamespacingMapper; +import io.airbyte.workers.temporal.sync.DbtLauncherWorker; +import io.airbyte.workers.temporal.sync.NormalizationLauncherWorker; +import io.airbyte.workers.temporal.sync.OrchestratorConstants; import io.airbyte.workers.temporal.sync.ReplicationLauncherWorker; import io.fabric8.kubernetes.client.DefaultKubernetesClient; import io.fabric8.kubernetes.client.KubernetesClient; @@ -39,14 +25,12 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import lombok.extern.slf4j.Slf4j; /** * Entrypoint for the application responsible for launching containers and handling all message - * passing. Currently, this is only implemented for replication but in the future it will be - * available for normalization and dbt. Also, the current version relies on a heartbeat from a - * Temporal worker. This will also be removed in the future so this can run fully async. + * passing for replication, normalization, and dbt. Also, the current version relies on a heartbeat + * from a Temporal worker. This will also be removed in the future so this can run fully async. * * This application retrieves most of its configuration from copied files from the calling Temporal * worker. @@ -54,108 +38,59 @@ * This app uses default logging which is directly captured by the calling Temporal worker. In the * future this will need to independently interact with cloud storage. 
*/ +@Slf4j public class ContainerOrchestratorApp { - private static final Logger LOGGER = LoggerFactory.getLogger(ContainerOrchestratorApp.class); - - private static void replicationRunner(final Configs configs) throws IOException, WorkerException { - - LOGGER.info("Starting replication runner app..."); - - final WorkerConfigs workerConfigs = new WorkerConfigs(configs); - final ProcessFactory processFactory = getProcessBuilderFactory(configs, workerConfigs); - - LOGGER.info("Attempting to retrieve config files..."); - - final JobRunConfig jobRunConfig = - Jsons.deserialize(Files.readString(Path.of(ReplicationLauncherWorker.INIT_FILE_JOB_RUN_CONFIG)), JobRunConfig.class); - - final IntegrationLauncherConfig sourceLauncherConfig = - Jsons.deserialize(Files.readString(Path.of(ReplicationLauncherWorker.INIT_FILE_SOURCE_LAUNCHER_CONFIG)), IntegrationLauncherConfig.class); - - final IntegrationLauncherConfig destinationLauncherConfig = - Jsons.deserialize(Files.readString(Path.of(ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG)), - IntegrationLauncherConfig.class); - - final StandardSyncInput syncInput = - Jsons.deserialize(Files.readString(Path.of(ReplicationLauncherWorker.INIT_FILE_SYNC_INPUT)), StandardSyncInput.class); - - LOGGER.info("Setting up source launcher..."); - final IntegrationLauncher sourceLauncher = new AirbyteIntegrationLauncher( - sourceLauncherConfig.getJobId(), - Math.toIntExact(sourceLauncherConfig.getAttemptId()), - sourceLauncherConfig.getDockerImage(), - processFactory, - syncInput.getResourceRequirements()); - - LOGGER.info("Setting up destination launcher..."); - final IntegrationLauncher destinationLauncher = new AirbyteIntegrationLauncher( - destinationLauncherConfig.getJobId(), - Math.toIntExact(destinationLauncherConfig.getAttemptId()), - destinationLauncherConfig.getDockerImage(), - processFactory, - syncInput.getResourceRequirements()); - - LOGGER.info("Setting up source..."); - // reset jobs use an empty source to induce resetting all data in destination. - final AirbyteSource airbyteSource = - sourceLauncherConfig.getDockerImage().equals(WorkerConstants.RESET_JOB_SOURCE_DOCKER_IMAGE_STUB) ? 
new EmptyAirbyteSource() - : new DefaultAirbyteSource(workerConfigs, sourceLauncher); - - LOGGER.info("Setting up replication worker..."); - final ReplicationWorker replicationWorker = new DefaultReplicationWorker( - jobRunConfig.getJobId(), - Math.toIntExact(jobRunConfig.getAttemptId()), - airbyteSource, - new NamespacingMapper(syncInput.getNamespaceDefinition(), syncInput.getNamespaceFormat(), syncInput.getPrefix()), - new DefaultAirbyteDestination(workerConfigs, destinationLauncher), - new AirbyteMessageTracker(), - new AirbyteMessageTracker()); - - LOGGER.info("Running replication worker..."); - final Path jobRoot = WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); - final ReplicationOutput replicationOutput = replicationWorker.run(syncInput, jobRoot); - - LOGGER.info("Sending output..."); - // this uses stdout directly because it shouldn't have the logging related prefix - // the replication output is read from the container that launched the runner - System.out.println(Jsons.serialize(replicationOutput)); - - LOGGER.info("Replication runner complete!"); - } - public static void main(final String[] args) throws Exception { WorkerHeartbeatServer heartbeatServer = null; try { // read files that contain all necessary configuration - final String application = Files.readString(Path.of(ReplicationLauncherWorker.INIT_FILE_APPLICATION)); + final String application = Files.readString(Path.of(OrchestratorConstants.INIT_FILE_APPLICATION)); final Map envMap = - (Map) Jsons.deserialize(Files.readString(Path.of(ReplicationLauncherWorker.INIT_FILE_ENV_MAP)), Map.class); + (Map) Jsons.deserialize(Files.readString(Path.of(OrchestratorConstants.INIT_FILE_ENV_MAP)), Map.class); final Configs configs = new EnvConfigs(envMap::get); heartbeatServer = new WorkerHeartbeatServer(WorkerApp.KUBE_HEARTBEAT_PORT); heartbeatServer.startBackground(); - if (application.equals(ReplicationLauncherWorker.REPLICATION)) { - replicationRunner(configs); - } else { - LOGGER.error("Runner failed", new IllegalStateException("Unexpected value: " + application)); - System.exit(1); - } + final WorkerConfigs workerConfigs = new WorkerConfigs(configs); + final ProcessFactory processFactory = getProcessBuilderFactory(configs, workerConfigs); + final JobOrchestrator jobOrchestrator = getJobOrchestrator(configs, workerConfigs, processFactory, application); + + log.info("Starting {} orchestrator...", jobOrchestrator.getOrchestratorName()); + jobOrchestrator.runJob(); + log.info("{} orchestrator complete!", jobOrchestrator.getOrchestratorName()); } finally { if (heartbeatServer != null) { - LOGGER.info("Shutting down heartbeat server..."); + log.info("Shutting down heartbeat server..."); heartbeatServer.stop(); } } // required to kill kube client - LOGGER.info("Runner closing..."); + log.info("Runner closing..."); System.exit(0); } + private static JobOrchestrator getJobOrchestrator(final Configs configs, + final WorkerConfigs workerConfigs, + final ProcessFactory processFactory, + final String application) { + if (application.equals(ReplicationLauncherWorker.REPLICATION)) { + return new ReplicationJobOrchestrator(configs, workerConfigs, processFactory); + } else if (application.equals(NormalizationLauncherWorker.NORMALIZATION)) { + return new NormalizationJobOrchestrator(configs, workerConfigs, processFactory); + } else if (application.equals(DbtLauncherWorker.DBT)) { + return new DbtJobOrchestrator(configs, workerConfigs, processFactory); + } else { + log.error("Runner failed", 
new IllegalStateException("Unexpected value: " + application)); + System.exit(1); + throw new IllegalStateException(); // should never be reached, but necessary to compile + } + } + /** * Creates a process builder factory that will be used to create connector containers/pods. */ @@ -164,11 +99,11 @@ private static ProcessFactory getProcessBuilderFactory(final Configs configs, fi final KubernetesClient fabricClient = new DefaultKubernetesClient(); final String localIp = InetAddress.getLocalHost().getHostAddress(); final String kubeHeartbeatUrl = localIp + ":" + WorkerApp.KUBE_HEARTBEAT_PORT; - LOGGER.info("Using Kubernetes namespace: {}", configs.getJobKubeNamespace()); + log.info("Using Kubernetes namespace: {}", configs.getJobKubeNamespace()); // this needs to have two ports for the source and two ports for the destination (all four must be // exposed) - KubePortManagerSingleton.init(ReplicationLauncherWorker.PORTS); + KubePortManagerSingleton.init(OrchestratorConstants.PORTS); return new KubeProcessFactory(workerConfigs, configs.getJobKubeNamespace(), fabricClient, kubeHeartbeatUrl, false); } else { diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java new file mode 100644 index 0000000000000..c87fd499dc631 --- /dev/null +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.container_orchestrator; + +import io.airbyte.config.Configs; +import io.airbyte.config.OperatorDbtInput; +import io.airbyte.scheduler.models.IntegrationLauncherConfig; +import io.airbyte.scheduler.models.JobRunConfig; +import io.airbyte.workers.DbtTransformationRunner; +import io.airbyte.workers.DbtTransformationWorker; +import io.airbyte.workers.WorkerConfigs; +import io.airbyte.workers.WorkerUtils; +import io.airbyte.workers.normalization.NormalizationRunnerFactory; +import io.airbyte.workers.process.ProcessFactory; +import io.airbyte.workers.temporal.sync.ReplicationLauncherWorker; +import java.nio.file.Path; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class DbtJobOrchestrator implements JobOrchestrator { + + private final Configs configs; + private final WorkerConfigs workerConfigs; + private final ProcessFactory processFactory; + + public DbtJobOrchestrator(final Configs configs, final WorkerConfigs workerConfigs, final ProcessFactory processFactory) { + this.configs = configs; + this.workerConfigs = workerConfigs; + this.processFactory = processFactory; + } + + @Override + public String getOrchestratorName() { + return "DBT Transformation"; + } + + @Override + public Class getInputClass() { + return OperatorDbtInput.class; + } + + @Override + public void runJob() throws Exception { + final JobRunConfig jobRunConfig = readJobRunConfig(); + final OperatorDbtInput dbtInput = readInput(); + + final IntegrationLauncherConfig destinationLauncherConfig = JobOrchestrator.readAndDeserializeFile( + ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG, IntegrationLauncherConfig.class); + + log.info("Setting up dbt worker..."); + final DbtTransformationWorker worker = new DbtTransformationWorker( + jobRunConfig.getJobId(), + Math.toIntExact(jobRunConfig.getAttemptId()), + workerConfigs.getResourceRequirements(), + new DbtTransformationRunner( + workerConfigs, + processFactory, 
NormalizationRunnerFactory.create( + workerConfigs, + destinationLauncherConfig.getDockerImage(), + processFactory))); + + log.info("Running dbt worker..."); + final Path jobRoot = WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); + worker.run(dbtInput, jobRoot); + } + +} diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/JobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/JobOrchestrator.java new file mode 100644 index 0000000000000..d207763a4f28f --- /dev/null +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/JobOrchestrator.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.container_orchestrator; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.scheduler.models.JobRunConfig; +import io.airbyte.workers.temporal.sync.OrchestratorConstants; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +/** + * The job orchestrator helps abstract over container launcher application differences across + * replication, normalization, and custom dbt operators. + * + * @param <INPUT> job input type + */ +public interface JobOrchestrator<INPUT> { + + // used for logging + String getOrchestratorName(); + + // used to serialize the loaded input + Class<INPUT> getInputClass(); + + // reads input from a file that was copied to the container launcher + default INPUT readInput() throws IOException { + return readAndDeserializeFile(OrchestratorConstants.INIT_FILE_INPUT, getInputClass()); + } + + // reads the job run config from a file that was copied to the container launcher + default JobRunConfig readJobRunConfig() throws IOException { + return readAndDeserializeFile(OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, JobRunConfig.class); + } + + // the unique logic that belongs to each type of job belongs here + void runJob() throws Exception; + + static <T> T readAndDeserializeFile(String path, Class<T> type) throws IOException { + return Jsons.deserialize(Files.readString(Path.of(path)), type); + } + +} diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java new file mode 100644 index 0000000000000..c8cb34206716d --- /dev/null +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.container_orchestrator; + +import io.airbyte.config.Configs; +import io.airbyte.config.NormalizationInput; +import io.airbyte.scheduler.models.IntegrationLauncherConfig; +import io.airbyte.scheduler.models.JobRunConfig; +import io.airbyte.workers.DefaultNormalizationWorker; +import io.airbyte.workers.NormalizationWorker; +import io.airbyte.workers.WorkerConfigs; +import io.airbyte.workers.WorkerUtils; +import io.airbyte.workers.normalization.NormalizationRunnerFactory; +import io.airbyte.workers.process.ProcessFactory; +import io.airbyte.workers.temporal.sync.ReplicationLauncherWorker; +import java.nio.file.Path; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class NormalizationJobOrchestrator implements JobOrchestrator { + + private final Configs configs; + private final WorkerConfigs workerConfigs; + private final ProcessFactory processFactory; + + public NormalizationJobOrchestrator(final Configs configs, final WorkerConfigs workerConfigs, final ProcessFactory processFactory) { + this.configs = configs; + this.workerConfigs = workerConfigs; + this.processFactory = processFactory; + } + + @Override + public String getOrchestratorName() { + return "Normalization"; + } + + @Override + public Class getInputClass() { + return NormalizationInput.class; + } + + @Override + public void runJob() throws Exception { + final JobRunConfig jobRunConfig = readJobRunConfig(); + final NormalizationInput normalizationInput = readInput(); + + final IntegrationLauncherConfig destinationLauncherConfig = JobOrchestrator.readAndDeserializeFile( + ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG, IntegrationLauncherConfig.class); + + log.info("Setting up normalization worker..."); + final NormalizationWorker normalizationWorker = new DefaultNormalizationWorker( + jobRunConfig.getJobId(), + Math.toIntExact(jobRunConfig.getAttemptId()), + NormalizationRunnerFactory.create( + workerConfigs, + destinationLauncherConfig.getDockerImage(), + processFactory), + configs.getWorkerEnvironment()); + + log.info("Running normalization worker..."); + final Path jobRoot = WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); + normalizationWorker.run(normalizationInput, jobRoot); + + } + +} diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java new file mode 100644 index 0000000000000..ea8953e74976a --- /dev/null +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.container_orchestrator; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.Configs; +import io.airbyte.config.ReplicationOutput; +import io.airbyte.config.StandardSyncInput; +import io.airbyte.scheduler.models.IntegrationLauncherConfig; +import io.airbyte.scheduler.models.JobRunConfig; +import io.airbyte.workers.DefaultReplicationWorker; +import io.airbyte.workers.ReplicationWorker; +import io.airbyte.workers.WorkerConfigs; +import io.airbyte.workers.WorkerConstants; +import io.airbyte.workers.WorkerUtils; +import io.airbyte.workers.process.AirbyteIntegrationLauncher; +import io.airbyte.workers.process.IntegrationLauncher; +import io.airbyte.workers.process.ProcessFactory; +import io.airbyte.workers.protocols.airbyte.AirbyteMessageTracker; +import io.airbyte.workers.protocols.airbyte.AirbyteSource; +import io.airbyte.workers.protocols.airbyte.DefaultAirbyteDestination; +import io.airbyte.workers.protocols.airbyte.DefaultAirbyteSource; +import io.airbyte.workers.protocols.airbyte.EmptyAirbyteSource; +import io.airbyte.workers.protocols.airbyte.NamespacingMapper; +import io.airbyte.workers.temporal.sync.ReplicationLauncherWorker; +import java.nio.file.Path; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class ReplicationJobOrchestrator implements JobOrchestrator { + + private final ProcessFactory processFactory; + private final WorkerConfigs workerConfigs; + private final Configs configs; + + public ReplicationJobOrchestrator(final Configs configs, final WorkerConfigs workerConfigs, final ProcessFactory processFactory) { + this.configs = configs; + this.workerConfigs = workerConfigs; + this.processFactory = processFactory; + } + + @Override + public String getOrchestratorName() { + return "Replication"; + } + + @Override + public Class getInputClass() { + return StandardSyncInput.class; + } + + @Override + public void runJob() throws Exception { + final JobRunConfig jobRunConfig = readJobRunConfig(); + final StandardSyncInput syncInput = readInput(); + + final IntegrationLauncherConfig sourceLauncherConfig = JobOrchestrator.readAndDeserializeFile( + ReplicationLauncherWorker.INIT_FILE_SOURCE_LAUNCHER_CONFIG, IntegrationLauncherConfig.class); + + final IntegrationLauncherConfig destinationLauncherConfig = JobOrchestrator.readAndDeserializeFile( + ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG, IntegrationLauncherConfig.class); + + log.info("Setting up source launcher..."); + final IntegrationLauncher sourceLauncher = new AirbyteIntegrationLauncher( + sourceLauncherConfig.getJobId(), + Math.toIntExact(sourceLauncherConfig.getAttemptId()), + sourceLauncherConfig.getDockerImage(), + processFactory, + syncInput.getResourceRequirements()); + + log.info("Setting up destination launcher..."); + final IntegrationLauncher destinationLauncher = new AirbyteIntegrationLauncher( + destinationLauncherConfig.getJobId(), + Math.toIntExact(destinationLauncherConfig.getAttemptId()), + destinationLauncherConfig.getDockerImage(), + processFactory, + syncInput.getResourceRequirements()); + + log.info("Setting up source..."); + // reset jobs use an empty source to induce resetting all data in destination. + final AirbyteSource airbyteSource = + sourceLauncherConfig.getDockerImage().equals(WorkerConstants.RESET_JOB_SOURCE_DOCKER_IMAGE_STUB) ? 
new EmptyAirbyteSource() + : new DefaultAirbyteSource(workerConfigs, sourceLauncher); + + log.info("Setting up replication worker..."); + final ReplicationWorker replicationWorker = new DefaultReplicationWorker( + jobRunConfig.getJobId(), + Math.toIntExact(jobRunConfig.getAttemptId()), + airbyteSource, + new NamespacingMapper(syncInput.getNamespaceDefinition(), syncInput.getNamespaceFormat(), syncInput.getPrefix()), + new DefaultAirbyteDestination(workerConfigs, destinationLauncher), + new AirbyteMessageTracker(), + new AirbyteMessageTracker()); + + log.info("Running replication worker..."); + final Path jobRoot = WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); + final ReplicationOutput replicationOutput = replicationWorker.run(syncInput, jobRoot); + + log.info("Sending output..."); + // this uses stdout directly because it shouldn't have the logging related prefix + // the replication output is read from the container that launched the runner + System.out.println(Jsons.serialize(replicationOutput)); + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java index 7cd95053089dc..5057e5463c051 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java @@ -140,15 +140,33 @@ public void start() { databasePassword, databaseUrl, airbyteVersion)); final NormalizationActivityImpl normalizationActivity = - new NormalizationActivityImpl(workerConfigs, jobProcessFactory, secretsHydrator, workspaceRoot, workerEnvironment, - logConfigs, databaseUser, - databasePassword, databaseUrl, airbyteVersion); + new NormalizationActivityImpl( + containerOrchestratorEnabled, + workerConfigs, + jobProcessFactory, + orchestratorProcessFactory, + secretsHydrator, + workspaceRoot, + workerEnvironment, + logConfigs, + databaseUser, + databasePassword, + databaseUrl, + airbyteVersion); final DbtTransformationActivityImpl dbtTransformationActivity = - new DbtTransformationActivityImpl(workerConfigs, jobProcessFactory, secretsHydrator, + new DbtTransformationActivityImpl( + containerOrchestratorEnabled, + workerConfigs, + jobProcessFactory, + orchestratorProcessFactory, + secretsHydrator, workspaceRoot, - workerEnvironment, logConfigs, + workerEnvironment, + logConfigs, databaseUser, - databasePassword, databaseUrl, airbyteVersion); + databasePassword, + databaseUrl, + airbyteVersion); new PersistStateActivityImpl(workspaceRoot, configRepository); final PersistStateActivityImpl persistStateActivity = new PersistStateActivityImpl(workspaceRoot, configRepository); final Worker syncWorker = factory.newWorker(TemporalJobType.SYNC.name(), getWorkerOptions(maxWorkers.getMaxSyncWorkers())); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtLauncherWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtLauncherWorker.java new file mode 100644 index 0000000000000..4a3eb9eb9efb7 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtLauncherWorker.java @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.workers.temporal.sync; + +import io.airbyte.commons.io.LineGobbler; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.logging.LoggingHelper; +import io.airbyte.commons.logging.MdcScope; +import io.airbyte.config.OperatorDbtInput; +import io.airbyte.scheduler.models.IntegrationLauncherConfig; +import io.airbyte.scheduler.models.JobRunConfig; +import io.airbyte.workers.Worker; +import io.airbyte.workers.WorkerApp; +import io.airbyte.workers.WorkerConfigs; +import io.airbyte.workers.WorkerException; +import io.airbyte.workers.WorkerUtils; +import io.airbyte.workers.process.KubeProcessFactory; +import io.airbyte.workers.process.ProcessFactory; +import java.nio.file.Path; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +// todo: DRY the launchers +public class DbtLauncherWorker implements Worker { + + private static final Logger LOGGER = LoggerFactory.getLogger(DbtLauncherWorker.class); + + private static final MdcScope.Builder LOG_MDC_BUILDER = new MdcScope.Builder() + .setLogPrefix("dbt-orchestrator") + .setPrefixColor(LoggingHelper.Color.CYAN_BACKGROUND); + + public static final String DBT = "dbt"; + public static final String INIT_FILE_DESTINATION_LAUNCHER_CONFIG = "destinationLauncherConfig.json"; + + private final WorkerConfigs workerConfigs; + private final ProcessFactory processFactory; + private final String airbyteVersion; + private final Path workspaceRoot; + private final IntegrationLauncherConfig destinationLauncherConfig; + private final JobRunConfig jobRunConfig; + + private final AtomicBoolean cancelled = new AtomicBoolean(false); + + private Process process; + + public DbtLauncherWorker( + final Path workspaceRoot, + final IntegrationLauncherConfig destinationLauncherConfig, + final JobRunConfig jobRunConfig, + final WorkerConfigs workerConfigs, + final ProcessFactory processFactory, + final String airbyteVersion) { + this.workspaceRoot = workspaceRoot; + this.destinationLauncherConfig = destinationLauncherConfig; + this.jobRunConfig = jobRunConfig; + this.workerConfigs = workerConfigs; + this.processFactory = processFactory; + this.airbyteVersion = airbyteVersion; + } + + @Override + public Void run(OperatorDbtInput operatorDbtInput, Path jobRoot) throws WorkerException { + try { + final Path jobPath = WorkerUtils.getJobRoot(workspaceRoot, jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); + + // we want to filter down to remove secrets, so we aren't writing over a bunch of unnecessary + // secrets + final Map envMap = System.getenv().entrySet().stream() + .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + final Map fileMap = Map.of( + OrchestratorConstants.INIT_FILE_APPLICATION, DBT, + OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, Jsons.serialize(jobRunConfig), + OrchestratorConstants.INIT_FILE_INPUT, Jsons.serialize(operatorDbtInput), + OrchestratorConstants.INIT_FILE_ENV_MAP, Jsons.serialize(envMap), + INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)); + + process = processFactory.create( + "runner-" + UUID.randomUUID().toString().substring(0, 10), + 0, + jobPath, + "airbyte/container-orchestrator:" + airbyteVersion, + false, + fileMap, + null, + workerConfigs.getResourceRequirements(), + 
Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_RUNNER), + Map.of( + WorkerApp.KUBE_HEARTBEAT_PORT, WorkerApp.KUBE_HEARTBEAT_PORT, + OrchestratorConstants.PORT1, OrchestratorConstants.PORT1, + OrchestratorConstants.PORT2, OrchestratorConstants.PORT2, + OrchestratorConstants.PORT3, OrchestratorConstants.PORT3, + OrchestratorConstants.PORT4, OrchestratorConstants.PORT4)); + + LineGobbler.gobble(process.getInputStream(), LOGGER::info, LOG_MDC_BUILDER); + LineGobbler.gobble(process.getErrorStream(), LOGGER::error, LOG_MDC_BUILDER); + + WorkerUtils.wait(process); + + if (process.exitValue() != 0) { + throw new WorkerException("Non-zero exit code!"); + } + } catch (Exception e) { + if (cancelled.get()) { + throw new WorkerException("Sync was cancelled.", e); + } else { + throw new WorkerException("Running the sync attempt failed", e); + } + } + + return null; + } + + @Override + public void cancel() { + cancelled.set(true); + + if (process == null) { + return; + } + + LOGGER.debug("Closing dbt launcher process"); + WorkerUtils.gentleClose(workerConfigs, process, 1, TimeUnit.MINUTES); + if (process.isAlive() || process.exitValue() != 0) { + LOGGER.error("Dbt launcher process wasn't successful"); + } + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java index cea4ffe3b186b..761e5e608eb2c 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java @@ -4,7 +4,6 @@ package io.airbyte.workers.temporal.sync; -import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.functional.CheckedSupplier; import io.airbyte.commons.json.Jsons; import io.airbyte.config.AirbyteConfigValidator; @@ -26,15 +25,13 @@ import io.airbyte.workers.temporal.TemporalAttemptExecution; import java.nio.file.Path; import java.util.function.Supplier; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class DbtTransformationActivityImpl implements DbtTransformationActivity { - private static final Logger LOGGER = LoggerFactory.getLogger(DbtTransformationActivityImpl.class); - + private final boolean containerOrchestratorEnabled; private final WorkerConfigs workerConfigs; - private final ProcessFactory processFactory; + private final ProcessFactory jobProcessFactory; + private final ProcessFactory orchestratorProcessFactory; private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; private final AirbyteConfigValidator validator; @@ -45,8 +42,10 @@ public class DbtTransformationActivityImpl implements DbtTransformationActivity private final String databaseUrl; private final String airbyteVersion; - public DbtTransformationActivityImpl(final WorkerConfigs workerConfigs, - final ProcessFactory processFactory, + public DbtTransformationActivityImpl(final boolean containerOrchestratorEnabled, + final WorkerConfigs workerConfigs, + final ProcessFactory jobProcessFactory, + final ProcessFactory orchestratorProcessFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, final WorkerEnvironment workerEnvironment, @@ -55,27 +54,13 @@ public DbtTransformationActivityImpl(final WorkerConfigs workerConfigs, final String databasePassword, final String databaseUrl, final String airbyteVersion) { - this(workerConfigs, processFactory, 
secretsHydrator, workspaceRoot, new AirbyteConfigValidator(), workerEnvironment, logConfigs, databaseUser, - databasePassword, databaseUrl, airbyteVersion); - } - - @VisibleForTesting - DbtTransformationActivityImpl(final WorkerConfigs workerConfigs, - final ProcessFactory processFactory, - final SecretsHydrator secretsHydrator, - final Path workspaceRoot, - final AirbyteConfigValidator validator, - final WorkerEnvironment workerEnvironment, - final LogConfigs logConfigs, - final String databaseUser, - final String databasePassword, - final String databaseUrl, - final String airbyteVersion) { + this.containerOrchestratorEnabled = containerOrchestratorEnabled; this.workerConfigs = workerConfigs; - this.processFactory = processFactory; + this.jobProcessFactory = jobProcessFactory; + this.orchestratorProcessFactory = orchestratorProcessFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; - this.validator = validator; + this.validator = new AirbyteConfigValidator(); this.workerEnvironment = workerEnvironment; this.logConfigs = logConfigs; this.databaseUser = databaseUser; @@ -98,29 +83,50 @@ public Void run(final JobRunConfig jobRunConfig, return fullInput; }; + CheckedSupplier, Exception> workerFactory; + + if (containerOrchestratorEnabled) { + workerFactory = getContainerLauncherWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig); + } else { + workerFactory = getLegacyWorkerFactory(destinationLauncherConfig, jobRunConfig, resourceRequirements); + } + final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, - getWorkerFactory(destinationLauncherConfig, jobRunConfig, resourceRequirements), + workerFactory, inputSupplier, new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } - private CheckedSupplier, Exception> getWorkerFactory(final IntegrationLauncherConfig destinationLauncherConfig, - final JobRunConfig jobRunConfig, - final ResourceRequirements resourceRequirements) { + private CheckedSupplier, Exception> getLegacyWorkerFactory(final IntegrationLauncherConfig destinationLauncherConfig, + final JobRunConfig jobRunConfig, + final ResourceRequirements resourceRequirements) { return () -> new DbtTransformationWorker( jobRunConfig.getJobId(), Math.toIntExact(jobRunConfig.getAttemptId()), resourceRequirements, new DbtTransformationRunner( workerConfigs, - processFactory, NormalizationRunnerFactory.create( + jobProcessFactory, NormalizationRunnerFactory.create( workerConfigs, destinationLauncherConfig.getDockerImage(), - processFactory))); + jobProcessFactory))); + } + + private CheckedSupplier, Exception> getContainerLauncherWorkerFactory( + final WorkerConfigs workerConfigs, + final IntegrationLauncherConfig destinationLauncherConfig, + final JobRunConfig jobRunConfig) { + return () -> new DbtLauncherWorker( + workspaceRoot, + destinationLauncherConfig, + jobRunConfig, + workerConfigs, + orchestratorProcessFactory, + airbyteVersion); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java index 630bc823ce5a8..3f421bdeaae3e 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java +++ 
b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java @@ -4,7 +4,6 @@ package io.airbyte.workers.temporal.sync; -import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.functional.CheckedSupplier; import io.airbyte.commons.json.Jsons; import io.airbyte.config.AirbyteConfigValidator; @@ -24,15 +23,13 @@ import io.airbyte.workers.temporal.TemporalAttemptExecution; import java.nio.file.Path; import java.util.function.Supplier; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class NormalizationActivityImpl implements NormalizationActivity { - private static final Logger LOGGER = LoggerFactory.getLogger(NormalizationActivityImpl.class); - + private final boolean containerOrchestratorEnabled; private final WorkerConfigs workerConfigs; - private final ProcessFactory processFactory; + private final ProcessFactory jobProcessFactory; + private final ProcessFactory orchestratorProcessFactory; private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; private final AirbyteConfigValidator validator; @@ -43,38 +40,25 @@ public class NormalizationActivityImpl implements NormalizationActivity { private final String databaseUrl; private final String airbyteVersion; - public NormalizationActivityImpl(final WorkerConfigs workerConfigs, - final ProcessFactory processFactory, + public NormalizationActivityImpl(final boolean containerOrchestratorEnabled, + final WorkerConfigs workerConfigs, + final ProcessFactory jobProcessFactory, + final ProcessFactory orchestratorProcessFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, final WorkerEnvironment workerEnvironment, - final LogConfigs logConfig, + final LogConfigs logConfigs, final String databaseUser, final String databasePassword, final String databaseUrl, final String airbyteVersion) { - this(workerConfigs, processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator(), workerEnvironment, logConfig, databaseUser, - databasePassword, - databaseUrl, airbyteVersion); - } - - @VisibleForTesting - NormalizationActivityImpl(final WorkerConfigs workerConfigs, - final ProcessFactory processFactory, - final SecretsHydrator secretsHydrator, - final Path workspaceRoot, - final AirbyteConfigValidator validator, - final WorkerEnvironment workerEnvironment, - final LogConfigs logConfigs, - final String databaseUser, - final String databasePassword, - final String databaseUrl, - final String airbyteVersion) { + this.containerOrchestratorEnabled = containerOrchestratorEnabled; this.workerConfigs = workerConfigs; - this.processFactory = processFactory; + this.jobProcessFactory = jobProcessFactory; + this.orchestratorProcessFactory = orchestratorProcessFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; - this.validator = validator; + this.validator = new AirbyteConfigValidator(); this.workerEnvironment = workerEnvironment; this.logConfigs = logConfigs; this.databaseUser = databaseUser; @@ -96,28 +80,49 @@ public Void normalize(final JobRunConfig jobRunConfig, return fullInput; }; + CheckedSupplier, Exception> workerFactory; + + if (containerOrchestratorEnabled) { + workerFactory = getContainerLauncherWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig); + } else { + workerFactory = getLegacyWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig); + } + final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( workspaceRoot, 
workerEnvironment, logConfigs, jobRunConfig, - getWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig), + workerFactory, inputSupplier, new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } - private CheckedSupplier, Exception> getWorkerFactory( - final WorkerConfigs workerConfigs, - final IntegrationLauncherConfig destinationLauncherConfig, - final JobRunConfig jobRunConfig) { + private CheckedSupplier, Exception> getLegacyWorkerFactory( + final WorkerConfigs workerConfigs, + final IntegrationLauncherConfig destinationLauncherConfig, + final JobRunConfig jobRunConfig) { return () -> new DefaultNormalizationWorker( jobRunConfig.getJobId(), Math.toIntExact(jobRunConfig.getAttemptId()), NormalizationRunnerFactory.create( workerConfigs, destinationLauncherConfig.getDockerImage(), - processFactory), + jobProcessFactory), workerEnvironment); } + private CheckedSupplier, Exception> getContainerLauncherWorkerFactory( + final WorkerConfigs workerConfigs, + final IntegrationLauncherConfig destinationLauncherConfig, + final JobRunConfig jobRunConfig) { + return () -> new NormalizationLauncherWorker( + workspaceRoot, + destinationLauncherConfig, + jobRunConfig, + workerConfigs, + orchestratorProcessFactory, + airbyteVersion); + } + } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationLauncherWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationLauncherWorker.java new file mode 100644 index 0000000000000..ade742b135ad6 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationLauncherWorker.java @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.workers.temporal.sync; + +import io.airbyte.commons.io.LineGobbler; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.logging.LoggingHelper; +import io.airbyte.commons.logging.MdcScope; +import io.airbyte.config.NormalizationInput; +import io.airbyte.scheduler.models.IntegrationLauncherConfig; +import io.airbyte.scheduler.models.JobRunConfig; +import io.airbyte.workers.Worker; +import io.airbyte.workers.WorkerApp; +import io.airbyte.workers.WorkerConfigs; +import io.airbyte.workers.WorkerException; +import io.airbyte.workers.WorkerUtils; +import io.airbyte.workers.process.KubeProcessFactory; +import io.airbyte.workers.process.ProcessFactory; +import java.nio.file.Path; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class NormalizationLauncherWorker implements Worker { + + private static final Logger LOGGER = LoggerFactory.getLogger(NormalizationLauncherWorker.class); + + private static final MdcScope.Builder LOG_MDC_BUILDER = new MdcScope.Builder() + .setLogPrefix("normalization-orchestrator") + .setPrefixColor(LoggingHelper.Color.CYAN_BACKGROUND); + + public static final String NORMALIZATION = "normalization"; + public static final String INIT_FILE_DESTINATION_LAUNCHER_CONFIG = "destinationLauncherConfig.json"; + + private final WorkerConfigs workerConfigs; + private final ProcessFactory processFactory; + private final String airbyteVersion; + private final AtomicBoolean cancelled = new AtomicBoolean(false); + private final Path workspaceRoot; + private final IntegrationLauncherConfig destinationLauncherConfig; + private final JobRunConfig jobRunConfig; + + private Process process; + + public NormalizationLauncherWorker( + final Path workspaceRoot, + final IntegrationLauncherConfig destinationLauncherConfig, + final JobRunConfig jobRunConfig, + final WorkerConfigs workerConfigs, + final ProcessFactory processFactory, + final String airbyteVersion) { + this.workspaceRoot = workspaceRoot; + this.destinationLauncherConfig = destinationLauncherConfig; + this.jobRunConfig = jobRunConfig; + this.workerConfigs = workerConfigs; + this.processFactory = processFactory; + this.airbyteVersion = airbyteVersion; + } + + @Override + public Void run(NormalizationInput normalizationInput, Path jobRoot) throws WorkerException { + try { + final Path jobPath = WorkerUtils.getJobRoot(workspaceRoot, jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); + + // we want to filter down to remove secrets, so we aren't writing over a bunch of unnecessary + // secrets + final Map envMap = System.getenv().entrySet().stream() + .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + final Map fileMap = Map.of( + OrchestratorConstants.INIT_FILE_APPLICATION, NORMALIZATION, + OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, Jsons.serialize(jobRunConfig), + OrchestratorConstants.INIT_FILE_INPUT, Jsons.serialize(normalizationInput), + OrchestratorConstants.INIT_FILE_ENV_MAP, Jsons.serialize(envMap), + INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)); + + process = processFactory.create( + "runner-" + UUID.randomUUID().toString().substring(0, 10), + 0, + jobPath, + "airbyte/container-orchestrator:" + airbyteVersion, + false, + fileMap, + null, + 
workerConfigs.getResourceRequirements(), + Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_RUNNER), + Map.of( + WorkerApp.KUBE_HEARTBEAT_PORT, WorkerApp.KUBE_HEARTBEAT_PORT, + OrchestratorConstants.PORT1, OrchestratorConstants.PORT1, + OrchestratorConstants.PORT2, OrchestratorConstants.PORT2, + OrchestratorConstants.PORT3, OrchestratorConstants.PORT3, + OrchestratorConstants.PORT4, OrchestratorConstants.PORT4)); + + LineGobbler.gobble(process.getInputStream(), LOGGER::info, LOG_MDC_BUILDER); + LineGobbler.gobble(process.getErrorStream(), LOGGER::error, LOG_MDC_BUILDER); + + WorkerUtils.wait(process); + + if (process.exitValue() != 0) { + throw new WorkerException("Non-zero exit code!"); + } + } catch (Exception e) { + if (cancelled.get()) { + throw new WorkerException("Sync was cancelled.", e); + } else { + throw new WorkerException("Running the sync attempt failed", e); + } + } + + return null; + } + + @Override + public void cancel() { + cancelled.set(true); + + if (process == null) { + return; + } + + LOGGER.debug("Closing normalization launcher process"); + WorkerUtils.gentleClose(workerConfigs, process, 1, TimeUnit.MINUTES); + if (process.isAlive() || process.exitValue() != 0) { + LOGGER.error("Normalization launcher process wasn't successful"); + } + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java new file mode 100644 index 0000000000000..b80da9d36f3c0 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.temporal.sync; + +import io.airbyte.config.EnvConfigs; +import java.util.Set; + +public class OrchestratorConstants { + + // set of env vars necessary for the container orchestrator app to run + public static final Set ENV_VARS_TO_TRANSFER = Set.of( + EnvConfigs.WORKER_ENVIRONMENT, + EnvConfigs.JOB_KUBE_TOLERATIONS, + EnvConfigs.JOB_KUBE_CURL_IMAGE, + EnvConfigs.JOB_KUBE_BUSYBOX_IMAGE, + EnvConfigs.JOB_KUBE_SOCAT_IMAGE, + EnvConfigs.JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY, + EnvConfigs.JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_SECRET, + EnvConfigs.JOB_KUBE_NODE_SELECTORS, + EnvConfigs.DOCKER_NETWORK, + EnvConfigs.LOCAL_DOCKER_MOUNT, + EnvConfigs.WORKSPACE_DOCKER_MOUNT, + EnvConfigs.WORKSPACE_ROOT, + EnvConfigs.DEFAULT_JOB_KUBE_NAMESPACE, + EnvConfigs.JOB_MAIN_CONTAINER_CPU_REQUEST, + EnvConfigs.JOB_MAIN_CONTAINER_CPU_LIMIT, + EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_REQUEST, + EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_LIMIT, + EnvConfigs.LOCAL_ROOT); + + public static final String INIT_FILE_ENV_MAP = "envMap.json"; + public static final String INIT_FILE_INPUT = "input.json"; + public static final String INIT_FILE_JOB_RUN_CONFIG = "jobRunConfig.json"; + public static final String INIT_FILE_APPLICATION = "application.txt"; + + // define two ports for stdout/stderr usage on the container orchestrator pod + public static final int PORT1 = 9877; + public static final int PORT2 = 9878; + public static final int PORT3 = 9879; + public static final int PORT4 = 9880; + public static final Set PORTS = Set.of(PORT1, PORT2, PORT3, PORT4); + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationLauncherWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationLauncherWorker.java index 4efb5dfb825d9..6fad439553e8a 
100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationLauncherWorker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationLauncherWorker.java @@ -8,7 +8,6 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper; import io.airbyte.commons.logging.MdcScope; -import io.airbyte.config.EnvConfigs; import io.airbyte.config.ReplicationOutput; import io.airbyte.config.StandardSyncInput; import io.airbyte.scheduler.models.IntegrationLauncherConfig; @@ -23,7 +22,6 @@ import java.nio.file.Path; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -42,44 +40,12 @@ public class ReplicationLauncherWorker implements Worker PORTS = Set.of(PORT1, PORT2, PORT3, PORT4); - - // set of env vars necessary for the container orchestrator app to run - public static final Set ENV_VARS_TO_TRANSFER = Set.of( - EnvConfigs.WORKER_ENVIRONMENT, - EnvConfigs.JOB_KUBE_TOLERATIONS, - EnvConfigs.JOB_KUBE_CURL_IMAGE, - EnvConfigs.JOB_KUBE_BUSYBOX_IMAGE, - EnvConfigs.JOB_KUBE_SOCAT_IMAGE, - EnvConfigs.JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY, - EnvConfigs.JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_SECRET, - EnvConfigs.JOB_KUBE_NODE_SELECTORS, - EnvConfigs.DOCKER_NETWORK, - EnvConfigs.LOCAL_DOCKER_MOUNT, - EnvConfigs.WORKSPACE_DOCKER_MOUNT, - EnvConfigs.WORKSPACE_ROOT, - EnvConfigs.DEFAULT_JOB_KUBE_NAMESPACE, - EnvConfigs.JOB_MAIN_CONTAINER_CPU_REQUEST, - EnvConfigs.JOB_MAIN_CONTAINER_CPU_LIMIT, - EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_REQUEST, - EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_LIMIT, - EnvConfigs.LOCAL_ROOT); private final AtomicBoolean cancelled = new AtomicBoolean(false); private final IntegrationLauncherConfig sourceLauncherConfig; @@ -121,16 +87,16 @@ public ReplicationOutput run(StandardSyncInput standardSyncInput, Path jobRoot) // we want to filter down to remove secrets, so we aren't writing over a bunch of unnecessary // secrets final Map envMap = System.getenv().entrySet().stream() - .filter(entry -> ENV_VARS_TO_TRANSFER.contains(entry.getKey())) + .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); final Map fileMap = Map.of( - INIT_FILE_APPLICATION, REPLICATION, - INIT_FILE_JOB_RUN_CONFIG, Jsons.serialize(jobRunConfig), + OrchestratorConstants.INIT_FILE_APPLICATION, REPLICATION, + OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, Jsons.serialize(jobRunConfig), + OrchestratorConstants.INIT_FILE_INPUT, Jsons.serialize(syncInput), + OrchestratorConstants.INIT_FILE_ENV_MAP, Jsons.serialize(envMap), INIT_FILE_SOURCE_LAUNCHER_CONFIG, Jsons.serialize(sourceLauncherConfig), - INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig), - INIT_FILE_SYNC_INPUT, Jsons.serialize(syncInput), - INIT_FILE_ENV_MAP, Jsons.serialize(envMap)); + INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)); process = processFactory.create( "runner-" + UUID.randomUUID().toString().substring(0, 10), @@ -144,10 +110,10 @@ public ReplicationOutput run(StandardSyncInput standardSyncInput, Path jobRoot) Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_RUNNER), Map.of( WorkerApp.KUBE_HEARTBEAT_PORT, WorkerApp.KUBE_HEARTBEAT_PORT, - PORT1, PORT1, - PORT2, PORT2, - PORT3, PORT3, - PORT4, PORT4)); + OrchestratorConstants.PORT1, OrchestratorConstants.PORT1, + 
OrchestratorConstants.PORT2, OrchestratorConstants.PORT2, + OrchestratorConstants.PORT3, OrchestratorConstants.PORT3, + OrchestratorConstants.PORT4, OrchestratorConstants.PORT4)); final AtomicReference<ReplicationOutput> output = new AtomicReference<>(); @@ -194,10 +160,10 @@ public void cancel() { return; } - LOGGER.debug("Closing sync runner process"); + LOGGER.debug("Closing replication launcher process"); WorkerUtils.gentleClose(workerConfigs, process, 1, TimeUnit.MINUTES); if (process.isAlive() || process.exitValue() != 0) { - LOGGER.error("Sync runner process wasn't successful"); + LOGGER.error("Replication launcher process wasn't successful"); } } From f9eac08f02e9a6ce7b9756bf84e99fbc42b7ba16 Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Mon, 3 Jan 2022 09:24:27 +0100 Subject: [PATCH 011/215] Simplify Connection handler prototype (#9098) simplify the prototype of the connectionHandler methods that rely only on UUID --- .../java/io/airbyte/server/apis/ConfigurationApi.java | 4 ++-- .../airbyte/server/handlers/ConnectionsHandler.java | 11 +++++------ .../airbyte/server/handlers/DestinationHandler.java | 4 +--- .../io/airbyte/server/handlers/SourceHandler.java | 4 +--- .../server/handlers/WebBackendConnectionsHandler.java | 11 ++++++----- .../io/airbyte/server/handlers/WorkspacesHandler.java | 4 +--- .../server/handlers/ConnectionsHandlerTest.java | 11 ++++------- .../io/airbyte/server/handlers/SourceHandlerTest.java | 2 +- .../handlers/WebBackendConnectionsHandlerTest.java | 10 +++++----- .../server/handlers/WorkspacesHandlerTest.java | 5 +---- 10 files changed, 27 insertions(+), 39 deletions(-) diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index ff3ff00388bf1..9ecc6afe61386 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -535,14 +535,14 @@ public ConnectionReadList searchConnections(final ConnectionSearch connectionSea @Override public ConnectionRead getConnection(final ConnectionIdRequestBody connectionIdRequestBody) { - return execute(() -> connectionsHandler.getConnection(connectionIdRequestBody)); + return execute(() -> connectionsHandler.getConnection(connectionIdRequestBody.getConnectionId())); } @Override public void deleteConnection(final ConnectionIdRequestBody connectionIdRequestBody) { execute(() -> { operationsHandler.deleteOperationsForConnection(connectionIdRequestBody); - connectionsHandler.deleteConnection(connectionIdRequestBody); + connectionsHandler.deleteConnection(connectionIdRequestBody.getConnectionId()); return null; }); } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java index fb0ed1b759ed9..0f5ffa9fb66ba 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java @@ -10,7 +10,6 @@ import com.google.common.collect.Lists; import io.airbyte.analytics.TrackingClient; import io.airbyte.api.model.ConnectionCreate; -import io.airbyte.api.model.ConnectionIdRequestBody; import io.airbyte.api.model.ConnectionRead; import io.airbyte.api.model.ConnectionReadList; import io.airbyte.api.model.ConnectionSchedule; @@ -260,9 +259,9 @@ public ConnectionReadList listConnections() throws JsonValidationException, Conf
return new ConnectionReadList().connections(connectionReads); } - public ConnectionRead getConnection(final ConnectionIdRequestBody connectionIdRequestBody) + public ConnectionRead getConnection(final UUID connectionId) throws JsonValidationException, IOException, ConfigNotFoundException { - return connectionHelper.buildConnectionRead(connectionIdRequestBody.getConnectionId()); + return connectionHelper.buildConnectionRead(connectionId); } public ConnectionReadList searchConnections(final ConnectionSearch connectionSearch) @@ -315,12 +314,12 @@ public boolean matchSearch(final DestinationSearch destinationSearch, final Dest return (destinationReadFromSearch == null || destinationReadFromSearch.equals(destinationRead)); } - public void deleteConnection(final ConnectionIdRequestBody connectionIdRequestBody) + public void deleteConnection(final UUID connectionId) throws ConfigNotFoundException, IOException, JsonValidationException { if (featureFlags.usesNewScheduler()) { - temporalWorkerRunFactory.deleteConnection(connectionIdRequestBody.getConnectionId()); + temporalWorkerRunFactory.deleteConnection(connectionId); } else { - final ConnectionRead connectionRead = getConnection(connectionIdRequestBody); + final ConnectionRead connectionRead = getConnection(connectionId); deleteConnection(connectionRead); } } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/DestinationHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/DestinationHandler.java index c3bfc4ce3b1ff..015f8e763d22c 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/DestinationHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/DestinationHandler.java @@ -7,7 +7,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; -import io.airbyte.api.model.ConnectionIdRequestBody; import io.airbyte.api.model.ConnectionRead; import io.airbyte.api.model.DestinationCreate; import io.airbyte.api.model.DestinationDefinitionIdRequestBody; @@ -110,8 +109,7 @@ public void deleteDestination(final DestinationRead destination) continue; } - final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody().connectionId(connectionRead.getConnectionId()); - connectionsHandler.deleteConnection(connectionIdRequestBody); + connectionsHandler.deleteConnection(connectionRead.getConnectionId()); } final var fullConfig = configRepository.getDestinationConnectionWithSecrets(destination.getDestinationId()).getConfiguration(); diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/SourceHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/SourceHandler.java index ab7b354b11800..0b1078e7631ad 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/SourceHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/SourceHandler.java @@ -6,7 +6,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.Lists; -import io.airbyte.api.model.ConnectionIdRequestBody; import io.airbyte.api.model.ConnectionRead; import io.airbyte.api.model.SourceCreate; import io.airbyte.api.model.SourceDefinitionIdRequestBody; @@ -184,8 +183,7 @@ public void deleteSource(final SourceRead source) continue; } - final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody().connectionId(connectionRead.getConnectionId()); - connectionsHandler.deleteConnection(connectionIdRequestBody); + 
connectionsHandler.deleteConnection(connectionRead.getConnectionId()); } final ConnectorSpecification spec = getSpecFromSourceId(source.getSourceId()); diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java index af29f4c5b455c..6def814e37e18 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java @@ -184,7 +184,7 @@ public WebBackendConnectionRead webBackendGetConnection(final WebBackendConnecti final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody() .connectionId(webBackendConnectionRequestBody.getConnectionId()); - final ConnectionRead connection = connectionsHandler.getConnection(connectionIdRequestBody); + final ConnectionRead connection = connectionsHandler.getConnection(connectionIdRequestBody.getConnectionId()); if (MoreBooleans.isTruthy(webBackendConnectionRequestBody.getWithRefreshedCatalog())) { final SourceIdRequestBody sourceId = new SourceIdRequestBody().sourceId(connection.getSourceId()); @@ -218,10 +218,11 @@ protected static AirbyteCatalog updateSchemaWithDiscovery(final AirbyteCatalog o final AirbyteStreamConfiguration discoveredStreamConfig = s.getConfig(); outputStreamConfig = new AirbyteStreamConfiguration(); - if (stream.getSupportedSyncModes().contains(originalStreamConfig.getSyncMode())) + if (stream.getSupportedSyncModes().contains(originalStreamConfig.getSyncMode())) { outputStreamConfig.setSyncMode(originalStreamConfig.getSyncMode()); - else + } else { outputStreamConfig.setSyncMode(discoveredStreamConfig.getSyncMode()); + } if (originalStreamConfig.getCursorField().size() > 0) { outputStreamConfig.setCursorField(originalStreamConfig.getCursorField()); @@ -286,7 +287,7 @@ private List createOperations(final WebBackendConnectionCreate webBackendC private List updateOperations(final WebBackendConnectionUpdate webBackendConnectionUpdate) throws JsonValidationException, ConfigNotFoundException, IOException { final ConnectionRead connectionRead = connectionsHandler - .getConnection(new ConnectionIdRequestBody().connectionId(webBackendConnectionUpdate.getConnectionId())); + .getConnection(webBackendConnectionUpdate.getConnectionId()); final List originalOperationIds = new ArrayList<>(connectionRead.getOperationIds()); final List operationIds = new ArrayList<>(); @@ -305,7 +306,7 @@ private List updateOperations(final WebBackendConnectionUpdate webBackendC } private UUID getWorkspaceIdForConnection(final UUID connectionId) throws JsonValidationException, ConfigNotFoundException, IOException { - final UUID sourceId = connectionsHandler.getConnection(new ConnectionIdRequestBody().connectionId(connectionId)).getSourceId(); + final UUID sourceId = connectionsHandler.getConnection(connectionId).getSourceId(); return getWorkspaceIdForSource(sourceId); } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/WorkspacesHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/WorkspacesHandler.java index 36c097f6ed7eb..4d04293570d63 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/WorkspacesHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/WorkspacesHandler.java @@ -7,7 +7,6 @@ import com.github.slugify.Slugify; import com.google.common.base.Strings; import 
io.airbyte.analytics.TrackingClientSingleton; -import io.airbyte.api.model.ConnectionIdRequestBody; import io.airbyte.api.model.ConnectionRead; import io.airbyte.api.model.DestinationRead; import io.airbyte.api.model.Notification; @@ -104,8 +103,7 @@ public void deleteWorkspace(final WorkspaceIdRequestBody workspaceIdRequestBody) // disable all connections associated with this workspace for (final ConnectionRead connectionRead : connectionsHandler.listConnectionsForWorkspace(workspaceIdRequestBody).getConnections()) { - final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody().connectionId(connectionRead.getConnectionId()); - connectionsHandler.deleteConnection(connectionIdRequestBody); + connectionsHandler.deleteConnection(connectionRead.getConnectionId()); } // disable all destinations associated with this workspace diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/ConnectionsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/ConnectionsHandlerTest.java index 76c50acbc2f24..65d0b6d8957a4 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/ConnectionsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/ConnectionsHandlerTest.java @@ -17,7 +17,6 @@ import io.airbyte.analytics.TrackingClient; import io.airbyte.api.model.AirbyteCatalog; import io.airbyte.api.model.ConnectionCreate; -import io.airbyte.api.model.ConnectionIdRequestBody; import io.airbyte.api.model.ConnectionRead; import io.airbyte.api.model.ConnectionReadList; import io.airbyte.api.model.ConnectionSchedule; @@ -367,8 +366,7 @@ void testGetConnection() throws JsonValidationException, ConfigNotFoundException when(configRepository.getStandardSync(standardSync.getConnectionId())) .thenReturn(standardSync); - final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody().connectionId(standardSync.getConnectionId()); - final ConnectionRead actualConnectionRead = connectionsHandler.getConnection(connectionIdRequestBody); + final ConnectionRead actualConnectionRead = connectionsHandler.getConnection(standardSync.getConnectionId()); assertEquals(ConnectionHelpers.generateExpectedConnectionRead(standardSync), actualConnectionRead); } @@ -546,7 +544,6 @@ void testSearchConnections() throws JsonValidationException, ConfigNotFoundExcep @Test void testDeleteConnection() throws JsonValidationException, IOException, ConfigNotFoundException { - final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody().connectionId(standardSync.getConnectionId()); final ConnectionRead connectionRead = ConnectionHelpers.generateExpectedConnectionRead( standardSync.getConnectionId(), @@ -566,12 +563,12 @@ void testDeleteConnection() throws JsonValidationException, IOException, ConfigN .resourceRequirements(connectionRead.getResourceRequirements()); final ConnectionsHandler spiedConnectionsHandler = spy(connectionsHandler); - doReturn(connectionRead).when(spiedConnectionsHandler).getConnection(connectionIdRequestBody); + doReturn(connectionRead).when(spiedConnectionsHandler).getConnection(connectionId); doReturn(null).when(spiedConnectionsHandler).updateConnection(expectedConnectionUpdate); - spiedConnectionsHandler.deleteConnection(connectionIdRequestBody); + spiedConnectionsHandler.deleteConnection(connectionId); - verify(spiedConnectionsHandler).getConnection(connectionIdRequestBody); + verify(spiedConnectionsHandler).getConnection(connectionId); 
verify(spiedConnectionsHandler).updateConnection(expectedConnectionUpdate); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/SourceHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/SourceHandlerTest.java index 54e690a2ec545..7d16d7d649a8b 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/SourceHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/SourceHandlerTest.java @@ -272,7 +272,7 @@ void testDeleteSource() throws JsonValidationException, ConfigNotFoundException, verify(connectionsHandler).listConnectionsForWorkspace(workspaceIdRequestBody); final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody() .connectionId(connectionRead.getConnectionId()); - verify(connectionsHandler).deleteConnection(connectionIdRequestBody); + verify(connectionsHandler).deleteConnection(connectionRead.getConnectionId()); } } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java index 5722fd3664b98..3dcb73d2154e9 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java @@ -274,7 +274,7 @@ public void testWebBackendGetConnection() throws ConfigNotFoundException, IOExce final WebBackendConnectionRequestBody webBackendConnectionRequestBody = new WebBackendConnectionRequestBody(); webBackendConnectionRequestBody.setConnectionId(connectionRead.getConnectionId()); - when(connectionsHandler.getConnection(connectionIdRequestBody)).thenReturn(connectionRead); + when(connectionsHandler.getConnection(connectionRead.getConnectionId())).thenReturn(connectionRead); when(operationsHandler.listOperationsForConnection(connectionIdRequestBody)).thenReturn(operationReadList); final WebBackendConnectionRead WebBackendConnectionRead = wbHandler.webBackendGetConnection(webBackendConnectionRequestBody); @@ -291,7 +291,7 @@ public void testWebBackendGetConnectionWithDiscovery() throws ConfigNotFoundExce webBackendConnectionIdRequestBody.setConnectionId(connectionRead.getConnectionId()); webBackendConnectionIdRequestBody.setWithRefreshedCatalog(true); - when(connectionsHandler.getConnection(connectionIdRequestBody)).thenReturn(connectionRead); + when(connectionsHandler.getConnection(connectionRead.getConnectionId())).thenReturn(connectionRead); when(operationsHandler.listOperationsForConnection(connectionIdRequestBody)).thenReturn(operationReadList); final WebBackendConnectionRead WebBackendConnectionRead = wbHandler.webBackendGetConnection(webBackendConnectionIdRequestBody); @@ -431,7 +431,7 @@ void testUpdateConnection() throws JsonValidationException, ConfigNotFoundExcept .status(expected.getStatus()) .syncCatalog(expected.getSyncCatalog()); - when(connectionsHandler.getConnection(new ConnectionIdRequestBody().connectionId(expected.getConnectionId()))).thenReturn( + when(connectionsHandler.getConnection(expected.getConnectionId())).thenReturn( new ConnectionRead().connectionId(expected.getConnectionId())); when(connectionsHandler.updateConnection(any())).thenReturn( new ConnectionRead() @@ -472,7 +472,7 @@ void testUpdateConnectionWithOperations() throws JsonValidationException, Config .syncCatalog(expected.getSyncCatalog()) .operations(List.of(operationCreateOrUpdate)); - when(connectionsHandler.getConnection(new 
ConnectionIdRequestBody().connectionId(expected.getConnectionId()))).thenReturn( + when(connectionsHandler.getConnection(expected.getConnectionId())).thenReturn( new ConnectionRead() .connectionId(expected.getConnectionId()) .operationIds(connectionRead.getOperationIds())); @@ -510,7 +510,7 @@ void testUpdateConnectionWithUpdatedSchema() throws JsonValidationException, Con .withRefreshedCatalog(true); when(operationsHandler.listOperationsForConnection(any())).thenReturn(operationReadList); - when(connectionsHandler.getConnection(new ConnectionIdRequestBody().connectionId(expected.getConnectionId()))).thenReturn( + when(connectionsHandler.getConnection(expected.getConnectionId())).thenReturn( new ConnectionRead().connectionId(expected.getConnectionId())); when(connectionsHandler.updateConnection(any())).thenReturn( new ConnectionRead() diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/WorkspacesHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/WorkspacesHandlerTest.java index a2a8648b31fc3..aca8dd6870242 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/WorkspacesHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/WorkspacesHandlerTest.java @@ -15,7 +15,6 @@ import static org.mockito.Mockito.when; import com.google.common.collect.Lists; -import io.airbyte.api.model.ConnectionIdRequestBody; import io.airbyte.api.model.ConnectionRead; import io.airbyte.api.model.ConnectionReadList; import io.airbyte.api.model.DestinationRead; @@ -204,9 +203,7 @@ void testDeleteWorkspace() throws JsonValidationException, ConfigNotFoundExcepti workspacesHandler.deleteWorkspace(workspaceIdRequestBody); - final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody() - .connectionId(connection.getConnectionId()); - verify(connectionsHandler).deleteConnection(connectionIdRequestBody); + verify(connectionsHandler).deleteConnection(connection.getConnectionId()); verify(destinationHandler).deleteDestination(destination); verify(sourceHandler).deleteSource(source); } From 82d6b16d7f277d089a04bd37cb60d164f883fa0f Mon Sep 17 00:00:00 2001 From: Deividas J Date: Mon, 3 Jan 2022 15:57:49 +0200 Subject: [PATCH 012/215] Source Amazon Seller Partner: GET_SELLER_FEEDBACK_DATA normalize header field names (#9212) --- .../e55879a8-0ef8-4557-abcf-ab34c53ec460.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../source-amazon-seller-partner/Dockerfile | 2 +- .../schemas/GET_SELLER_FEEDBACK_DATA.json | 12 +++++----- .../source_amazon_seller_partner/streams.py | 23 +++++++++++++++---- .../unit_tests/test_transform_function.py | 6 +++-- .../sources/amazon-seller-partner.md | 1 + 8 files changed, 34 insertions(+), 16 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json index 5fc5d3d0f25fe..379a6a3c8d55b 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e55879a8-0ef8-4557-abcf-ab34c53ec460", "name": "Amazon Seller Partner", "dockerRepository": "airbyte/source-amazon-seller-partner", - "dockerImageTag": "0.2.8", + 
"dockerImageTag": "0.2.9", "documentationUrl": "https://docs.airbyte.io/integrations/sources/amazon-seller-partner", "icon": "amazonsellerpartner.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index caf7def7d14e1..5e160473d5b31 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -22,7 +22,7 @@ - name: Amazon Seller Partner sourceDefinitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 dockerRepository: airbyte/source-amazon-seller-partner - dockerImageTag: 0.2.8 + dockerImageTag: 0.2.9 sourceType: api documentationUrl: https://docs.airbyte.io/integrations/sources/amazon-seller-partner icon: amazonsellerpartner.svg diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 9ae9afede6f3c..3b0d4c2c573be 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -157,7 +157,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amazon-seller-partner:0.2.8" +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.9" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile index b7e937712012b..7c46452143632 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.8 +LABEL io.airbyte.version=0.2.9 LABEL io.airbyte.name=airbyte/source-amazon-seller-partner diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_SELLER_FEEDBACK_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_SELLER_FEEDBACK_DATA.json index 44834097df434..f03f3fea23c63 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_SELLER_FEEDBACK_DATA.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_SELLER_FEEDBACK_DATA.json @@ -4,23 +4,23 @@ "type": "object", "$schema": "http://json-schema.org/draft-07/schema#", "properties": { - "Date": { + "date": { "type": ["null", "string"], "format": "date" }, - "Rating": { + "rating": { "type": ["null", "number"] }, - "Comments": { + "comments": { "type": ["null", "string"] }, - "Response": { + "response": { "type": ["null", "string"] }, - "Order ID": { + "order_id": { "type": ["null", "string"] }, - "Rater Email": { + "rater_email": { "type": ["null", "string"] } } diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index e367c84ddfa86..569733f958979 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -522,7 +522,7 @@ class SellerFeedbackReports(IncrementalReportsAmazonSPStream): MARKETPLACE_DATE_FORMAT_MAP = dict( # eu A2VIGQ35RCS4UG="D/M/YY", # AE - A1PA6795UKMFR9="D/M/YY", # DE + A1PA6795UKMFR9="D.M.YY", # DE A1C3SOZRARQ6R3="D/M/YY", # PL ARBP9OOSHTCHU="D/M/YY", # EG A1RKKUPIHCS9HS="D/M/YY", # ES @@ -531,7 +531,7 @@ class SellerFeedbackReports(IncrementalReportsAmazonSPStream): APJ6JRA9NG5V4="D/M/YY", # IT A1805IZSGTT6HS="D/M/YY", # NL A17E79C6D8DWNP="D/M/YY", # SA - A2NODRKZP88ZB9="D/M/YY", # SE + A2NODRKZP88ZB9="YYYY-MM-DD", # SE A33AVAJ2PDY3EV="D/M/YY", # TR A1F83G8C2ARO7P="D/M/YY", # UK # fe @@ -541,12 +541,14 @@ class SellerFeedbackReports(IncrementalReportsAmazonSPStream): # na ATVPDKIKX0DER="M/D/YY", # US A2Q3Y263D00KWC="D/M/YY", # BR - A2EUQ1WTGCTBG2="M/D/YY", # CA + A2EUQ1WTGCTBG2="D/M/YY", # CA A1AM78C64UM0Y8="D/M/YY", # MX ) + NORMALIZED_FIELD_NAMES = ["date", "rating", "comments", "response", "order_id", "rater_email"] + name = "GET_SELLER_FEEDBACK_DATA" - cursor_field = "Date" + cursor_field = "date" transformer: TypeTransformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization | TransformConfig.CustomSchemaNormalization) def __init__(self, *args, **kwargs): @@ -566,6 +568,19 @@ def transform_function(original_value: Any, field_schema: Dict[str, Any]) -> Any return transform_function + # csv header field names for this report differ per marketplace (are localized to marketplace language) + # but columns come in the same order + # so we set fieldnames to our custom ones + # and raise error if original and custom header field count does not match + @staticmethod + def parse_document(document): + reader = csv.DictReader(StringIO(document), delimiter="\t", 
fieldnames=SellerFeedbackReports.NORMALIZED_FIELD_NAMES) + original_fieldnames = next(reader) + if len(original_fieldnames) != len(SellerFeedbackReports.NORMALIZED_FIELD_NAMES): + raise ValueError("Original and normalized header field count does not match") + + return reader + class Orders(IncrementalAmazonSPStream): """ diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py index 1c6f7ae759aa5..9acc30f8f51c3 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py @@ -30,6 +30,8 @@ def reports_stream(marketplace_id): INPUT_DATES = { + "YYYY-MM-DD": ["2017-01-13", "2017-12-12", "2017-12-17", "2011-12-13"], + "D.M.YY": ["13.1.17", "12.12.17", "17.12.17", "13.12.11"], "YY/M/D": ["17/1/13", "17/12/12", "17/12/17", "11/12/13"], "D/M/YY": ["13/1/17", "12/12/17", "17/12/17", "13/12/11"], "M/D/YY": ["1/13/17", "12/12/17", "12/17/17", "12/13/11"], @@ -45,8 +47,8 @@ def parametrize_seller_feedback(): result.append( ( marketplace_id, - {"Date": input_date, "Rating": 1, "Comments": "c", "Response": "r", "Order ID": "1", "Rater Email": "e"}, - {"Date": expected_date, "Rating": 1, "Comments": "c", "Response": "r", "Order ID": "1", "Rater Email": "e"}, + {"date": input_date, "rating": 1, "comments": "c", "response": "r", "order_id": "1", "rater_email": "e"}, + {"date": expected_date, "rating": 1, "comments": "c", "response": "r", "order_id": "1", "rater_email": "e"}, ) ) diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index f75d8c965ddf6..90aeb0b3def04 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -64,6 +64,7 @@ Information about rate limits you may find [here](https://github.com/amzn/sellin | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.2.9` | 2021-12-30 | [\#9212](https://github.com/airbytehq/airbyte/pull/9212) | Normalize GET_SELLER_FEEDBACK_DATA header field names | | `0.2.8` | 2021-12-22 | [\#8810](https://github.com/airbytehq/airbyte/pull/8810) | Fix GET_SELLER_FEEDBACK_DATA Date cursor field format | | `0.2.7` | 2021-12-21 | [\#9002](https://github.com/airbytehq/airbyte/pull/9002) | Extract REPORTS_MAX_WAIT_SECONDS to configurable parameter | | `0.2.6` | 2021-12-10 | [\#8179](https://github.com/airbytehq/airbyte/pull/8179) | Add GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT report | From df03e60226dbf731e8d2a3383491a4b16d5964cb Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Mon, 3 Jan 2022 16:30:45 +0100 Subject: [PATCH 013/215] Bmoric/migrate existing schedule (#9076) This migrates existing connections to the new scheduler. It refrains from migrating connections that have already been migrated.
closes #8546 --- .../java/io/airbyte/server/ServerApp.java | 55 +++++++++++++++++++ .../workers/temporal/TemporalClient.java | 13 ++++- .../worker_run/TemporalWorkerRunFactory.java | 5 ++ .../workers/temporal/TemporalClientTest.java | 33 ++++++++++- 4 files changed, 104 insertions(+), 2 deletions(-) diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index 3fe5f63d856c3..8daabdc64fd26 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -15,6 +15,7 @@ import io.airbyte.config.EnvConfigs; import io.airbyte.config.helpers.LogClientSingleton; import io.airbyte.config.init.YamlSeedConfigPersistence; +import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; @@ -30,9 +31,13 @@ import io.airbyte.scheduler.client.DefaultSchedulerJobClient; import io.airbyte.scheduler.client.DefaultSynchronousSchedulerClient; import io.airbyte.scheduler.client.SchedulerJobClient; +import io.airbyte.scheduler.models.Job; +import io.airbyte.scheduler.models.JobStatus; import io.airbyte.scheduler.persistence.DefaultJobCreator; import io.airbyte.scheduler.persistence.DefaultJobPersistence; +import io.airbyte.scheduler.persistence.JobNotifier; import io.airbyte.scheduler.persistence.JobPersistence; +import io.airbyte.scheduler.persistence.WorkspaceHelper; import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; import io.airbyte.scheduler.persistence.job_tracker.JobTracker; import io.airbyte.server.errors.InvalidInputExceptionMapper; @@ -41,15 +46,19 @@ import io.airbyte.server.errors.KnownExceptionMapper; import io.airbyte.server.errors.NotFoundExceptionMapper; import io.airbyte.server.errors.UncaughtExceptionMapper; +import io.airbyte.validation.json.JsonValidationException; import io.airbyte.workers.WorkerConfigs; import io.airbyte.workers.temporal.TemporalClient; import io.airbyte.workers.temporal.TemporalUtils; import io.airbyte.workers.worker_run.TemporalWorkerRunFactory; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.io.IOException; import java.net.http.HttpClient; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; import lombok.val; import org.eclipse.jetty.server.Server; import org.eclipse.jetty.servlet.ServletContextHandler; @@ -189,6 +198,16 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con configs.getAirbyteVersionOrWarning(), featureFlags); + if (featureFlags.usesNewScheduler()) { + final JobNotifier jobNotifier = new JobNotifier( + configs.getWebappUrl(), + configRepository, + new WorkspaceHelper(configRepository, jobPersistence), + TrackingClientSingleton.get()); + cleanupZombies(jobPersistence, jobNotifier); + migrateExistingConnection(configRepository, temporalWorkerRunFactory); + } + LOGGER.info("Starting server..."); return apiFactory.create( @@ -212,6 +231,42 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con temporalWorkerRunFactory); } + private static void migrateExistingConnection(final ConfigRepository configRepository, final TemporalWorkerRunFactory temporalWorkerRunFactory) + throws JsonValidationException, ConfigNotFoundException, IOException { + 
LOGGER.info("Start migration to the new scheduler..."); + final Set connectionIds = + configRepository.listStandardSyncs().stream().map(standardSync -> standardSync.getConnectionId()).collect(Collectors.toSet()); + temporalWorkerRunFactory.migrateSyncIfNeeded(connectionIds); + LOGGER.info("Done migrating to the new scheduler..."); + } + + /** + * Copy paste from {@link io.airbyte.scheduler.app.SchedulerApp} which will be removed in a near + * future + * + * @param jobPersistence + * @param jobNotifier + * @throws IOException + */ + private static void cleanupZombies(final JobPersistence jobPersistence, final JobNotifier jobNotifier) throws IOException { + for (final Job zombieJob : jobPersistence.listJobsWithStatus(JobStatus.RUNNING)) { + jobNotifier.failJob("zombie job was failed", zombieJob); + + final int currentAttemptNumber = zombieJob.getAttemptsCount() - 1; + + LOGGER.warn( + "zombie clean up - job attempt was failed. job id: {}, attempt number: {}, type: {}, scope: {}", + zombieJob.getId(), + currentAttemptNumber, + zombieJob.getConfigType(), + zombieJob.getScope()); + + jobPersistence.failAttempt( + zombieJob.getId(), + currentAttemptNumber); + } + } + public static void main(final String[] args) throws Exception { try { getServer(new ServerFactory.Api(), YamlSeedConfigPersistence.getDefault()).start(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java index 04deda7aa2f4e..542053fc8c4af 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java @@ -41,6 +41,7 @@ import java.nio.file.Path; import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -150,6 +151,15 @@ public TemporalResponse submitSync(final long jobId, final i connectionId)); } + public void migrateSyncIfNeeded(final Set connectionIds) { + connectionIds.forEach((connectionId) -> { + if (!isWorkflowRunning(getConnectionManagerName(connectionId))) { + log.info("Migrating: " + connectionId); + submitConnectionUpdaterAsync(connectionId); + } + }); + } + public void submitConnectionUpdaterAsync(final UUID connectionId) { log.info("Starting the scheduler temporal wf"); final ConnectionManagerWorkflow connectionManagerWorkflow = getWorkflowOptionsWithWorkflowId(ConnectionManagerWorkflow.class, @@ -343,7 +353,8 @@ public boolean isWorkflowRunning(final String workflowName) { return false; } - private String getConnectionManagerName(final UUID connectionId) { + @VisibleForTesting + static String getConnectionManagerName(final UUID connectionId) { return "connection_manager_" + connectionId; } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/worker_run/TemporalWorkerRunFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/worker_run/TemporalWorkerRunFactory.java index 4e6f9e2076947..af68888144044 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/worker_run/TemporalWorkerRunFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/worker_run/TemporalWorkerRunFactory.java @@ -26,6 +26,7 @@ import io.airbyte.workers.temporal.TemporalResponse; import java.io.IOException; import java.nio.file.Path; +import java.util.Set; import java.util.UUID; import lombok.AllArgsConstructor; import org.slf4j.Logger; @@ -62,6 +63,10 @@ public void 
deleteConnection(final UUID connectionId) { temporalClient.deleteConnection(connectionId); } + public void migrateSyncIfNeeded(final Set connectionIds) { + temporalClient.migrateSyncIfNeeded(connectionIds); + } + public CheckedSupplier, Exception> createSupplier(final Job job, final int attemptId) { final TemporalJobType temporalJobType = toTemporalJobType(job.getConfigType()); final UUID connectionId = UUID.fromString(job.getScope()); diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalClientTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalClientTest.java index 46558e84ce167..0a412dcf49775 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalClientTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalClientTest.java @@ -8,10 +8,15 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import com.google.common.collect.Sets; import io.airbyte.commons.json.Jsons; import io.airbyte.config.Configs; import io.airbyte.config.JobCheckConnectionConfig; @@ -74,7 +79,7 @@ void setup() throws IOException { logPath = workspaceRoot.resolve(String.valueOf(JOB_ID)).resolve(String.valueOf(ATTEMPT_ID)).resolve(LogClientSingleton.LOG_FILENAME); workflowClient = mock(WorkflowClient.class); workflowServiceStubs = mock(WorkflowServiceStubs.class); - temporalClient = new TemporalClient(workflowClient, workspaceRoot, workflowServiceStubs, configs); + temporalClient = spy(new TemporalClient(workflowClient, workspaceRoot, workflowServiceStubs, configs)); } @Nested @@ -193,4 +198,30 @@ void testSubmitSync() { } + @Nested + @DisplayName("Test related to the migration to the new scheduler") + class TestMigration { + + @DisplayName("Test that the migration is properly done if needed") + @Test + public void migrateCalled() { + final UUID nonMigratedId = UUID.randomUUID(); + final UUID migratedId = UUID.randomUUID(); + + doReturn(false) + .when(temporalClient).isWorkflowRunning(TemporalClient.getConnectionManagerName(nonMigratedId)); + doReturn(true) + .when(temporalClient).isWorkflowRunning(TemporalClient.getConnectionManagerName(migratedId)); + + doNothing() + .when(temporalClient).submitConnectionUpdaterAsync(nonMigratedId); + + temporalClient.migrateSyncIfNeeded(Sets.newHashSet(nonMigratedId, migratedId)); + + verify(temporalClient, times(1)).submitConnectionUpdaterAsync(nonMigratedId); + verify(temporalClient, times(0)).submitConnectionUpdaterAsync(migratedId); + } + + } + } From c9adee617834e633b12f1efb867ad08ed32a7b05 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Mon, 3 Jan 2022 23:59:55 +0800 Subject: [PATCH 014/215] Clean up Docker compose env vars. (#9209) - sort docker env vars. - remove all non-docker related env vars. - add what is missing. For the .env file: - sort the file to match the Configs.java lay out for better reading. - get rid of env vars that are not used in docker - get rid of env vars that have defaults, with the exception of var that are for scaling e.g. 
submitter_num_threads, worker related vars to prevent the env file from getting too large - add a header to clarify when/where to add env vars to the file For the docker compose file: - sort the env vars alphabetically - get rid of env vars that aren't used in that application - add missing env vars into the worker application --- .env | 118 ++++++------- .env.dev | 2 + .../java/io/airbyte/config/EnvConfigs.java | 2 - docker-compose.yaml | 163 ++++++++---------- 4 files changed, 129 insertions(+), 156 deletions(-) diff --git a/.env b/.env index ff185b71a36dc..02b84180e21a4 100644 --- a/.env +++ b/.env @@ -1,32 +1,22 @@ +# This file only contains Docker relevant variables. +# +# Variables with defaults have been omitted to avoid duplication of defaults. +# The only exception to the non-default rule are env vars related to scaling. +# +# See https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java +# for the latest environment variables. +# +# # Contributors - please organise this env file according to the above linked file. + + +### SHARED ### VERSION=0.35.2-alpha -# Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db -DATABASE_USER=docker -DATABASE_PASSWORD=docker -DATABASE_HOST=db -DATABASE_PORT=5432 -DATABASE_DB=airbyte -# translate manually DATABASE_URL=jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT/${DATABASE_DB} (do not include the username or password here) -DATABASE_URL=jdbc:postgresql://db:5432/airbyte -JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.29.15.001 - -# Airbyte Internal Config Database, default to reuse the Job Database when they are empty -# Usually you do not need to set them; they are explicitly left empty to mute docker compose warnings -CONFIG_DATABASE_USER= -CONFIG_DATABASE_PASSWORD= -CONFIG_DATABASE_URL= -CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.35.1.001 - -RUN_DATABASE_MIGRATION_ON_STARTUP=true - -# When using the airbyte-db via default docker image: +# When using the airbyte-db via default docker image CONFIG_ROOT=/data DATA_DOCKER_MOUNT=airbyte_data DB_DOCKER_MOUNT=airbyte_db -# Temporal.io worker configuration -TEMPORAL_HOST=airbyte-temporal:7233 - # Workspace storage for running jobs (logs, etc) WORKSPACE_ROOT=/tmp/workspace WORKSPACE_DOCKER_MOUNT=airbyte_workspace @@ -42,59 +32,59 @@ LOCAL_DOCKER_MOUNT=/tmp/airbyte_local # Issue: https://github.com/airbytehq/airbyte/issues/577 HACK_LOCAL_ROOT_PARENT=/tmp -# Maximum simultaneous jobs -SUBMITTER_NUM_THREADS=10 -# Job container images -# Usually you should not need to set these, they have defaults already set -JOB_KUBE_SOCAT_IMAGE= -JOB_KUBE_BUSYBOX_IMAGE= -JOB_KUBE_CURL_IMAGE= +### DATABASE ### +# Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db +DATABASE_USER=docker +DATABASE_PASSWORD=docker +DATABASE_HOST=db +DATABASE_PORT=5432 +DATABASE_DB=airbyte +# translate manually DATABASE_URL=jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT/${DATABASE_DB} (do not include the username or password here) +DATABASE_URL=jdbc:postgresql://db:5432/airbyte +JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.29.15.001 + +# Airbyte Internal Config Database, defaults to Job Database if empty. Explicitly left empty to mute docker compose warnings. 
+CONFIG_DATABASE_USER= +CONFIG_DATABASE_PASSWORD= +CONFIG_DATABASE_URL= +CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.35.1.001 -# Miscellaneous -TRACKING_STRATEGY=segment + +### AIRBYTE SERVICES ### +TEMPORAL_HOST=airbyte-temporal:7233 +INTERNAL_API_HOST=airbyte-server:8001 WEBAPP_URL=http://localhost:8000/ +# Although not present as an env var, required for webapp configuration. API_URL=/api/v1/ -INTERNAL_API_HOST=airbyte-server:8001 -LOG_LEVEL=INFO -WORKER_ENVIRONMENT=docker -# Cloud log backups. Don't use this unless you know what you're doing. Mainly for Airbyte devs. -# If you just want to capture Docker logs, you probably want to use something like this instead: -# https://docs.docker.com/config/containers/logging/configure/ -S3_LOG_BUCKET= -S3_LOG_BUCKET_REGION= -AWS_ACCESS_KEY_ID= -AWS_SECRET_ACCESS_KEY= -S3_MINIO_ENDPOINT= -S3_PATH_STYLE_ACCESS= - -GCS_LOG_BUCKET= - -# Docker Resource Limits +### JOBS ### +# Relevant to scaling. +SYNC_JOB_MAX_ATTEMPTS=3 +SYNC_JOB_MAX_TIMEOUT_DAYS=3 JOB_MAIN_CONTAINER_CPU_REQUEST= JOB_MAIN_CONTAINER_CPU_LIMIT= JOB_MAIN_CONTAINER_MEMORY_REQUEST= JOB_MAIN_CONTAINER_MEMORY_LIMIT= -# Max attempts per sync and max retries per attempt -SYNC_JOB_MAX_ATTEMPTS=3 - -# Time in days to reach a timeout to cancel the synchronization -SYNC_JOB_MAX_TIMEOUT_DAYS=3 -# Set secret persistence store to use. Do not change this for existing installations! -SECRET_PERSISTENCE=NONE - -# State Cloud Storage -STATE_STORAGE_S3_BUCKET_NAME= -STATE_STORAGE_S3_REGION= +### LOGGING/MONITORING/TRACKING ### +TRACKING_STRATEGY=segment +# Although not present as an env var, expected by Log4J configuration. +LOG_LEVEL=INFO +# Although not present as an env var, helps Airbyte track job healthiness. +SENTRY_DSN="https://d4b03de0c4574c78999b8d58e55243dc@o1009025.ingest.sentry.io/6102835" -STATE_STORAGE_MINIO_BUCKET_NAME= -STATE_STORAGE_MINIO_ENDPOINT= -STATE_STORAGE_GCS_BUCKET_NAME= +### APPLICATIONS ### +# Scheduler # +# Relevant to scaling. +SUBMITTER_NUM_THREADS=10 -# Sentry -SENTRY_DSN="https://d4b03de0c4574c78999b8d58e55243dc@o1009025.ingest.sentry.io/6102835" +# Worker # +# Relevant to scaling. +MAX_SYNC_WORKERS=5 +MAX_SPEC_WORKERS=5 +MAX_CHECK_WORKERS=5 +MAX_DISCOVER_WORKERS=5 diff --git a/.env.dev b/.env.dev index c9dedb6aa3dc7..e7a4f02b7d5b0 100644 --- a/.env.dev +++ b/.env.dev @@ -1,3 +1,5 @@ +# For internal Airbyte dev use. 
+ VERSION=dev DATABASE_USER=docker DATABASE_PASSWORD=docker diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index 4ac1df231abce..327cf0196c99a 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -53,8 +53,6 @@ public class EnvConfigs implements Configs { public static final String CONFIG_DATABASE_PASSWORD = "CONFIG_DATABASE_PASSWORD"; public static final String CONFIG_DATABASE_URL = "CONFIG_DATABASE_URL"; public static final String RUN_DATABASE_MIGRATION_ON_STARTUP = "RUN_DATABASE_MIGRATION_ON_STARTUP"; - public static final String LOG_LEVEL = "LOG_LEVEL"; - public static final String S3_PATH_STYLE_ACCESS = "S3_PATH_STYLE_ACCESS"; public static final String WEBAPP_URL = "WEBAPP_URL"; public static final String JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY = "JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY"; public static final String JOB_KUBE_TOLERATIONS = "JOB_KUBE_TOLERATIONS"; diff --git a/docker-compose.yaml b/docker-compose.yaml index cc8ffd381b039..6b4e623882394 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -24,26 +24,28 @@ services: container_name: airbyte-bootloader environment: - AIRBYTE_VERSION=${VERSION} - - DATABASE_USER=${DATABASE_USER} - - DATABASE_PASSWORD=${DATABASE_PASSWORD} - - DATABASE_URL=${DATABASE_URL} - - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-} - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-} + - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} + - DATABASE_PASSWORD=${DATABASE_PASSWORD} + - DATABASE_URL=${DATABASE_URL} + - DATABASE_USER=${DATABASE_USER} + - LOG_LEVEL=${LOG_LEVEL} + - RUN_DATABASE_MIGRATION_ON_STARTUP=${RUN_DATABASE_MIGRATION_ON_STARTUP} db: image: airbyte/db:${VERSION} logging: *default-logging container_name: airbyte-db restart: unless-stopped environment: - - POSTGRES_USER=${DATABASE_USER} - - POSTGRES_PASSWORD=${DATABASE_PASSWORD} - - DATABASE_USER=${DATABASE_USER} - - DATABASE_PASSWORD=${DATABASE_PASSWORD} - - DATABASE_URL=${DATABASE_URL} - - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-} - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-} + - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} + - DATABASE_PASSWORD=${DATABASE_PASSWORD} + - DATABASE_URL=${DATABASE_URL} + - DATABASE_USER=${DATABASE_USER} + - POSTGRES_PASSWORD=${DATABASE_PASSWORD} + - POSTGRES_USER=${DATABASE_USER} volumes: - db:/var/lib/postgresql/data scheduler: @@ -52,84 +54,71 @@ services: container_name: airbyte-scheduler restart: unless-stopped environment: - - WEBAPP_URL=${WEBAPP_URL} - - DATABASE_USER=${DATABASE_USER} - - DATABASE_PASSWORD=${DATABASE_PASSWORD} - - DATABASE_URL=${DATABASE_URL} - - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} + - AIRBYTE_ROLE=${AIRBYTE_ROLE:-} + - AIRBYTE_VERSION=${VERSION} - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-} - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-} - - RUN_DATABASE_MIGRATION_ON_STARTUP=${RUN_DATABASE_MIGRATION_ON_STARTUP} - - WORKSPACE_ROOT=${WORKSPACE_ROOT} - - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT} - - LOCAL_ROOT=${LOCAL_ROOT} - - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT} + - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} - CONFIG_ROOT=${CONFIG_ROOT} - - TRACKING_STRATEGY=${TRACKING_STRATEGY} - - AIRBYTE_VERSION=${VERSION} - - AIRBYTE_ROLE=${AIRBYTE_ROLE:-} - - 
TEMPORAL_HOST=${TEMPORAL_HOST} - - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} - - S3_LOG_BUCKET=${S3_LOG_BUCKET} - - S3_LOG_BUCKET_REGION=${S3_LOG_BUCKET_REGION} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - - GCS_LOG_BUCKET=${GCS_LOG_BUCKET} - - LOG_LEVEL=${LOG_LEVEL} - - SUBMITTER_NUM_THREADS=${SUBMITTER_NUM_THREADS} - - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST} + - DATABASE_PASSWORD=${DATABASE_PASSWORD} + - DATABASE_URL=${DATABASE_URL} + - DATABASE_USER=${DATABASE_USER} + - INTERNAL_API_HOST=${INTERNAL_API_HOST} - JOB_MAIN_CONTAINER_CPU_LIMIT=${JOB_MAIN_CONTAINER_CPU_LIMIT} - - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} + - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST} - JOB_MAIN_CONTAINER_MEMORY_LIMIT=${JOB_MAIN_CONTAINER_MEMORY_LIMIT} + - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} + - LOCAL_ROOT=${LOCAL_ROOT} + - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT} + - LOG_LEVEL=${LOG_LEVEL} + - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} - SYNC_JOB_MAX_ATTEMPTS=${SYNC_JOB_MAX_ATTEMPTS} - SYNC_JOB_MAX_TIMEOUT_DAYS=${SYNC_JOB_MAX_TIMEOUT_DAYS} - - INTERNAL_API_HOST=${INTERNAL_API_HOST} - - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} + - SUBMITTER_NUM_THREADS=${SUBMITTER_NUM_THREADS} + - TEMPORAL_HOST=${TEMPORAL_HOST} + - TRACKING_STRATEGY=${TRACKING_STRATEGY} + - WEBAPP_URL=${WEBAPP_URL} + - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} + - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT} + - WORKSPACE_ROOT=${WORKSPACE_ROOT} # TODO: Remove before merge # - NEW_SCHEDULER=valuedoesntmatter volumes: + - data:${CONFIG_ROOT} - workspace:${WORKSPACE_ROOT} - ${LOCAL_ROOT}:${LOCAL_ROOT} - - data:${CONFIG_ROOT} worker: image: airbyte/worker:${VERSION} logging: *default-logging container_name: airbyte-worker restart: unless-stopped environment: - - WEBAPP_URL=${WEBAPP_URL} - - DATABASE_USER=${DATABASE_USER} - - DATABASE_PASSWORD=${DATABASE_PASSWORD} - - DATABASE_URL=${DATABASE_URL} - - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-} - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-} - - RUN_DATABASE_MIGRATION_ON_STARTUP=${RUN_DATABASE_MIGRATION_ON_STARTUP} - - WORKSPACE_ROOT=${WORKSPACE_ROOT} - - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT} - - LOCAL_ROOT=${LOCAL_ROOT} - - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT} + - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} - CONFIG_ROOT=${CONFIG_ROOT} - - TRACKING_STRATEGY=${TRACKING_STRATEGY} - - AIRBYTE_VERSION=${VERSION} - - AIRBYTE_ROLE=${AIRBYTE_ROLE:-} - - TEMPORAL_HOST=${TEMPORAL_HOST} - - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} - - S3_LOG_BUCKET=${S3_LOG_BUCKET} - - S3_LOG_BUCKET_REGION=${S3_LOG_BUCKET_REGION} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - - GCS_LOG_BUCKET=${GCS_LOG_BUCKET} - - LOG_LEVEL=${LOG_LEVEL} - - SUBMITTER_NUM_THREADS=${SUBMITTER_NUM_THREADS} - - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST} + - DATABASE_PASSWORD=${DATABASE_PASSWORD} + - DATABASE_URL=${DATABASE_URL} + - DATABASE_USER=${DATABASE_USER} - JOB_MAIN_CONTAINER_CPU_LIMIT=${JOB_MAIN_CONTAINER_CPU_LIMIT} - - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} + - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST} - JOB_MAIN_CONTAINER_MEMORY_LIMIT=${JOB_MAIN_CONTAINER_MEMORY_LIMIT} + - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} + - 
LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT} + - LOCAL_ROOT=${LOCAL_ROOT} + - LOG_LEVEL=${LOG_LEVEL} + - MAX_CHECK_WORKERS=${MAX_CHECK_WORKERS} + - MAX_DISCOVER_WORKERS=${MAX_DISCOVER_WORKERS} + - MAX_SPEC_WORKERS=${MAX_SPEC_WORKERS} + - MAX_SYNC_WORKERS=${MAX_SYNC_WORKERS} + - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} - SYNC_JOB_MAX_ATTEMPTS=${SYNC_JOB_MAX_ATTEMPTS} - SYNC_JOB_MAX_TIMEOUT_DAYS=${SYNC_JOB_MAX_TIMEOUT_DAYS} - - INTERNAL_API_HOST=${INTERNAL_API_HOST} - - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} + - TEMPORAL_HOST=${TEMPORAL_HOST} + - TRACKING_STRATEGY=${TRACKING_STRATEGY} + - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} + - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT} + - WORKSPACE_ROOT=${WORKSPACE_ROOT} volumes: - /var/run/docker.sock:/var/run/docker.sock - workspace:${WORKSPACE_ROOT} @@ -140,35 +129,29 @@ services: container_name: airbyte-server restart: unless-stopped environment: - - WEBAPP_URL=${WEBAPP_URL} - - DATABASE_USER=${DATABASE_USER} - - DATABASE_PASSWORD=${DATABASE_PASSWORD} - - DATABASE_URL=${DATABASE_URL} - - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} + - AIRBYTE_ROLE=${AIRBYTE_ROLE:-} + - AIRBYTE_VERSION=${VERSION} - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-} - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-} - - RUN_DATABASE_MIGRATION_ON_STARTUP=${RUN_DATABASE_MIGRATION_ON_STARTUP} - - WORKSPACE_ROOT=${WORKSPACE_ROOT} + - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} + - CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-} - CONFIG_ROOT=${CONFIG_ROOT} - - TRACKING_STRATEGY=${TRACKING_STRATEGY} - - AIRBYTE_VERSION=${VERSION} - - AIRBYTE_ROLE=${AIRBYTE_ROLE:-} - - TEMPORAL_HOST=${TEMPORAL_HOST} - - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} - - S3_LOG_BUCKET=${S3_LOG_BUCKET} - - S3_LOG_BUCKET_REGION=${S3_LOG_BUCKET_REGION} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - - GCS_LOG_BUCKET=${GCS_LOG_BUCKET} - - LOG_LEVEL=${LOG_LEVEL} - - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST} + - DATABASE_PASSWORD=${DATABASE_PASSWORD} + - DATABASE_URL=${DATABASE_URL} + - DATABASE_USER=${DATABASE_USER} - JOB_MAIN_CONTAINER_CPU_LIMIT=${JOB_MAIN_CONTAINER_CPU_LIMIT} - - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} + - JOB_MAIN_CONTAINER_CPU_REQUEST=${JOB_MAIN_CONTAINER_CPU_REQUEST} - JOB_MAIN_CONTAINER_MEMORY_LIMIT=${JOB_MAIN_CONTAINER_MEMORY_LIMIT} + - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} + - JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-} + - LOG_LEVEL=${LOG_LEVEL} - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} + - TEMPORAL_HOST=${TEMPORAL_HOST} + - TRACKING_STRATEGY=${TRACKING_STRATEGY} + - WEBAPP_URL=${WEBAPP_URL} + - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} + - WORKSPACE_ROOT=${WORKSPACE_ROOT} # - NEW_SCHEDULER=valuedoesntmatter - - CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-} - - JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-} ports: - 8001:8001 volumes: @@ -186,12 +169,12 @@ services: - AIRBYTE_ROLE=${AIRBYTE_ROLE:-} - AIRBYTE_VERSION=${VERSION} - API_URL=${API_URL:-} - - IS_DEMO=${IS_DEMO:-} - FULLSTORY=${FULLSTORY:-} - - TRACKING_STRATEGY=${TRACKING_STRATEGY} - INTERNAL_API_HOST=${INTERNAL_API_HOST} + - IS_DEMO=${IS_DEMO:-} - OPENREPLAY=${OPENREPLAY:-} - PAPERCUPS_STORYTIME=${PAPERCUPS_STORYTIME:-} + - 
TRACKING_STRATEGY=${TRACKING_STRATEGY} airbyte-temporal: image: airbyte/temporal:${VERSION} logging: *default-logging container_name: airbyte-temporal restart: unless-stopped ports: - 7233:7233 environment: - DB=postgresql - DB_PORT=${DATABASE_PORT} - - POSTGRES_USER=${DATABASE_USER} - - POSTGRES_PWD=${DATABASE_PASSWORD} - - POSTGRES_SEEDS=${DATABASE_HOST} - DYNAMIC_CONFIG_FILE_PATH=config/dynamicconfig/development.yaml - LOG_LEVEL=${LOG_LEVEL} + - POSTGRES_PWD=${DATABASE_PASSWORD} + - POSTGRES_SEEDS=${DATABASE_HOST} + - POSTGRES_USER=${DATABASE_USER} volumes: - ./temporal/dynamicconfig:/etc/temporal/config/dynamicconfig volumes: From 06fc680e4b723c8af8261988a09cbd8ae345dde2 Mon Sep 17 00:00:00 2001 From: Augustin Date: Mon, 3 Jan 2022 17:59:48 +0100 Subject: [PATCH 015/215] fix anchors in postgres doc (#9219) --- docs/integrations/sources/postgres.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index 93a176f0f0ea3..3695a3316c1d7 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -100,9 +100,9 @@ Please read the [CDC docs](../../understanding-airbyte/cdc.md) for an overview o Follow one of these guides to enable logical replication: -* [Bare Metal, VMs \(EC2/GCE/etc\), Docker, etc.](postgres.md#setting-up-cdc-on-bare-metal-vms-ec2gceetc-docker-etc) -* [AWS Postgres RDS or Aurora](postgres.md#setting-up-cdc-on-aws-postgres-rds-or-aurora) -* [Azure Database for Postgres](postgres.md#setting-up-cdc-on-azure-database-for-postgres) +* [Bare Metal, VMs \(EC2/GCE/etc\), Docker, etc.](postgres.md#cdc-on-bare-metal-vms-ec2-gce-etc-docker-etc.) +* [AWS Postgres RDS or Aurora](postgres.md#cdc-on-aws-postgres-rds-or-aurora) +* [Azure Database for Postgres](postgres.md#cdc-on-azure-database-for-postgres) #### 2. Add user-level permissions From 73002401543cf6e8ac7a201ca27bcdfd64d2b0da Mon Sep 17 00:00:00 2001 From: Augustin Date: Mon, 3 Jan 2022 19:13:07 +0100 Subject: [PATCH 016/215] Doc: new Worker parallelization section in jobs.md (#9191) --- docs/understanding-airbyte/jobs.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/understanding-airbyte/jobs.md b/docs/understanding-airbyte/jobs.md index 3c7455ecf57f4..9c52a25c79a58 100644 --- a/docs/understanding-airbyte/jobs.md +++ b/docs/understanding-airbyte/jobs.md @@ -39,6 +39,16 @@ Note: When a source has passed all of its messages, the docker process should au See the [architecture overview](high-level-view.md) for more information about workers. +## Worker parallelization +Airbyte exposes the following environment variables to change the maximum number of each type of worker allowed to run in parallel. +Tweaking these values might help you run more jobs in parallel and increase the workload of your Airbyte instance: +* `MAX_SPEC_WORKERS`: Maximum number of *Spec* workers allowed to run in parallel. +* `MAX_CHECK_WORKERS`: Maximum number of *Check connection* workers allowed to run in parallel. +* `MAX_DISCOVER_WORKERS`: Maximum number of *Discovery* workers allowed to run in parallel. +* `MAX_SYNC_WORKERS`: Maximum number of *Sync* workers allowed to run in parallel. + +The default value for each of these environment variables is **5**. ## Job State Machine Jobs in the worker follow the following state machine.
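For Docker Compose deployments, the parallelization limits documented above are passed to the airbyte-worker container as environment variables (see the docker-compose.yaml and .env changes earlier in this series). A minimal sketch of raising them via the .env file follows; the variable names come from those files, while the values shown are illustrative assumptions rather than recommendations:

```shell
# Illustrative .env override: allow more workers of each type to run in parallel.
# Defaults are 5; higher values let more jobs run concurrently but consume more resources.
MAX_SYNC_WORKERS=10
MAX_SPEC_WORKERS=10
MAX_CHECK_WORKERS=10
MAX_DISCOVER_WORKERS=10
```

With Docker Compose, the worker container picks up the new values the next time it is recreated (for example via `docker-compose up -d`).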
From d5565d3a4e6e23b12ed6d6133323ba5bad309018 Mon Sep 17 00:00:00 2001 From: Lake Mossman Date: Mon, 3 Jan 2022 13:46:48 -0800 Subject: [PATCH 017/215] raise init pod termination check timeout to 5 minutes (#9260) --- .../main/java/io/airbyte/workers/process/KubePodProcess.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index 65cc267c4c34e..c90f13449686b 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -243,7 +243,7 @@ private static void copyFilesToKubeConfigVolume(final KubernetesClient client, // Copying the success indicator file to the init container causes the container to immediately // exit, causing the `kubectl cp` command to exit with code 137. This check ensures that an error is // not thrown in this case if the init container exits successfully. - if (file.getKey().equals(SUCCESS_FILE_NAME) && waitForInitPodToTerminate(client, podDefinition, 10, TimeUnit.SECONDS) == 0) { + if (file.getKey().equals(SUCCESS_FILE_NAME) && waitForInitPodToTerminate(client, podDefinition, 5, TimeUnit.MINUTES) == 0) { LOGGER.info("Init was successful; ignoring non-zero kubectl cp exit code for success indicator file."); } else { throw new IOException("kubectl cp failed with exit code " + exitCode); From 1e06646c16f1135701d8dfd19c409838bead3b37 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jan 2022 16:36:38 -0800 Subject: [PATCH 018/215] DAT module uses junit-bom (#9263) --- .../bases/standard-destination-test/build.gradle | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/standard-destination-test/build.gradle b/airbyte-integrations/bases/standard-destination-test/build.gradle index dffb19b2ec89c..b8c61532e8b0a 100644 --- a/airbyte-integrations/bases/standard-destination-test/build.gradle +++ b/airbyte-integrations/bases/standard-destination-test/build.gradle @@ -7,6 +7,7 @@ dependencies { implementation project(':airbyte-protocol:models') implementation project(':airbyte-workers') - implementation 'org.junit.jupiter:junit-jupiter-api:5.4.2' - implementation 'org.junit.jupiter:junit-jupiter-params:5.4.2' + implementation(enforcedPlatform('org.junit:junit-bom:5.4.2')) + implementation 'org.junit.jupiter:junit-jupiter-api' + implementation 'org.junit.jupiter:junit-jupiter-params' } From cb68e519c87fb9211c55077c796b139b70d88cb2 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Mon, 3 Jan 2022 17:04:19 -0800 Subject: [PATCH 019/215] Bump Airbyte version from 0.35.2-alpha to 0.35.3-alpha (#9262) Co-authored-by: lmossman --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-bootloader/Dockerfile | 4 ++-- airbyte-container-orchestrator/Dockerfile | 6 +++--- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 4 ++-- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 12 ++++++------ kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 12 ++++++------ 17 files changed, 40 insertions(+), 40 
deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index d8f6be5a79b00..82c6923033783 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.2-alpha +current_version = 0.35.3-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index 02b84180e21a4..2dc357e36e1d0 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.2-alpha +VERSION=0.35.3-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index f2428c55b2988..8534ee8eab9f8 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -5,6 +5,6 @@ ENV APPLICATION airbyte-bootloader WORKDIR /app -ADD bin/${APPLICATION}-0.35.2-alpha.tar /app +ADD bin/${APPLICATION}-0.35.3-alpha.tar /app -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.2-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index 6365830cb0bab..7f34019030c4b 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -19,12 +19,12 @@ RUN add-apt-repository \ RUN apt-get update && apt-get install -y docker-ce-cli jq ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.2-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.2-alpha.tar /app +ADD bin/${APPLICATION}-0.35.3-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.2-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 50b38855b26bd..09cc13d39c00b 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.2-alpha.tar /app +ADD bin/${APPLICATION}-0.35.3-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.2-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 1e511320d8c36..9d3ce0481a510 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.35.2-alpha.tar /app +ADD bin/${APPLICATION}-0.35.3-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.2-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 984b5af4fb4e0..72d6b1b47d7da 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.2-alpha", + "version": "0.35.3-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": 
"0.35.2-alpha", + "version": "0.35.3-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 1d9f7f5b4eb7b..74657a7d6a19b 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.2-alpha", + "version": "0.35.3-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 190a6b7744043..993740bba1d82 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.2-alpha.tar /app +ADD bin/${APPLICATION}-0.35.3-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.2-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index ddd1117703b0f..d4fdb10626390 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.2-alpha" +appVersion: "0.35.3-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 7e638648c8d46..fcf8a050d5315 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.2-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -55,7 +55,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.2-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -86,7 +86,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. 
| `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.2-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -120,7 +120,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.2-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 7858c6a3ea544..07d59b1d2c73e 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.2-alpha + tag: 0.35.3-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -140,7 +140,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.2-alpha + tag: 0.35.3-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -245,7 +245,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.2-alpha + tag: 0.35.3-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -357,7 +357,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.2-alpha + tag: 0.35.3-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index ccd7774e05818..99d7c703f9ecf 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -99,7 +99,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.2-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.3-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 7f3c5a662f8bc..bb6f2dfe836a1 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.2-alpha +AIRBYTE_VERSION=0.35.3-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 7b3bd7f9bd7af..c82ddcd10811e 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/bootloader - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/scheduler - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/server - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/webapp - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/worker - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 7f3c5a662f8bc..bb6f2dfe836a1 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.2-alpha +AIRBYTE_VERSION=0.35.3-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 455272a062383..c284f922b3142 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/bootloader - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/scheduler - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/server - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/webapp - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: airbyte/worker - newTag: 0.35.2-alpha + newTag: 0.35.3-alpha - name: temporalio/auto-setup newTag: 1.7.0 From a24a2870545f1a6b3d29937a26282fa9abb917fb Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Tue, 4 Jan 2022 09:28:21 +0100 Subject: [PATCH 020/215] Add test delays (#9268) increase test waiting time in order to make them less flaky --- .../ConnectionManagerWorkflowTest.java | 23 ++++++++++--------- .../SleepingSyncWorkflow.java | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java index 2244029d140d1..33a70b82e8e86 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java @@ -190,9 +190,9 @@ public void manualRun() { workflowState); 
WorkflowClient.start(workflow::run, input); - testEnv.sleep(Duration.ofSeconds(2L)); + testEnv.sleep(Duration.ofSeconds(30L)); workflow.submitManualSync(); - testEnv.sleep(Duration.ofSeconds(50L)); + testEnv.sleep(Duration.ofSeconds(20L)); final Queue events = testStateListener.events(testId); @@ -231,9 +231,9 @@ public void updatedSignalRecieved() { workflowState); WorkflowClient.start(workflow::run, input); - testEnv.sleep(Duration.ofSeconds(2L)); + testEnv.sleep(Duration.ofSeconds(30L)); workflow.connectionUpdated(); - testEnv.sleep(Duration.ofSeconds(50L)); + testEnv.sleep(Duration.ofSeconds(20L)); final Queue events = testStateListener.events(testId); @@ -272,9 +272,9 @@ public void cancelNonRunning() { workflowState); WorkflowClient.start(workflow::run, input); - testEnv.sleep(Duration.ofSeconds(50L)); + testEnv.sleep(Duration.ofSeconds(30L)); workflow.cancelJob(); - testEnv.sleep(Duration.ofSeconds(2L)); + testEnv.sleep(Duration.ofSeconds(20L)); final Queue events = testStateListener.events(testId); @@ -313,8 +313,9 @@ public void deleteSync() { workflowState); WorkflowClient.start(workflow::run, input); + testEnv.sleep(Duration.ofSeconds(30L)); workflow.deleteConnection(); - testEnv.sleep(Duration.ofSeconds(50L)); + testEnv.sleep(Duration.ofMinutes(20L)); final Queue events = testStateListener.events(testId); @@ -380,7 +381,7 @@ public void manualRun() { workflowState); WorkflowClient.start(workflow::run, input); - testEnv.sleep(Duration.ofSeconds(61L)); + testEnv.sleep(Duration.ofMinutes(2L)); workflow.submitManualSync(); final Queue events = testStateListener.events(testId); @@ -410,15 +411,15 @@ public void cancelRunning() { WorkflowClient.start(workflow::run, input); workflow.submitManualSync(); - testEnv.sleep(Duration.ofSeconds(1L)); + testEnv.sleep(Duration.ofSeconds(30L)); workflow.cancelJob(); - testEnv.sleep(Duration.ofSeconds(2L)); + testEnv.sleep(Duration.ofMinutes(1L)); final Queue events = testStateListener.events(testId); Assertions.assertThat(events) .filteredOn(changedStateEvent -> changedStateEvent.getField() == StateField.CANCELLED && changedStateEvent.isValue()) - .hasSize(1); + .hasSizeGreaterThanOrEqualTo(1); Mockito.verify(mJobCreationAndStatusUpdateActivity).jobCancelled(Mockito.any()); diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/testsyncworkflow/SleepingSyncWorkflow.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/testsyncworkflow/SleepingSyncWorkflow.java index ab7f68fea9e39..890483ba54380 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/testsyncworkflow/SleepingSyncWorkflow.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/testsyncworkflow/SleepingSyncWorkflow.java @@ -22,7 +22,7 @@ public StandardSyncOutput run(final JobRunConfig jobRunConfig, final StandardSyncInput syncInput, final UUID connectionId) { - Workflow.sleep(Duration.ofSeconds(2)); + Workflow.sleep(Duration.ofMinutes(1)); return new StandardSyncOutput(); } From 8c3c68c160528d5273845c4d95094b840719d1e4 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 4 Jan 2022 17:27:58 +0800 Subject: [PATCH 021/215] Document various available configuration. (#9249) - Add comments to the interface methods in Configs.java. - Add new document on configuring airbyte. Transfer the non internal-only variables to this document. 
--- airbyte-commons/src/main/resources/log4j2.xml | 2 + .../main/java/io/airbyte/config/Configs.java | 210 +++++++++++++++++- .../java/io/airbyte/config/EnvConfigs.java | 3 +- docs/SUMMARY.md | 1 + docs/operator-guides/scaling-airbyte.md | 4 +- .../configuring-airbyte.md | 120 ++++++++++ 6 files changed, 328 insertions(+), 12 deletions(-) create mode 100644 docs/understanding-airbyte/configuring-airbyte.md diff --git a/airbyte-commons/src/main/resources/log4j2.xml b/airbyte-commons/src/main/resources/log4j2.xml index 8ea2c1a4de455..ed578354eb798 100644 --- a/airbyte-commons/src/main/resources/log4j2.xml +++ b/airbyte-commons/src/main/resources/log4j2.xml @@ -14,6 +14,8 @@ This is useful if you want to override the environment variables at runtime (or if you don't have access to the necessary information at the point where you are setting environment variables). + + Please update configuring-airbyte.md if the names of any of the below variables change. --> diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java index 7ee283c1821aa..2a7e10cf26a78 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java @@ -12,125 +12,317 @@ import java.util.Map; import java.util.Set; +/** + * This interface defines the general variables for configuring Airbyte. + *

+ * Please update the configuring-airbyte.md document when modifying this file. + *

+ * Please also add one of the following tags to the env var accordingly: + *

+ * 1. 'Internal-use only' if a var is mainly for Airbyte-only configuration. e.g. tracking, test or + * Cloud related etc. + *

+ * 2. 'Alpha support' if a var does not have proper support and should be used with care. + */ public interface Configs { // CORE // General + /** + * Distinguishes internal Airbyte deployments. Internal-use only. + */ String getAirbyteRole(); + /** + * Defines the Airbyte deployment version. + */ AirbyteVersion getAirbyteVersion(); String getAirbyteVersionOrWarning(); + /** + * Defines the bucket for caching specs. This immensely speeds up spec operations. This is updated + * when new versions are published. + */ String getSpecCacheBucket(); + /** + * Distinguishes internal Airbyte deployments. Internal-use only. + */ DeploymentMode getDeploymentMode(); + /** + * Defines if the deployment is Docker or Kubernetes. Airbyte behaves accordingly. + */ WorkerEnvironment getWorkerEnvironment(); + /** + * Defines the configs directory. Applies only to Docker, and is present in Kubernetes for backward + * compatibility. + */ Path getConfigRoot(); + /** + * Defines the Airbyte workspace directory. Applies only to Docker, and is present in Kubernetes for + * backward compatibility. + */ Path getWorkspaceRoot(); // Docker Only + /** + * Defines the name of the Airbyte docker volume. + */ String getWorkspaceDockerMount(); + /** + * Defines the name of the docker mount that is used for local file handling. On Docker, this allows + * connector pods to interact with a volume for "local file" operations. + */ String getLocalDockerMount(); + /** + * Defines the docker network jobs are launched on with the new scheduler. + */ String getDockerNetwork(); Path getLocalRoot(); // Secrets + /** + * Defines the GCP Project to store secrets in. Alpha support. + */ String getSecretStoreGcpProjectId(); + /** + * Define the JSON credentials used to read/write Airbyte Configuration to Google Secret Manager. + * These credentials must have Secret Manager Read/Write access. Alpha support. + */ String getSecretStoreGcpCredentials(); + /** + * Defines the Secret Persistence type. None by default. Set to GOOGLE_SECRET_MANAGER to use Google + * Secret Manager. Set to TESTING_CONFIG_DB_TABLE to use the database as a test. Alpha support. + * Undefined behavior will result if this is turned on and then off. + */ SecretPersistenceType getSecretPersistenceType(); // Database + /** + * Define the Jobs Database user. + */ String getDatabaseUser(); + /** + * Define the Jobs Database password. + */ String getDatabasePassword(); + /** + * Define the Jobs Database url in the form of + * jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT/${DATABASE_DB}. Do not include username or + * password. + */ String getDatabaseUrl(); + /** + * Define the minimum flyway migration version the Jobs Database must be at. If this is not + * satisfied, applications will not successfully connect. Internal-use only. + */ String getJobsDatabaseMinimumFlywayMigrationVersion(); + /** + * Define the total time to wait for the Jobs Database to be initialized. This includes migrations. + */ long getJobsDatabaseInitializationTimeoutMs(); + /** + * Define the Configs Database user. Defaults to the Jobs Database user if empty. + */ String getConfigDatabaseUser(); + /** + * Define the Configs Database password. Defaults to the Jobs Database password if empty. + */ String getConfigDatabasePassword(); + /** + * Define the Configs Database url in the form of + * jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT/${DATABASE_DB}. Defaults to the Jobs Database + * url if empty. 
+ */ String getConfigDatabaseUrl(); + /** + * Define the minimum flyway migration version the Configs Database must be at. If this is not + * satisfied, applications will not successfully connect. Internal-use only. + */ String getConfigsDatabaseMinimumFlywayMigrationVersion(); + /** + * Define the total time to wait for the Configs Database to be initialized. This includes + * migrations. + */ long getConfigsDatabaseInitializationTimeoutMs(); + /** + * Define if the Bootloader should run migrations on start up. + */ boolean runDatabaseMigrationOnStartup(); // Airbyte Services + /** + * Define the url where Temporal is hosted at. Please include the port. Airbyte services use this + * information. + */ String getTemporalHost(); + /** + * Define the url where the Airbyte Server is hosted at. Airbyte services use this information. + * Manipulates the `INTERNAL_API_HOST` variable. + */ String getAirbyteApiHost(); + /** + * Define the port where the Airbyte Server is hosted at. Airbyte services use this information. + * Manipulates the `INTERNAL_API_HOST` variable. + */ int getAirbyteApiPort(); + /** + * Define the url the Airbyte Webapp is hosted at. Airbyte services use this information. + */ String getWebappUrl(); // Jobs + /** + * Define the number of attempts a sync will attempt before failing. + */ int getSyncJobMaxAttempts(); + /** + * Define the number of days a sync job will execute for before timing out. + */ int getSyncJobMaxTimeoutDays(); + /** + * Define the job container's minimum CPU usage. Units follow either Docker or Kubernetes, depending + * on the deployment. Defaults to none. + */ + String getJobMainContainerCpuRequest(); + + /** + * Define the job container's maximum CPU usage. Units follow either Docker or Kubernetes, depending + * on the deployment. Defaults to none. + */ + String getJobMainContainerCpuLimit(); + + /** + * Define the job container's minimum RAM usage. Units follow either Docker or Kubernetes, depending + * on the deployment. Defaults to none. + */ + String getJobMainContainerMemoryRequest(); + + /** + * Define the job container's maximum RAM usage. Units follow either Docker or Kubernetes, depending + * on the deployment. Defaults to none. + */ + String getJobMainContainerMemoryLimit(); + + // Jobs - Kube only + /** + * Define one or more Job pod tolerations. Tolerations are separated by ';'. Each toleration + * contains k=v pairs mentioning some/all of key, effect, operator and value and separated by `,`. + */ List getJobKubeTolerations(); + /** + * Define one or more Job pod node selectors. Each kv-pair is separated by a `,`. + */ Map getJobKubeNodeSelectors(); + /** + * Define the Job pod connector image pull policy. + */ String getJobKubeMainContainerImagePullPolicy(); + /** + * Define the Job pod connector image pull secret. Useful when hosting private images. + */ String getJobKubeMainContainerImagePullSecret(); + /** + * Define the Job pod socat image. + */ String getJobKubeSocatImage(); + /** + * Define the Job pod busybox image. + */ String getJobKubeBusyboxImage(); + /** + * Define the Job pod curl image pull. + */ String getJobKubeCurlImage(); + /** + * Define the Kubernetes namespace Job pods are created in. + */ String getJobKubeNamespace(); - String getJobMainContainerCpuRequest(); - - String getJobMainContainerCpuLimit(); - - String getJobMainContainerMemoryRequest(); - - String getJobMainContainerMemoryLimit(); - // Logging/Monitoring/Tracking + /** + * Define either S3, Minio or GCS as a logging backend. Kubernetes only. 
Multiple variables are + * involved here. Please see {@link CloudStorageConfigs} for more info. + */ LogConfigs getLogConfigs(); + /** + * Define either S3, Minio or GCS as a state storage backend. Multiple variables are involved here. + * Please see {@link CloudStorageConfigs} for more info. + */ CloudStorageConfigs getStateStorageCloudConfigs(); + /** + * Determine if Datadog tracking events should be published. Mainly for Airbyte internal use. + */ boolean getPublishMetrics(); + /** + * Define whether to publish tracking events to Segment or log-only. Airbyte internal use. + */ TrackingStrategy getTrackingStrategy(); // APPLICATIONS // Worker + /** + * Define the maximum number of workers each Airbyte Worker container supports. Multiple variables + * are involved here. Please see {@link MaxWorkersConfig} for more info. + */ MaxWorkersConfig getMaxWorkers(); + // Worker - Kube only + /** + * Define the local ports the Airbyte Worker pod uses to connect to the various Job pods. + */ Set getTemporalWorkerPorts(); // Scheduler + /** + * Define how and how often the Scheduler sweeps its local disk for old configs. Multiple variables + * are involved here. Please see {@link WorkspaceRetentionConfig} for more info. + */ WorkspaceRetentionConfig getWorkspaceRetentionConfig(); + /** + * Define the maximum number of concurrent jobs the Scheduler schedules. Defaults to 5. + */ String getSubmitterNumThreads(); // Container Orchestrator - + /** + * Define if Airbyte should use Scheduler V2. Internal-use only. + */ boolean getContainerOrchestratorEnabled(); enum TrackingStrategy { diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index 327cf0196c99a..e5ab6f06a5fa7 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -84,6 +84,7 @@ public class EnvConfigs implements Configs { private static final String CONFIGS_DATABASE_INITIALIZATION_TIMEOUT_MS = "CONFIGS_DATABASE_INITIALIZATION_TIMEOUT_MS"; private static final String JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION = "JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION"; private static final String JOBS_DATABASE_INITIALIZATION_TIMEOUT_MS = "JOBS_DATABASE_INITIALIZATION_TIMEOUT_MS"; + private static final String CONTAINER_ORCHESTRATOR_ENABLED = "CONTAINER_ORCHESTRATOR_ENABLED"; private static final String STATE_STORAGE_S3_BUCKET_NAME = "STATE_STORAGE_S3_BUCKET_NAME"; private static final String STATE_STORAGE_S3_REGION = "STATE_STORAGE_S3_REGION"; @@ -544,7 +545,7 @@ public String getSubmitterNumThreads() { @Override public boolean getContainerOrchestratorEnabled() { - return getEnvOrDefault("CONTAINER_ORCHESTRATOR_ENABLED", false, Boolean::valueOf); + return getEnvOrDefault(CONTAINER_ORCHESTRATOR_ENABLED, false, Boolean::valueOf); } // Helpers diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 66334fcb70e36..74f9daae815a7 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -256,6 +256,7 @@ * [Change Data Capture (CDC)](understanding-airbyte/cdc.md) * [Namespaces](understanding-airbyte/namespaces.md) * [Json to Avro Conversion](understanding-airbyte/json-avro-conversion.md) + * [Configuring Airbyte](understanding-airbyte/configuring-airbyte.md) * [Glossary of Terms](understanding-airbyte/glossary.md) * [API documentation](api-documentation.md) * [Project Overview](project-overview/README.md) diff --git a/docs/operator-guides/scaling-airbyte.md 
b/docs/operator-guides/scaling-airbyte.md index 9bfa0eade16dc..aeefa8da6fd13 100644 --- a/docs/operator-guides/scaling-airbyte.md +++ b/docs/operator-guides/scaling-airbyte.md @@ -53,9 +53,9 @@ This is a **non-issue** for users running Airbyte Docker. ### Temporal DB -Temporal maintains multiple idle connexions. By the default value is `20` and you may want to lower or increase this number. One issue we noticed is +Temporal maintains multiple idle connections. By the default value is `20` and you may want to lower or increase this number. One issue we noticed is that temporal creates multiple pools and the number specified in the `SQL_MAX_IDLE_CONNS` environment variable of the `docker.compose.yaml` file -might end up allowing 4-5 times more connexions than expected. +might end up allowing 4-5 times more connections than expected. If you want tho increase the amount of allowed idle connexion, you will also need to increase `SQL_MAX_CONNS` as well because `SQL_MAX_IDLE_CONNS` is capped by `SQL_MAX_CONNS`. diff --git a/docs/understanding-airbyte/configuring-airbyte.md b/docs/understanding-airbyte/configuring-airbyte.md new file mode 100644 index 0000000000000..45fbc6090f158 --- /dev/null +++ b/docs/understanding-airbyte/configuring-airbyte.md @@ -0,0 +1,120 @@ +# Configuring Airbyte + +This section covers how to configure Airbyte, and the various configuration Airbyte accepts. + +Configuration is currently via environment variables. See the below section on how to modify these variables. + +## Docker Deployments + +The recommended way to run an Airbyte Docker deployment is via the Airbyte repo's `docker-compose.yaml` and `.env` file. + +To configure the default Airbyte Docker deployment, modify the bundled `.env` file. The `docker-compose.yaml` file injects appropriate variables into +the containers. + +If you want to manage your own docker files, please refer to Airbyte's docker file to ensure applications get the correct variables. + +## Kubernetes Deployments + +The recommended way to run an Airbyte Kubernetes deployment is via the `Kustomize` overlays. + +We recommend using the overlays in the `stable` directory as these have preset resource limits. + +To configure the default Airbyte Kubernetes deployment, modify the `.env` in the respective directory. Each application will consume the appropriate +env var from a generated configmap. + +If you want to manage your own Kube manifests, please refer to the various `Kustomize` overlays for examples. + +## Reference + +The following are the possible configuration options organised by deployment type and services. + +Internal-only variables have been omitted for clarity. See `Configs.java` for a full list. + +Be careful using variables marked as `alpha` as they aren't meant for public consumption. + +### Shared + +The following variables are relevant to both Docker and Kubernetes. + +#### Core +1. `AIRBYTE_VERSION` - Defines the Airbyte deployment version. +2. `SPEC_CACHE_BUCKET` - Defines the bucket for caching specs. This immensely speeds up spec operations. This is updated when new versions are published. +3. `WORKER_ENVIRONMENT` - Defines if the deployment is Docker or Kubernetes. Airbyte behaves accordingly. +4. `CONFIG_ROOT` - Defines the configs directory. Applies only to Docker, and is present in Kubernetes for backward compatibility. +5. `WORKSPACE_ROOT` - Defines the Airbyte workspace directory. Applies only to Docker, and is present in Kubernetes for backward compatibility. + +#### Secrets +1. 
`SECRET_STORE_GCP_PROJECT_ID` - Defines the GCP Project to store secrets in. Alpha support. +2. `SECRET_STORE_GCP_CREDENTIALS` - Define the JSON credentials used to read/write Airbyte Configuration to Google Secret Manager. These credentials must have Secret Manager Read/Write access. Alpha support. +3. `SECRET_PERSISTENCE_TYPE` - Defines the Secret Persistence type. Defaults to NONE. Set to GOOGLE_SECRET_MANAGER to use Google Secret Manager. Set to TESTING_CONFIG_DB_TABLE to use the database as a test. Alpha support. Undefined behavior will result if this is turned on and then off. + +#### Database +1. `DATABASE_USER` - Define the Jobs Database user. +2. `DATABASE_PASSWORD` - Define the Jobs Database password. +3. `DATABASE_URL` - Define the Jobs Database url in the form of `jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}`. Do not include username or password. +4. `JOBS_DATABASE_INITIALIZATION_TIMEOUT_MS` - Define the total time to wait for the Jobs Database to be initialized. This includes migrations. +5. `CONFIG_DATABASE_USER` - Define the Configs Database user. Defaults to the Jobs Database user if empty. +6. `CONFIG_DATABASE_PASSWORD` - Define the Configs Database password. Defaults to the Jobs Database password if empty. +7. `CONFIG_DATABASE_URL` - Define the Configs Database url in the form of `jdbc:postgresql://${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}`. Defaults to the Jobs Database url if empty. +8. `CONFIG_DATABASE_INITIALIZATION_TIMEOUT_MS` - Define the total time to wait for the Configs Database to be initialized. This includes migrations. +9. `RUN_DATABASE_MIGRATION_ON_STARTUP` - Define if the Bootloader should run migrations on start up. + +#### Airbyte Services +1. `TEMPORAL_HOST` - Define the url where Temporal is hosted at. Please include the port. Airbyte services use this information. +2. `INTERNAL_API_HOST` - Define the url where the Airbyte Server is hosted at. Please include the port. Airbyte services use this information. +3. `WEBAPP_URL` - Define the url the Airbyte Webapp is hosted at. Please include the port. Airbyte services use this information. + +#### Jobs +1. `SYNC_JOB_MAX_ATTEMPTS` - Define the number of attempts a sync will attempt before failing. +2. `SYNC_JOB_MAX_TIMEOUT_DAYS` - Define the number of days a sync job will execute for before timing out. +3. `JOB_MAIN_CONTAINER_CPU_REQUEST` - Define the job container's minimum CPU usage. Units follow either Docker or Kubernetes, depending on the deployment. Defaults to none. +4. `JOB_MAIN_CONTAINER_CPU_LIMIT` - Define the job container's maximum CPU usage. Units follow either Docker or Kubernetes, depending on the deployment. Defaults to none. +5. `JOB_MAIN_CONTAINER_MEMORY_REQUEST` - Define the job container's minimum RAM usage. Units follow either Docker or Kubernetes, depending on the deployment. Defaults to none. +6. `JOB_MAIN_CONTAINER_MEMORY_LIMIT` - Define the job container's maximum RAM usage. Units follow either Docker or Kubernetes, depending on the deployment. Defaults to none. + +#### Logging +1. `LOG_LEVEL` - Define log levels. Defaults to INFO. This value is expected to be one of the various Log4J log levels. + +#### Worker +1. `MAX_SPEC_WORKERS` - Define the maximum number of Spec workers each Airbyte Worker container can support. Defaults to 5. +2. `MAX_CHECK_WORKERS` - Define the maximum number of Check workers each Airbyte Worker container can support. Defaults to 5. +3. 
`MAX_SYNC_WORKERS` - Define the maximum number of Sync workers each Airbyte Worker container can support. Defaults to 5. +4. `MAX_DISCOVER_WORKERS` - Define the maximum number of Discover workers each Airbyte Worker container can support. Defaults to 5. + +#### Scheduler +1. `SUBMITTER_NUM_THREADS` - Define the maximum number of concurrent jobs the Scheduler schedules. Defaults to 5. +2. `MINIMUM_WORKSPACE_RETENTION_DAYS` - Defines the minimum configuration file age for sweeping. The Scheduler will do its best to not sweep files younger than this. Defaults to 1 day. +3. `MAXIMUM_WORKSPACE_RETENTION_DAYS` - Defines the oldest un-swept configuration file age. Files older than this will definitely be swept. Defaults to 60 days. +4. `MAXIMUM_WORKSPACE_SIZE_MB` - Defines the workspace size sweeping will continue until. Defaults to 5GB. + +### Docker-Only +1. `WORKSPACE_DOCKER_MOUNT` - Defines the name of the Airbyte docker volume. +2. `DOCKER_NETWORK` - Defines the docker network the new Scheduler launches jobs on. +3. `LOCAL_DOCKER_MOUNT` - Defines the name of the docker mount that is used for local file handling. On Docker, this allows connector pods to interact with a volume for "local file" operations. + +### Kubernetes-Only +#### Jobs +1. `JOB_KUBE_TOLERATIONS` - Define one or more Job pod tolerations. Tolerations are separated by ';'. Each toleration contains k=v pairs mentioning some/all of key, effect, operator and value and separated by `,`. +2. `JOB_KUBE_NODE_SELECTORS` - Define one or more Job pod node selectors. Each kv-pair is separated by a `,`. +3. `JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY` - Define the Job pod connector image pull policy. +4. `JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_SECRET` - Define the Job pod connector image pull secret. Useful when hosting private images. +5. `JOB_KUBE_SOCAT_IMAGE` - Define the Job pod socat image. +6. `JOB_KUBE_BUSYBOX_IMAGE` - Define the Job pod busybox image. +7. `JOB_KUBE_CURL_IMAGE` - Define the Job pod curl image pull. +8. `JOB_KUBE_NAMESPACE` - Define the Kubernetes namespace Job pods are created in. + +#### Worker +1. `TEMPORAL_WORKER_PORTS` - Define the local ports the Airbyte Worker pod uses to connect to the various Job pods. Ports 9001 - 9040 are exposed by default in the Kustomize deployments. + +#### Logging +Note that Airbyte does not support logging to separate Cloud Storage providers. + +Please see [here](https://docs.airbyte.com/deploying-airbyte/on-kubernetes#configure-logs) for more information on configuring Kubernetes logging. + +1. `GCS_LOG_BUCKET` - Define the GCS bucket to store logs. +2. `S3_BUCKET` - Define the S3 bucket to store logs. +3. `S3_RREGION` - Define the S3 region the S3 log bucket is in. +4. `S3_AWS_KEY` - Define the key used to access the S3 log bucket. +5. `S3_AWS_SECRET` - Define the secret used to access the S3 log bucket. +6. `S3_MINIO_ENDPOINT` - Define the url Minio is hosted at so Airbyte can use Minio to store logs. +7. `S3_PATH_STYLE_ACCESS` - Set to `true` if using Minio to store logs. Empty otherwise. From 8d34062a4f7492f7d606a74879a3e1210425920d Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Tue, 4 Jan 2022 18:10:49 +0800 Subject: [PATCH 022/215] Move configuring airbyte to operator guides. 
(#9272) --- docs/SUMMARY.md | 2 +- .../configuring-airbyte.md | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/{understanding-airbyte => operator-guides}/configuring-airbyte.md (100%) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 74f9daae815a7..34ff335e9cf59 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -28,6 +28,7 @@ * [Transformations with SQL (Part 1/3)](operator-guides/transformation-and-normalization/transformations-with-sql.md) * [Transformations with dbt (Part 2/3)](operator-guides/transformation-and-normalization/transformations-with-dbt.md) * [Transformations with Airbyte (Part 3/3)](operator-guides/transformation-and-normalization/transformations-with-airbyte.md) + * [Configuring Airbyte](operator-guides/configuring-airbyte.md) * [Scaling Airbyte](operator-guides/scaling-airbyte.md) * [Connector Catalog](integrations/README.md) * [Sources](integrations/sources/README.md) @@ -256,7 +257,6 @@ * [Change Data Capture (CDC)](understanding-airbyte/cdc.md) * [Namespaces](understanding-airbyte/namespaces.md) * [Json to Avro Conversion](understanding-airbyte/json-avro-conversion.md) - * [Configuring Airbyte](understanding-airbyte/configuring-airbyte.md) * [Glossary of Terms](understanding-airbyte/glossary.md) * [API documentation](api-documentation.md) * [Project Overview](project-overview/README.md) diff --git a/docs/understanding-airbyte/configuring-airbyte.md b/docs/operator-guides/configuring-airbyte.md similarity index 100% rename from docs/understanding-airbyte/configuring-airbyte.md rename to docs/operator-guides/configuring-airbyte.md From b8c17f66265a7d2160c6326d7831588f3f5aba10 Mon Sep 17 00:00:00 2001 From: Augustin Date: Tue, 4 Jan 2022 12:13:52 +0100 Subject: [PATCH 023/215] add preinstall and preupgrade to env-configmap + define hook weights (#9250) --- charts/airbyte/templates/bootloader/pod.yaml | 1 + charts/airbyte/templates/env-configmap.yaml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/charts/airbyte/templates/bootloader/pod.yaml b/charts/airbyte/templates/bootloader/pod.yaml index 89982628892eb..7c73ccdd5e813 100644 --- a/charts/airbyte/templates/bootloader/pod.yaml +++ b/charts/airbyte/templates/bootloader/pod.yaml @@ -7,6 +7,7 @@ metadata: {{- include "airbyte.labels" . 
| nindent 4 }} annotations: helm.sh/hook: pre-install,pre-upgrade + helm.sh/hook-weight: "0" spec: restartPolicy: Never containers: diff --git a/charts/airbyte/templates/env-configmap.yaml b/charts/airbyte/templates/env-configmap.yaml index 8d7a4d56f0a66..073b20249ac05 100644 --- a/charts/airbyte/templates/env-configmap.yaml +++ b/charts/airbyte/templates/env-configmap.yaml @@ -2,6 +2,9 @@ apiVersion: v1 kind: ConfigMap metadata: name: airbyte-env + annotations: + helm.sh/hook: pre-install,pre-upgrade + helm.sh/hook-weight: "-1" data: AIRBYTE_VERSION: {{ .Values.version | default .Chart.AppVersion }} API_URL: {{ .Values.webapp.api.url }} From c3c38a1e055613cf30d80983afcd25590fb8d504 Mon Sep 17 00:00:00 2001 From: ycherniaiev <94798230+ycherniaiev@users.noreply.github.com> Date: Tue, 4 Jan 2022 13:39:54 +0200 Subject: [PATCH 024/215] Upd (#9270) --- airbyte-integrations/connectors/source-jdbc/Dockerfile | 2 +- .../connectors/source-jdbc/src/main/resources/spec.json | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-jdbc/Dockerfile b/airbyte-integrations/connectors/source-jdbc/Dockerfile index 0b57d8430ca93..8f44d096053e1 100644 --- a/airbyte-integrations/connectors/source-jdbc/Dockerfile +++ b/airbyte-integrations/connectors/source-jdbc/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-jdbc COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.3.2 +LABEL io.airbyte.version=0.3.3 LABEL io.airbyte.name=airbyte/source-jdbc diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/resources/spec.json b/airbyte-integrations/connectors/source-jdbc/src/main/resources/spec.json index 5460d99cb862e..83e56422038d3 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-jdbc/src/main/resources/spec.json @@ -8,16 +8,19 @@ "additionalProperties": false, "properties": { "username": { - "description": "Username to use to access the database.", + "title": "Username", + "description": "The username which is used to access the database.", "type": "string" }, "password": { - "description": "Password associated with the username.", + "title": "Password", + "description": "The password associated with this username.", "type": "string", "airbyte_secret": true }, "jdbc_url": { - "description": "JDBC formatted url. See the standard here.", + "title": "JDBC URL", + "description": "JDBC formatted URL. 
See the standard here.", "type": "string" } } From 9de7443ec0f7b5f4f5a8e232ffa95de66e3547b8 Mon Sep 17 00:00:00 2001 From: Iryna Grankova <87977540+igrankova@users.noreply.github.com> Date: Tue, 4 Jan 2022 14:10:51 +0200 Subject: [PATCH 025/215] Update fields in destination-connectors specifications: mssql, mssql-strict-encrypt (#9158) * Files title/description update for issue # 8950 * Version update for issue # 8950 * Changelogs update for PR #9158 * update destination_specs.yaml with new version * update version for destination-mssql-strict-encrypt * fix destination-mssql-strict-encrypt integration tests Co-authored-by: mkhokh --- .../d4353156-9217-4cad-8dd7-c108fd4f74cf.json | 2 +- .../seed/destination_definitions.yaml | 2 +- .../resources/seed/destination_specs.yaml | 23 ++++++++++--------- .../Dockerfile | 2 +- .../resources/expected_spec.json | 16 ++++++------- .../connectors/destination-mssql/Dockerfile | 2 +- .../src/main/resources/spec.json | 18 +++++++-------- docs/integrations/destinations/mssql.md | 2 ++ 8 files changed, 35 insertions(+), 32 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/d4353156-9217-4cad-8dd7-c108fd4f74cf.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/d4353156-9217-4cad-8dd7-c108fd4f74cf.json index 4040f03131852..041999fea8189 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/d4353156-9217-4cad-8dd7-c108fd4f74cf.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/d4353156-9217-4cad-8dd7-c108fd4f74cf.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "d4353156-9217-4cad-8dd7-c108fd4f74cf", "name": "MS SQL Server", "dockerRepository": "airbyte/destination-mssql", - "dockerImageTag": "0.1.12", + "dockerImageTag": "0.1.13", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/mssql", "icon": "mssql.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 720a4e043b0f3..c03cc2c715d3f 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -102,7 +102,7 @@ - name: MS SQL Server destinationDefinitionId: d4353156-9217-4cad-8dd7-c108fd4f74cf dockerRepository: airbyte/destination-mssql - dockerImageTag: 0.1.12 + dockerImageTag: 0.1.13 documentationUrl: https://docs.airbyte.io/integrations/destinations/mssql icon: mssql.svg - name: MeiliSearch diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 311d747898828..03dc563b6f39d 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -2015,7 +2015,7 @@ supportsDBT: false supported_destination_sync_modes: - "append" -- dockerImage: "airbyte/destination-mssql:0.1.12" +- dockerImage: "airbyte/destination-mssql:0.1.13" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" connectionSpecification: @@ -2032,12 +2032,12 @@ properties: host: title: "Host" - description: "Hostname of the database." + description: "The host name of the MSSQL database." type: "string" order: 0 port: title: "Port" - description: "Port of the database." + description: "The port of the MSSQL database." 
type: "integer" minimum: 0 maximum: 65536 @@ -2047,7 +2047,7 @@ order: 1 database: title: "DB Name" - description: "Name of the database." + description: "The name of the MSSQL database." type: "string" order: 2 schema: @@ -2062,24 +2062,25 @@ order: 3 username: title: "User" - description: "Username to use to access the database." + description: "The username which is used to access the database." type: "string" order: 4 password: title: "Password" - description: "Password associated with the username." + description: "The password associated with this username." type: "string" airbyte_secret: true order: 5 ssl_method: title: "SSL Method" type: "object" - description: "Encryption method to use when communicating with the database" + description: "The encryption method which is used to communicate with the\ + \ database." order: 6 oneOf: - title: "Unencrypted" additionalProperties: false - description: "Data transfer will not be encrypted." + description: "The data transfer will not be encrypted." required: - "ssl_method" type: "object" @@ -2091,8 +2092,8 @@ default: "unencrypted" - title: "Encrypted (trust server certificate)" additionalProperties: false - description: "Use the cert provided by the server without verification.\ - \ (For testing purposes only!)" + description: "Use the certificate provided by the server without verification.\ + \ (For testing purposes only!)" required: - "ssl_method" type: "object" @@ -2104,7 +2105,7 @@ default: "encrypted_trust_server_certificate" - title: "Encrypted (verify certificate)" additionalProperties: false - description: "Verify and use the cert provided by the server." + description: "Verify and use the certificate provided by the server." required: - "ssl_method" - "trustStoreName" diff --git a/airbyte-integrations/connectors/destination-mssql-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/destination-mssql-strict-encrypt/Dockerfile index c314e732dda3e..8cb2dcfa0c8b8 100644 --- a/airbyte-integrations/connectors/destination-mssql-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/destination-mssql-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-mssql-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/destination-mssql-strict-encrypt diff --git a/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/resources/expected_spec.json b/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/resources/expected_spec.json index e372c869cd854..099c49cf7e78a 100644 --- a/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/resources/expected_spec.json +++ b/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/resources/expected_spec.json @@ -13,13 +13,13 @@ "properties": { "host": { "title": "Host", - "description": "Hostname of the database.", + "description": "The host name of the MSSQL database.", "type": "string", "order": 0 }, "port": { "title": "Port", - "description": "Port of the database.", + "description": "The port of the MSSQL database.", "type": "integer", "minimum": 0, "maximum": 65536, @@ -29,7 +29,7 @@ }, "database": { "title": "DB Name", - "description": "Name of the database.", + "description": "The name of the MSSQL database.", "type": "string", "order": 2 }, @@ -43,13 +43,13 @@ }, "username": { "title": "User", - "description": "Username to use to access the database.", 
+ "description": "The username which is used to access the database.", "type": "string", "order": 4 }, "password": { "title": "Password", - "description": "Password associated with the username.", + "description": "The password associated with this username.", "type": "string", "airbyte_secret": true, "order": 5 @@ -57,13 +57,13 @@ "ssl_method": { "title": "SSL Method", "type": "object", - "description": "Encryption method to use when communicating with the database", + "description": "The encryption method which is used to communicate with the database.", "order": 6, "oneOf": [ { "title": "Encrypted (trust server certificate)", "additionalProperties": false, - "description": "Use the cert provided by the server without verification. (For testing purposes only!)", + "description": "Use the certificate provided by the server without verification. (For testing purposes only!)", "required": ["ssl_method"], "type": "object", "properties": { @@ -77,7 +77,7 @@ { "title": "Encrypted (verify certificate)", "additionalProperties": false, - "description": "Verify and use the cert provided by the server.", + "description": "Verify and use the certificate provided by the server.", "required": ["ssl_method", "trustStoreName", "trustStorePassword"], "type": "object", "properties": { diff --git a/airbyte-integrations/connectors/destination-mssql/Dockerfile b/airbyte-integrations/connectors/destination-mssql/Dockerfile index 6713ce07a7b95..01965dcec1853 100644 --- a/airbyte-integrations/connectors/destination-mssql/Dockerfile +++ b/airbyte-integrations/connectors/destination-mssql/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-mssql COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.12 +LABEL io.airbyte.version=0.1.13 LABEL io.airbyte.name=airbyte/destination-mssql diff --git a/airbyte-integrations/connectors/destination-mssql/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-mssql/src/main/resources/spec.json index 79c2f4959cf90..95a118feab69a 100644 --- a/airbyte-integrations/connectors/destination-mssql/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-mssql/src/main/resources/spec.json @@ -13,13 +13,13 @@ "properties": { "host": { "title": "Host", - "description": "Hostname of the database.", + "description": "The host name of the MSSQL database.", "type": "string", "order": 0 }, "port": { "title": "Port", - "description": "Port of the database.", + "description": "The port of the MSSQL database.", "type": "integer", "minimum": 0, "maximum": 65536, @@ -29,7 +29,7 @@ }, "database": { "title": "DB Name", - "description": "Name of the database.", + "description": "The name of the MSSQL database.", "type": "string", "order": 2 }, @@ -43,13 +43,13 @@ }, "username": { "title": "User", - "description": "Username to use to access the database.", + "description": "The username which is used to access the database.", "type": "string", "order": 4 }, "password": { "title": "Password", - "description": "Password associated with the username.", + "description": "The password associated with this username.", "type": "string", "airbyte_secret": true, "order": 5 @@ -57,13 +57,13 @@ "ssl_method": { "title": "SSL Method", "type": "object", - "description": "Encryption method to use when communicating with the database", + "description": "The encryption method which is used to communicate with the database.", "order": 6, "oneOf": [ { "title": "Unencrypted", "additionalProperties": false, - "description": "Data transfer will not be encrypted.", 
+ "description": "The data transfer will not be encrypted.", "required": ["ssl_method"], "type": "object", "properties": { @@ -77,7 +77,7 @@ { "title": "Encrypted (trust server certificate)", "additionalProperties": false, - "description": "Use the cert provided by the server without verification. (For testing purposes only!)", + "description": "Use the certificate provided by the server without verification. (For testing purposes only!)", "required": ["ssl_method"], "type": "object", "properties": { @@ -91,7 +91,7 @@ { "title": "Encrypted (verify certificate)", "additionalProperties": false, - "description": "Verify and use the cert provided by the server.", + "description": "Verify and use the certificate provided by the server.", "required": ["ssl_method", "trustStoreName", "trustStorePassword"], "type": "object", "properties": { diff --git a/docs/integrations/destinations/mssql.md b/docs/integrations/destinations/mssql.md index 5a7d0f02e1f1a..fc08fc302120f 100644 --- a/docs/integrations/destinations/mssql.md +++ b/docs/integrations/destinations/mssql.md @@ -119,6 +119,7 @@ Using this feature requires additional configuration, when creating the source. | Version | Date | Pull Request | Subject | |:--------| :--- |:---------------------------------------------------------| :--- | +| 0.1.13 | 2021-12-28 | [\#9158](https://github.com/airbytehq/airbyte/pull/9158) | Update connector fields title/description | | 0.1.12 | 2021-12-01 | [\#8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | | 0.1.11 | 2021-11-08 | [#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | | 0.1.10 | 2021-10-11 | [\#6877](https://github.com/airbytehq/airbyte/pull/6877) | Add `normalization` capability, add `append+deduplication` sync mode | @@ -135,5 +136,6 @@ Using this feature requires additional configuration, when creating the source. 
### Changelog (Strict Encrypt) | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.1.3 | 2021-12-28 | [\#9158](https://github.com/airbytehq/airbyte/pull/9158) | Update connector fields title/description | | 0.1.2 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | | 0.1.1 | 2021-11-08 | [#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | From cece4c69ce6da66cde209315f6de85f9753627f5 Mon Sep 17 00:00:00 2001 From: Titas Skrebe Date: Tue, 4 Jan 2022 16:02:00 +0200 Subject: [PATCH 026/215] Increase Pod sidecars' memory limits (#9266) --- .../main/java/io/airbyte/workers/process/KubePodProcess.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index c90f13449686b..0f5d0668d8f49 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -95,9 +95,10 @@ public class KubePodProcess extends Process { private static final String INIT_CONTAINER_NAME = "init"; public static final Duration DEFAULT_STATUS_CHECK_INTERVAL = Duration.ofSeconds(30); - private static final String DEFAULT_MEMORY_LIMIT = "25Mi"; + private static final String DEFAULT_MEMORY_REQUEST = "25Mi"; + private static final String DEFAULT_MEMORY_LIMIT = "50Mi"; private static final ResourceRequirements DEFAULT_SIDECAR_RESOURCES = new ResourceRequirements() - .withMemoryLimit(DEFAULT_MEMORY_LIMIT).withMemoryRequest(DEFAULT_MEMORY_LIMIT); + .withMemoryLimit(DEFAULT_MEMORY_LIMIT).withMemoryRequest(DEFAULT_MEMORY_REQUEST); private static final String PIPES_DIR = "/pipes"; private static final String STDIN_PIPE_FILE = PIPES_DIR + "/stdin"; From 5be2424c77e7d44af23b9beb524d10f22f60a6ee Mon Sep 17 00:00:00 2001 From: Deividas J Date: Tue, 4 Jan 2022 16:17:16 +0200 Subject: [PATCH 027/215] Source Amazon Seller Partner: fix NoAuth deprecation warning (#9236) --- .../e55879a8-0ef8-4557-abcf-ab34c53ec460.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-amazon-seller-partner/Dockerfile | 2 +- .../source_amazon_seller_partner/streams.py | 4 ++-- ...ams_rate_limits.py => test_reports_streams_rate_limits.py} | 3 +-- .../unit_tests/test_transform_function.py | 3 +-- docs/integrations/sources/amazon-seller-partner.md | 1 + 8 files changed, 9 insertions(+), 10 deletions(-) rename airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/{test_repots_streams_rate_limits.py => test_reports_streams_rate_limits.py} (96%) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json index 379a6a3c8d55b..e175948b1c018 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e55879a8-0ef8-4557-abcf-ab34c53ec460", "name": 
"Amazon Seller Partner", "dockerRepository": "airbyte/source-amazon-seller-partner", - "dockerImageTag": "0.2.9", + "dockerImageTag": "0.2.10", "documentationUrl": "https://docs.airbyte.io/integrations/sources/amazon-seller-partner", "icon": "amazonsellerpartner.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 5e160473d5b31..85cc216843a43 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -22,7 +22,7 @@ - name: Amazon Seller Partner sourceDefinitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 dockerRepository: airbyte/source-amazon-seller-partner - dockerImageTag: 0.2.9 + dockerImageTag: 0.2.10 sourceType: api documentationUrl: https://docs.airbyte.io/integrations/sources/amazon-seller-partner icon: amazonsellerpartner.svg diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 3b0d4c2c573be..d93324b6831ee 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -157,7 +157,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amazon-seller-partner:0.2.9" +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.10" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile index 7c46452143632..0b67d75a64e70 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.9 +LABEL io.airbyte.version=0.2.10 LABEL io.airbyte.name=airbyte/source-amazon-seller-partner diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index 569733f958979..c92a199217459 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -18,7 +18,7 @@ from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.http import HttpStream -from airbyte_cdk.sources.streams.http.auth import HttpAuthenticator, NoAuth +from airbyte_cdk.sources.streams.http.auth import HttpAuthenticator from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, RequestBodyException from airbyte_cdk.sources.streams.http.http import BODY_REQUEST_METHODS from airbyte_cdk.sources.streams.http.rate_limiting import default_backoff_handler @@ -152,7 +152,7 @@ def __init__( period_in_days: Optional[int], report_options: Optional[str], max_wait_seconds: Optional[int], - authenticator: HttpAuthenticator = NoAuth(), + authenticator: HttpAuthenticator = None, ): self._authenticator = authenticator self._session = requests.Session() diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_repots_streams_rate_limits.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_rate_limits.py similarity index 96% rename from airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_repots_streams_rate_limits.py rename to airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_rate_limits.py index 8b47b2d9ac949..c1d269dde62cc 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_repots_streams_rate_limits.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_rate_limits.py @@ -6,7 +6,6 @@ import pytest import requests -from airbyte_cdk.sources.streams.http.auth import NoAuth from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException from source_amazon_seller_partner.auth import AWSSignature from source_amazon_seller_partner.streams import MerchantListingsReports @@ -26,7 +25,7 @@ def reports_stream(): aws_signature=aws_signature, replication_start_date="2017-01-25T00:00:00Z", marketplace_id="id", - authenticator=NoAuth(), + authenticator=None, period_in_days=0, report_options=None, max_wait_seconds=500, diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py index 9acc30f8f51c3..160dedee9fcb5 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py @@ -3,7 +3,6 @@ # import pytest -from airbyte_cdk.sources.streams.http.auth import NoAuth from source_amazon_seller_partner.auth import AWSSignature from source_amazon_seller_partner.streams 
import SellerFeedbackReports @@ -21,7 +20,7 @@ def reports_stream(marketplace_id): aws_signature=aws_signature, replication_start_date="2010-01-25T00:00:00Z", marketplace_id=marketplace_id, - authenticator=NoAuth(), + authenticator=None, period_in_days=0, report_options=None, max_wait_seconds=0, diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index 90aeb0b3def04..b5f89136572b6 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -64,6 +64,7 @@ Information about rate limits you may find [here](https://github.com/amzn/sellin | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.2.10` | 2021-12-31 | [\#9236](https://github.com/airbytehq/airbyte/pull/9236) | Fix NoAuth deprecation warning | | `0.2.9` | 2021-12-30 | [\#9212](https://github.com/airbytehq/airbyte/pull/9212) | Normalize GET_SELLER_FEEDBACK_DATA header field names | | `0.2.8` | 2021-12-22 | [\#8810](https://github.com/airbytehq/airbyte/pull/8810) | Fix GET_SELLER_FEEDBACK_DATA Date cursor field format | | `0.2.7` | 2021-12-21 | [\#9002](https://github.com/airbytehq/airbyte/pull/9002) | Extract REPORTS_MAX_WAIT_SECONDS to configurable parameter | From 103e003adfd172eb14e8b4cd896f6162e72a2d2c Mon Sep 17 00:00:00 2001 From: "oleh.zorenko" <19872253+Zirochkaa@users.noreply.github.com> Date: Tue, 4 Jan 2022 16:31:59 +0200 Subject: [PATCH 028/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20Delighted:=20Fi?= =?UTF-8?q?x=20pagination=20for=20`survey=5Fresponses`,=20`bounces`=20and?= =?UTF-8?q?=20`unsubscribes`=20streams=20(#9275)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 9271 Fix pagination for delighted source * 9271 Implement change request * 9271 Implement change request * 9271 Bump connector's version --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-delighted/Dockerfile | 2 +- .../acceptance-test-config.yml | 2 +- .../schemas/survey_responses.json | 44 +++++++++---------- .../source_delighted/source.py | 43 +++++++++--------- docs/integrations/sources/delighted.md | 3 +- 7 files changed, 48 insertions(+), 50 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 85cc216843a43..2102d59d4b408 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -147,7 +147,7 @@ - name: Delighted sourceDefinitionId: cc88c43f-6f53-4e8a-8c4d-b284baaf9635 dockerRepository: airbyte/source-delighted - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.io/integrations/sources/delighted icon: delighted.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index d93324b6831ee..89940b728ca2e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1280,7 +1280,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-delighted:0.1.0" +- dockerImage: "airbyte/source-delighted:0.1.1" spec: documentationUrl: "https://docsurl.com" connectionSpecification: diff --git 
a/airbyte-integrations/connectors/source-delighted/Dockerfile b/airbyte-integrations/connectors/source-delighted/Dockerfile index 4853739994eb2..1e1396f91cea8 100644 --- a/airbyte-integrations/connectors/source-delighted/Dockerfile +++ b/airbyte-integrations/connectors/source-delighted/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-delighted diff --git a/airbyte-integrations/connectors/source-delighted/acceptance-test-config.yml b/airbyte-integrations/connectors/source-delighted/acceptance-test-config.yml index e8df991ce5376..5158a03f40509 100644 --- a/airbyte-integrations/connectors/source-delighted/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-delighted/acceptance-test-config.yml @@ -15,7 +15,7 @@ tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" empty_streams: ["bounces"] - incremental: # TODO if your connector does not implement incremental sync, remove this block + incremental: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" future_state_path: "integration_tests/abnormal_state.json" diff --git a/airbyte-integrations/connectors/source-delighted/source_delighted/schemas/survey_responses.json b/airbyte-integrations/connectors/source-delighted/source_delighted/schemas/survey_responses.json index 00d220722441c..2d97340ea9e8c 100644 --- a/airbyte-integrations/connectors/source-delighted/source_delighted/schemas/survey_responses.json +++ b/airbyte-integrations/connectors/source-delighted/source_delighted/schemas/survey_responses.json @@ -27,7 +27,21 @@ "type": ["null", "integer"] }, "person_properties": { - "type": "object" + "type": ["object", "null"], + "properties": { + "Delighted Source": { + "type": ["null", "string"] + }, + "Delighted Device Type": { + "type": ["null", "string"] + }, + "Delighted Operating System": { + "type": ["null", "string"] + }, + "Delighted Browser": { + "type": ["null", "string"] + } + } }, "notes": { "type": "array", @@ -76,12 +90,10 @@ "text": { "type": "string" } - }, - "required": ["id", "text"] + } } } - }, - "required": ["free_response", "scale", "select_one", "select_many"] + } }, "question": { "type": "object", @@ -95,26 +107,10 @@ "text": { "type": "string" } - }, - "required": ["id", "type", "text"] + } } - }, - "required": ["id", "value", "question"] + } } } - }, - "required": [ - "id", - "person", - "survey_type", - "score", - "comment", - "permalink", - "created_at", - "updated_at", - "person_properties", - "notes", - "tags", - "additional_answers" - ] + } } diff --git a/airbyte-integrations/connectors/source-delighted/source_delighted/source.py b/airbyte-integrations/connectors/source-delighted/source_delighted/source.py index 2f25139320201..80e738728afaa 100644 --- a/airbyte-integrations/connectors/source-delighted/source_delighted/source.py +++ b/airbyte-integrations/connectors/source-delighted/source_delighted/source.py @@ -23,6 +23,7 @@ class DelightedStream(HttpStream, ABC): # Page size limit = 100 + page = 1 # Define primary key to all streams as primary key primary_key = "id" @@ -32,12 +33,10 @@ def __init__(self, since: int, **kwargs): self.since = since def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: - # Getting next page link - 
next_page = response.links.get("next", None) - if next_page: - return dict(parse_qsl(urlparse(next_page.get("url")).query)) - else: - return None + response_data = response.json() + if len(response_data) == self.limit: + self.page += 1 + return {"page": self.page} def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None @@ -49,8 +48,7 @@ def request_params( return params def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - records = response.json() - yield from records + yield from response.json() class IncrementalDelightedStream(DelightedStream, ABC): @@ -77,19 +75,21 @@ def request_params(self, stream_state=None, **kwargs): class People(IncrementalDelightedStream): - def path( - self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None - ) -> str: + def path(self, **kwargs) -> str: return "people.json" + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + # Getting next page link + next_page = response.links.get("next", None) + if next_page: + return {"page_info": dict(parse_qsl(urlparse(next_page.get("url")).query)).get("page_info")} + class Unsubscribes(IncrementalDelightedStream): cursor_field = "unsubscribed_at" primary_key = "person_id" - def path( - self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None - ) -> str: + def path(self, **kwargs) -> str: return "unsubscribes.json" @@ -97,18 +97,14 @@ class Bounces(IncrementalDelightedStream): cursor_field = "bounced_at" primary_key = "person_id" - def path( - self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None - ) -> str: + def path(self, **kwargs) -> str: return "bounces.json" class SurveyResponses(IncrementalDelightedStream): cursor_field = "updated_at" - def path( - self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None - ) -> str: + def path(self, **kwargs) -> str: return "survey_responses.json" def request_params(self, stream_state=None, **kwargs): @@ -148,4 +144,9 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: def streams(self, config: Mapping[str, Any]) -> List[Stream]: auth = self._get_authenticator(config) args = {"authenticator": auth, "since": config["since"]} - return [People(**args), Unsubscribes(**args), Bounces(**args), SurveyResponses(**args)] + return [ + Bounces(**args), + People(**args), + SurveyResponses(**args), + Unsubscribes(**args), + ] diff --git a/docs/integrations/sources/delighted.md b/docs/integrations/sources/delighted.md index 6c4865d00fe32..9402469ee0d7b 100644 --- a/docs/integrations/sources/delighted.md +++ b/docs/integrations/sources/delighted.md @@ -37,4 +37,5 @@ This connector supports `API PASSWORD` as the authentication method. 
| Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.1.0 | 2021-10-27 | [4551](https://github.com/airbytehq/airbyte/pull/4551) | Add Delighted source connector | \ No newline at end of file +| 0.1.1 | 2022-01-04 | [9275](https://github.com/airbytehq/airbyte/pull/9275) | Fix pagination handling for `survey_responses`, `bounces` and `unsubscribes` streams | +| 0.1.0 | 2021-10-27 | [4551](https://github.com/airbytehq/airbyte/pull/4551) | Add Delighted source connector | From c0a46c1987735993901febe646b5b285334c2110 Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Tue, 4 Jan 2022 17:04:36 +0200 Subject: [PATCH 029/215] BufferedStreamConsumerTest: remove non-determinism in size of generated test records (#9274) * generate records fixed 40 bytes of size * fix buffer flush Signed-off-by: Sergey Chvalyuk --- .../buffered_stream_consumer/BufferedStreamConsumer.java | 2 +- .../buffered_stream_consumer/BufferedStreamConsumerTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java index c9aee79f28a5d..7d5f78e14bae0 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java @@ -147,7 +147,7 @@ protected void acceptTracked(final AirbyteMessage message) throws Exception { // are serialized again when writing to // the destination long messageSizeInBytes = ByteUtils.getSizeInBytesForUTF8CharSet(Jsons.serialize(recordMessage.getData())); - if (bufferSizeInBytes + messageSizeInBytes >= maxQueueSizeInBytes) { + if (bufferSizeInBytes + messageSizeInBytes > maxQueueSizeInBytes) { LOGGER.info("Flushing buffer..."); flushQueueToDestination(); bufferSizeInBytes = 0; diff --git a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java index 0b1c34ae743b5..01549f4b61b5f 100644 --- a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java +++ b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java @@ -315,7 +315,7 @@ private static List generateRecords(final long targetSizeInBytes List output = Lists.newArrayList(); long bytesCounter = 0; for (int i = 0;; i++) { - JsonNode payload = Jsons.jsonNode(ImmutableMap.of("id", RandomStringUtils.randomAscii(7), "name", "human " + String.format("%5d", i))); + JsonNode payload = Jsons.jsonNode(ImmutableMap.of("id", RandomStringUtils.randomAlphabetic(7), "name", "human " + String.format("%8d", i))); long sizeInBytes = ByteUtils.getSizeInBytesForUTF8CharSet(Jsons.serialize(payload)); bytesCounter += sizeInBytes; AirbyteMessage airbyteMessage = new AirbyteMessage() From b475aa9a693610b0308abda5bec0d6bbade47727 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Tue, 4 Jan 2022 10:07:24 -0800 Subject: 
[PATCH 030/215] decrease cost of health check (#9288) --- airbyte-api/src/main/openapi/config.yaml | 4 ++-- .../airbyte/scheduler/app/SchedulerApp.java | 2 +- .../airbyte/server/apis/ConfigurationApi.java | 2 +- .../server/handlers/HealthCheckHandler.java | 22 +------------------ .../handlers/HealthCheckHandlerTest.java | 21 +++--------------- airbyte-webapp/src/config/defaultConfig.ts | 2 +- .../api/generated-api-html/index.html | 4 ++-- 7 files changed, 11 insertions(+), 46 deletions(-) diff --git a/airbyte-api/src/main/openapi/config.yaml b/airbyte-api/src/main/openapi/config.yaml index 7bc28ae6c92fe..602b45f2e897a 100644 --- a/airbyte-api/src/main/openapi/config.yaml +++ b/airbyte-api/src/main/openapi/config.yaml @@ -3198,9 +3198,9 @@ components: HealthCheckRead: type: object required: - - db + - available properties: - db: + available: type: boolean # General CheckConnectionRead: diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java index c8e3223057c69..b807255cf9017 100644 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java +++ b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java @@ -205,7 +205,7 @@ public static void waitForServer(final Configs configs) throws InterruptedExcept while (!isHealthy) { try { final HealthCheckRead healthCheck = apiClient.getHealthApi().getHealthCheck(); - isHealthy = healthCheck.getDb(); + isHealthy = healthCheck.getAvailable(); } catch (final ApiException e) { LOGGER.info("Waiting for server to become available..."); Thread.sleep(2000); diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index 9ecc6afe61386..05ec930a8ff70 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -214,7 +214,7 @@ public ConfigurationApi(final ConfigRepository configRepository, jobHistoryHandler, schedulerHandler, operationsHandler); - healthCheckHandler = new HealthCheckHandler(configRepository); + healthCheckHandler = new HealthCheckHandler(); archiveHandler = new ArchiveHandler( airbyteVersion, configRepository, diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/HealthCheckHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/HealthCheckHandler.java index ed360a10e5808..0495b3e40bd85 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/HealthCheckHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/HealthCheckHandler.java @@ -5,31 +5,11 @@ package io.airbyte.server.handlers; import io.airbyte.api.model.HealthCheckRead; -import io.airbyte.config.persistence.ConfigRepository; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class HealthCheckHandler { - private static final Logger LOGGER = LoggerFactory.getLogger(HealthCheckHandler.class); - - private final ConfigRepository configRepository; - - public HealthCheckHandler(final ConfigRepository configRepository) { - this.configRepository = configRepository; - } - - // todo (cgardens) - add more checks as we go. 
public HealthCheckRead health() { - boolean databaseHealth = false; - try { - configRepository.listStandardWorkspaces(true); - databaseHealth = true; - } catch (final Exception e) { - LOGGER.error("database health check failed."); - } - - return new HealthCheckRead().db(databaseHealth); + return new HealthCheckRead().available(true); } } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/HealthCheckHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/HealthCheckHandlerTest.java index 0ae0ebea5ad34..2462c384b46bb 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/HealthCheckHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/HealthCheckHandlerTest.java @@ -5,31 +5,16 @@ package io.airbyte.server.handlers; import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; import io.airbyte.api.model.HealthCheckRead; -import io.airbyte.config.StandardWorkspace; -import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.validation.json.JsonValidationException; -import java.io.IOException; -import java.util.Collections; import org.junit.jupiter.api.Test; class HealthCheckHandlerTest { @Test - void testDbHealth() throws IOException, JsonValidationException { - final ConfigRepository configRepository = mock(ConfigRepository.class); - final HealthCheckHandler healthCheckHandler = new HealthCheckHandler(configRepository); - - // check db healthy - when(configRepository.listStandardWorkspaces(true)).thenReturn(Collections.singletonList(new StandardWorkspace())); - assertEquals(new HealthCheckRead().db(true), healthCheckHandler.health()); - - doThrow(IOException.class).when(configRepository).listStandardWorkspaces(true); - assertEquals(new HealthCheckRead().db(false), healthCheckHandler.health()); + void testDbHealth() { + final HealthCheckHandler healthCheckHandler = new HealthCheckHandler(); + assertEquals(new HealthCheckRead().available(true), healthCheckHandler.health()); } } diff --git a/airbyte-webapp/src/config/defaultConfig.ts b/airbyte-webapp/src/config/defaultConfig.ts index c245f3b9e8ea7..b187c6856c408 100644 --- a/airbyte-webapp/src/config/defaultConfig.ts +++ b/airbyte-webapp/src/config/defaultConfig.ts @@ -18,7 +18,7 @@ const features: Feature[] = [ const defaultConfig: Config = { ui: uiConfig, segment: { enabled: true, token: "" }, - healthCheckInterval: 10000, + healthCheckInterval: 20000, version: "dev", apiUrl: `${window.location.protocol}//${window.location.hostname}:8001/api/v1/`, integrationUrl: "/docs", diff --git a/docs/reference/api/generated-api-html/index.html b/docs/reference/api/generated-api-html/index.html index c1f96a772aa2b..bff6feaf45603 100644 --- a/docs/reference/api/generated-api-html/index.html +++ b/docs/reference/api/generated-api-html/index.html @@ -2784,7 +2784,7 @@

Return type

Example data

Content-Type: application/json
{
-  "db" : true
+  "available" : true
 }

Produces

@@ -7572,7 +7572,7 @@

DestinationUpdate - HealthCheckRead - Up

-db
+available
From ee26499d7db12a9ffdd58a5a1528180f608b6d42 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Tue, 4 Jan 2022 11:09:16 -0800 Subject: [PATCH 031/215] fix build error introduced by health check change (#9292) --- .../automaticMigrationAcceptance/MigrationAcceptanceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java b/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java index 90bade72f1934..fa1ba5a85e53f 100644 --- a/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java +++ b/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java @@ -319,7 +319,7 @@ private static void healthCheck(final ApiClient apiClient) { final HealthApi healthApi = new HealthApi(apiClient); try { final HealthCheckRead healthCheck = healthApi.getHealthCheck(); - assertTrue(healthCheck.getDb()); + assertTrue(healthCheck.getAvailable()); } catch (final ApiException e) { throw new RuntimeException("Health check failed, usually due to auto migration failure. Please check the logs for details."); } From 7776dbec8667b437c3a73447f07333428d1ba75a Mon Sep 17 00:00:00 2001 From: Anna Lvova <37615075+annalvova05@users.noreply.github.com> Date: Tue, 4 Jan 2022 22:14:57 +0100 Subject: [PATCH 032/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20Zendesk=20Sunsh?= =?UTF-8?q?ine:=20support=20oauth=20(#7976)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add oauth support * bump version * add java part * change date format * change spec * upd invalid_configs * bump version and format --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 127 ++++++++++++++-- .../source-zendesk-sunshine/Dockerfile | 2 +- .../acceptance-test-config.yml | 12 ++ .../invalid_config_api_token.json | 9 ++ .../invalid_config_oauth.json | 10 ++ .../source_zendesk_sunshine/source.py | 19 ++- .../source_zendesk_sunshine/spec.json | 141 ++++++++++++++++-- .../source_zendesk_sunshine/streams.py | 8 + .../oauth/OAuthImplementationFactory.java | 1 + .../oauth/flows/ZendeskSunshineOAuthFlow.java | 100 +++++++++++++ .../flows/ZendeskSunshineOAuthFlowTest.java | 89 +++++++++++ docs/integrations/sources/zendesk-sunshine.md | 8 +- 13 files changed, 492 insertions(+), 36 deletions(-) create mode 100644 airbyte-integrations/connectors/source-zendesk-sunshine/integration_tests/invalid_config_api_token.json create mode 100644 airbyte-integrations/connectors/source-zendesk-sunshine/integration_tests/invalid_config_oauth.json create mode 100644 airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ZendeskSunshineOAuthFlow.java create mode 100644 airbyte-oauth/src/test/java/io/airbyte/oauth/flows/ZendeskSunshineOAuthFlowTest.java diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 2102d59d4b408..9e81596aef35b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -753,7 +753,7 @@ - name: Zendesk Sunshine sourceDefinitionId: 325e0640-e7b3-4e24-b823-3361008f603f dockerRepository: airbyte/source-zendesk-sunshine - 
dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.io/integrations/sources/zendesk-sunshine icon: zendesk.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 89940b728ca2e..3ee4c7c5c8d58 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -7553,7 +7553,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-zendesk-sunshine:0.1.0" +- dockerImage: "airbyte/source-zendesk-sunshine:0.1.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk_sunshine" connectionSpecification: @@ -7561,32 +7561,129 @@ title: "Zendesk Sunshine Spec" type: "object" required: - - "api_token" - - "email" - "start_date" - "subdomain" - additionalProperties: false + additionalProperties: true properties: - api_token: - type: "string" - airbyte_secret: true - description: "API Token. See the docs for information on how to generate this key." - email: - type: "string" - description: "The user email for your Zendesk account" subdomain: + title: "Subdomain" type: "string" - description: "The subdomain for your Zendesk Account" + description: "The subdomain for your Zendesk Account." start_date: title: "Start Date" type: "string" - description: "The date from which you'd like to replicate the data" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Sunshine API, in the format YYYY-MM-DDT00:00:00Z." pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: "2021-01-01T00:00:00.000000Z" + examples: + - "2021-01-01T00:00:00Z" + credentials: + title: "Authorization Method" + type: "object" + oneOf: + - type: "object" + title: "OAuth2.0" + required: + - "auth_method" + - "client_id" + - "client_secret" + - "access_token" + properties: + auth_method: + type: "string" + const: "oauth2.0" + enum: + - "oauth2.0" + default: "oauth2.0" + order: 0 + client_id: + type: "string" + title: "Client ID" + description: "The Client ID of your OAuth application." + airbyte_secret: true + client_secret: + type: "string" + title: "Client Secret" + description: "The Client Secret of your OAuth application." + airbyte_secret: true + access_token: + type: "string" + title: "Access Token" + description: "Long-term access Token for making authenticated requests." + airbyte_secret: true + - type: "object" + title: "API Token" + required: + - "auth_method" + - "api_token" + - "email" + properties: + auth_method: + type: "string" + const: "api_token" + enum: + - "api_token" + default: "api_token" + order: 1 + api_token: + type: "string" + title: "API Token" + description: "API Token. See the docs for information on how to generate this key." 
+ airbyte_secret: true + email: + type: "string" + title: "Email" + description: "The user email for your Zendesk account" supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] + advanced_auth: + auth_flow_type: "oauth2.0" + predicate_key: + - "credentials" + - "auth_method" + predicate_value: "oauth2.0" + oauth_config_specification: + oauth_user_input_from_connector_config_specification: + type: "object" + additionalProperties: false + properties: + subdomain: + type: "string" + path_in_connector_config: + - "subdomain" + complete_oauth_output_specification: + type: "object" + additionalProperties: false + properties: + access_token: + type: "string" + path_in_connector_config: + - "credentials" + - "access_token" + complete_oauth_server_input_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + client_secret: + type: "string" + complete_oauth_server_output_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + path_in_connector_config: + - "credentials" + - "client_id" + client_secret: + type: "string" + path_in_connector_config: + - "credentials" + - "client_secret" - dockerImage: "airbyte/source-zendesk-support:0.1.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-support" diff --git a/airbyte-integrations/connectors/source-zendesk-sunshine/Dockerfile b/airbyte-integrations/connectors/source-zendesk-sunshine/Dockerfile index e46b751b39c2a..ddfe861d3711f 100644 --- a/airbyte-integrations/connectors/source-zendesk-sunshine/Dockerfile +++ b/airbyte-integrations/connectors/source-zendesk-sunshine/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-zendesk-sunshine diff --git a/airbyte-integrations/connectors/source-zendesk-sunshine/acceptance-test-config.yml b/airbyte-integrations/connectors/source-zendesk-sunshine/acceptance-test-config.yml index 9f3303a3275c6..5a46bc61d8d47 100644 --- a/airbyte-integrations/connectors/source-zendesk-sunshine/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-zendesk-sunshine/acceptance-test-config.yml @@ -7,13 +7,25 @@ tests: connection: - config_path: "secrets/config.json" status: "succeed" + - config_path: "secrets/config_oauth.json" + status: "succeed" + - config_path: "secrets/config_api_token.json" + status: "succeed" - config_path: "integration_tests/invalid_config.json" status: "failed" + - config_path: "integration_tests/invalid_config_api_token.json" + status: "failed" + - config_path: "integration_tests/invalid_config_oauth.json" + status: "failed" discovery: - config_path: "secrets/config.json" basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + - config_path: "secrets/config_api_token.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + - config_path: "secrets/config_oauth.json" + configured_catalog_path: "integration_tests/configured_catalog.json" # incremental: # complex state ( {parent_id: {cur_field: value}} still not supported ) # - config_path: "secrets/config.json" # configured_catalog_path: "integration_tests/configured_catalog.json" diff --git 
a/airbyte-integrations/connectors/source-zendesk-sunshine/integration_tests/invalid_config_api_token.json b/airbyte-integrations/connectors/source-zendesk-sunshine/integration_tests/invalid_config_api_token.json new file mode 100644 index 0000000000000..a974c5902823c --- /dev/null +++ b/airbyte-integrations/connectors/source-zendesk-sunshine/integration_tests/invalid_config_api_token.json @@ -0,0 +1,9 @@ +{ + "credentials": { + "auth_method": "api_token", + "email": "test@ayhghghte.io", + "api_token": "fgfgvf ghnbvg hnghbvnhbvnvbn" + }, + "subdomain": "d3v-airbyte", + "start_date": "2020-01-01T00:00:00Z" +} diff --git a/airbyte-integrations/connectors/source-zendesk-sunshine/integration_tests/invalid_config_oauth.json b/airbyte-integrations/connectors/source-zendesk-sunshine/integration_tests/invalid_config_oauth.json new file mode 100644 index 0000000000000..66f0c30be152f --- /dev/null +++ b/airbyte-integrations/connectors/source-zendesk-sunshine/integration_tests/invalid_config_oauth.json @@ -0,0 +1,10 @@ +{ + "credentials": { + "auth_method": "oauth2.0", + "client_id": "some_client_id", + "client_secret": "some_client_secret", + "access_token": "some_access_token" + }, + "subdomain": "d3v-airbyte", + "start_date": "2020-01-01T00:00:00Z" +} diff --git a/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/source.py b/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/source.py index 170cb54f060a4..81bbc46c6944f 100644 --- a/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/source.py +++ b/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/source.py @@ -4,7 +4,7 @@ import base64 -from typing import Any, List, Mapping, Tuple +from typing import Any, List, Mapping, Tuple, Union import pendulum from airbyte_cdk.logger import AirbyteLogger @@ -23,11 +23,24 @@ def __init__(self, auth: Tuple[str, str], auth_method: str = "Basic", **kwargs): super().__init__(token=b64_encoded, auth_method=auth_method, **kwargs) +class ZendeskSunshineAuthenticator: + """Provides the authentication capabilities for both old and new methods.""" + + @staticmethod + def get_auth(config: Mapping[str, Any]) -> Union[Base64HttpAuthenticator, TokenAuthenticator]: + credentials = config.get("credentials", {}) + token = config.get("api_token") or credentials.get("api_token") + email = config.get("email") or credentials.get("email") + if email and token: + return Base64HttpAuthenticator(auth=(f"{email}/token", token)) + return TokenAuthenticator(token=credentials["access_token"]) + + class SourceZendeskSunshine(AbstractSource): def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]: try: pendulum.parse(config["start_date"], strict=True) - authenticator = Base64HttpAuthenticator(auth=(f'{config["email"]}/token', config["api_token"])) + authenticator = ZendeskSunshineAuthenticator.get_auth(config) stream = Limits(authenticator=authenticator, subdomain=config["subdomain"], start_date=pendulum.parse(config["start_date"])) records = stream.read_records(sync_mode=SyncMode.full_refresh) next(records) @@ -47,7 +60,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: After this time is passed we have no data. 
It will require permanent population, to pass the test criteria `stream should contain at least 1 record) """ - authenticator = Base64HttpAuthenticator(auth=(f'{config["email"]}/token', config["api_token"])) + authenticator = ZendeskSunshineAuthenticator.get_auth(config) args = {"authenticator": authenticator, "subdomain": config["subdomain"], "start_date": config["start_date"]} return [ ObjectTypes(**args), diff --git a/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/spec.json b/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/spec.json index c61498b1fc0ff..03a04f15b81a5 100644 --- a/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/spec.json +++ b/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/spec.json @@ -4,28 +4,141 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Zendesk Sunshine Spec", "type": "object", - "required": ["api_token", "email", "start_date", "subdomain"], - "additionalProperties": false, + "required": ["start_date", "subdomain"], + "additionalProperties": true, "properties": { - "api_token": { - "type": "string", - "airbyte_secret": true, - "description": "API Token. See the docs for information on how to generate this key." - }, - "email": { - "type": "string", - "description": "The user email for your Zendesk account" - }, "subdomain": { + "title": "Subdomain", "type": "string", - "description": "The subdomain for your Zendesk Account" + "description": "The subdomain for your Zendesk Account." }, "start_date": { "title": "Start Date", "type": "string", - "description": "The date from which you'd like to replicate the data", + "description": "The date from which you'd like to replicate data for Zendesk Sunshine API, in the format YYYY-MM-DDT00:00:00Z.", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", - "examples": "2021-01-01T00:00:00.000000Z" + "examples": ["2021-01-01T00:00:00Z"] + }, + "credentials": { + "title": "Authorization Method", + "type": "object", + "oneOf": [ + { + "type": "object", + "title": "OAuth2.0", + "required": [ + "auth_method", + "client_id", + "client_secret", + "access_token" + ], + "properties": { + "auth_method": { + "type": "string", + "const": "oauth2.0", + "enum": ["oauth2.0"], + "default": "oauth2.0", + "order": 0 + }, + "client_id": { + "type": "string", + "title": "Client ID", + "description": "The Client ID of your OAuth application.", + "airbyte_secret": true + }, + "client_secret": { + "type": "string", + "title": "Client Secret", + "description": "The Client Secret of your OAuth application.", + "airbyte_secret": true + }, + "access_token": { + "type": "string", + "title": "Access Token", + "description": "Long-term access Token for making authenticated requests.", + "airbyte_secret": true + } + } + }, + { + "type": "object", + "title": "API Token", + "required": ["auth_method", "api_token", "email"], + "properties": { + "auth_method": { + "type": "string", + "const": "api_token", + "enum": ["api_token"], + "default": "api_token", + "order": 1 + }, + "api_token": { + "type": "string", + "title": "API Token", + "description": "API Token. 
See the docs for information on how to generate this key.", + "airbyte_secret": true + }, + "email": { + "type": "string", + "title": "Email", + "description": "The user email for your Zendesk account" + } + } + } + ] + } + } + }, + "advanced_auth": { + "auth_flow_type": "oauth2.0", + "predicate_key": ["credentials", "auth_method"], + "predicate_value": "oauth2.0", + "oauth_config_specification": { + "complete_oauth_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "access_token": { + "type": "string", + "path_in_connector_config": ["credentials", "access_token"] + } + } + }, + "complete_oauth_server_input_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string" + }, + "client_secret": { + "type": "string" + } + } + }, + "complete_oauth_server_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string", + "path_in_connector_config": ["credentials", "client_id"] + }, + "client_secret": { + "type": "string", + "path_in_connector_config": ["credentials", "client_secret"] + } + } + }, + "oauth_user_input_from_connector_config_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "subdomain": { + "type": "string", + "path_in_connector_config": ["subdomain"] + } + } } } } diff --git a/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/streams.py b/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/streams.py index ea700f26413e1..6c24ec9e22515 100644 --- a/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/streams.py +++ b/airbyte-integrations/connectors/source-zendesk-sunshine/source_zendesk_sunshine/streams.py @@ -77,6 +77,8 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late class ObjectTypes(SunshineStream): + primary_key = "key" + def path(self, **kwargs) -> str: return "objects/types" @@ -134,6 +136,8 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late class RelationshipTypes(SunshineStream): + primary_key = "key" + def path(self, **kwargs) -> str: return "relationships/types" @@ -170,6 +174,8 @@ def path(self, **kwargs) -> str: class ObjectTypePolicies(SunshineStream): + primary_key = None + def stream_slices(self, **kwargs): parent_stream = ObjectTypes(authenticator=self.authenticator, subdomain=self.subdomain, start_date=self._start_date) for obj_type in parent_stream.read_records(sync_mode=SyncMode.full_refresh): @@ -200,5 +206,7 @@ def path(self, **kwargs) -> str: class Limits(SunshineStream): + primary_key = "key" + def path(self, **kwargs) -> str: return "limits" diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index fcb04df68cf4b..0894acbf4efc1 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -54,6 +54,7 @@ public OAuthImplementationFactory(final ConfigRepository configRepository, final .put("airbyte/source-drift", new DriftOAuthFlow(configRepository, httpClient)) .put("airbyte/source-zendesk-chat", new ZendeskChatOAuthFlow(configRepository, httpClient)) .put("airbyte/source-monday", new MondayOAuthFlow(configRepository, httpClient)) + 
.put("airbyte/source-zendesk-sunshine", new ZendeskSunshineOAuthFlow(configRepository, httpClient)) .build(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ZendeskSunshineOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ZendeskSunshineOAuthFlow.java new file mode 100644 index 0000000000000..1527992965dfa --- /dev/null +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ZendeskSunshineOAuthFlow.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.BaseOAuth2Flow; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import org.apache.http.client.utils.URIBuilder; + +/** + * Following docs from + * https://developer.zendesk.com/api-reference/custom-data/introduction/#authentication + */ +public class ZendeskSunshineOAuthFlow extends BaseOAuth2Flow { + + public ZendeskSunshineOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient) { + super(configRepository, httpClient); + } + + @VisibleForTesting + public ZendeskSunshineOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient, final Supplier stateSupplier) { + super(configRepository, httpClient, stateSupplier); + } + + @Override + protected String formatConsentUrl(final UUID definitionId, + final String clientId, + final String redirectUrl, + final JsonNode inputOAuthConfiguration) + throws IOException { + + // getting subdomain value from user's config + final String subdomain = getConfigValueUnsafe(inputOAuthConfiguration, "subdomain"); + + final URIBuilder builder = new URIBuilder() + .setScheme("https") + .setHost(String.format("%s.zendesk.com", subdomain)) + .setPath("oauth/authorizations/new") + // required + .addParameter("response_type", "code") + .addParameter("redirect_uri", redirectUrl) + .addParameter("client_id", clientId) + .addParameter("scope", "read") + .addParameter("state", getState()); + + try { + return builder.build().toString(); + } catch (final URISyntaxException e) { + throw new IOException("Failed to format Consent URL for OAuth flow", e); + } + } + + @Override + protected Map getAccessTokenQueryParameters(String clientId, + String clientSecret, + String authCode, + String redirectUrl) { + return ImmutableMap.builder() + // required + .put("grant_type", "authorization_code") + .put("code", authCode) + .put("client_id", clientId) + .put("client_secret", clientSecret) + .put("redirect_uri", redirectUrl) + .put("scope", "read") + .build(); + } + + @Override + protected String getAccessTokenUrl(final JsonNode inputOAuthConfiguration) { + // getting subdomain value from user's config + final String subdomain = getConfigValueUnsafe(inputOAuthConfiguration, "subdomain"); + + return String.format("https://%s.zendesk.com/oauth/tokens", subdomain); + } + + @Override + protected Map extractOAuthOutput(final JsonNode data, final String accessTokenUrl) throws IOException { + final Map result = new HashMap<>(); + // getting out access_token + if (data.has("access_token")) { + result.put("access_token", data.get("access_token").asText()); + } else { + throw new 
IOException(String.format("Missing 'access_token' in query params from %s", accessTokenUrl)); + } + return result; + } + +} diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/ZendeskSunshineOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/ZendeskSunshineOAuthFlowTest.java new file mode 100644 index 0000000000000..4e6867a847fcc --- /dev/null +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/ZendeskSunshineOAuthFlowTest.java @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.oauth.BaseOAuthFlow; +import io.airbyte.oauth.MoreOAuthParameters; +import java.util.Map; +import org.junit.jupiter.api.Test; + +public class ZendeskSunshineOAuthFlowTest extends BaseOAuthFlowTest { + + @Override + protected BaseOAuthFlow getOAuthFlow() { + return new ZendeskSunshineOAuthFlow(getConfigRepository(), getHttpClient(), this::getConstantState); + } + + @Override + protected String getExpectedConsentUrl() { + return "https://test_subdomain.zendesk.com/oauth/authorizations/new?response_type=code&redirect_uri=https%3A%2F%2Fairbyte.io&client_id=test_client_id&scope=read&state=state"; + } + + @Override + protected JsonNode getInputOAuthConfiguration() { + return Jsons.jsonNode(Map.of("subdomain", "test_subdomain")); + } + + @Override + protected JsonNode getUserInputFromConnectorConfigSpecification() { + return getJsonSchema(Map.of("subdomain", Map.of("type", "string"))); + } + + @Test + public void testEmptyOutputCompleteSourceOAuth() {} + + @Test + public void testGetSourceConsentUrlEmptyOAuthSpec() {} + + @Test + public void testValidateOAuthOutputFailure() {} + + @Test + public void testCompleteSourceOAuth() {} + + @Test + public void testEmptyInputCompleteDestinationOAuth() {} + + @Test + public void testDeprecatedCompleteDestinationOAuth() {} + + @Test + public void testDeprecatedCompleteSourceOAuth() {} + + @Test + public void testEmptyOutputCompleteDestinationOAuth() {} + + @Test + public void testCompleteDestinationOAuth() {} + + @Test + public void testGetDestinationConsentUrlEmptyOAuthSpec() {} + + @Test + public void testEmptyInputCompleteSourceOAuth() {} + + @Override + protected Map getExpectedOutput() { + return Map.of( + "access_token", "access_token_response", + "client_id", MoreOAuthParameters.SECRET_MASK, + "client_secret", MoreOAuthParameters.SECRET_MASK); + } + + @Override + protected JsonNode getCompleteOAuthOutputSpecification() { + return getJsonSchema(Map.of("access_token", Map.of("type", "string"))); + } + + @Override + protected Map getExpectedFilteredOutput() { + return Map.of( + "access_token", "access_token_response", + "client_id", MoreOAuthParameters.SECRET_MASK); + } + +} diff --git a/docs/integrations/sources/zendesk-sunshine.md b/docs/integrations/sources/zendesk-sunshine.md index 365370ea00dcc..c12b22d9371f3 100644 --- a/docs/integrations/sources/zendesk-sunshine.md +++ b/docs/integrations/sources/zendesk-sunshine.md @@ -47,13 +47,16 @@ The Zendesk connector should not run into Zendesk API limitations under normal u ### Requirements -* Zendesk Sunshine Access Token +* Zendesk Sunshine API Token + +OR +* Zendesk Sunshine oauth2.0 application (client_id, client_secret, access_token) ### Setup guide Please follow this [guide](https://developer.zendesk.com/documentation/custom-data/custom-objects/getting-started-with-custom-objects/#enabling-custom-objects) 
-Generate a Access Token as described in [here](https://developer.zendesk.com/api-reference/ticketing/introduction/#security-and-authentication) +Generate an API Token or oauth2.0 Access token as described in [here](https://developer.zendesk.com/api-reference/ticketing/introduction/#security-and-authentication) We recommend creating a restricted, read-only key specifically for Airbyte access. This will allow you to control which resources Airbyte should be able to access. @@ -61,5 +64,6 @@ We recommend creating a restricted, read-only key specifically for Airbyte acces | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.1 | 2021-11-15 | [7976](https://github.com/airbytehq/airbyte/pull/7976) | Add oauth2.0 support | | 0.1.0 | 2021-07-08 | [4359](https://github.com/airbytehq/airbyte/pull/4359) | Initial Release | From dfebddc07656f3d2012179a1b6cd40be43599fbe Mon Sep 17 00:00:00 2001 From: Harshith Mullapudi Date: Wed, 5 Jan 2022 02:49:41 +0530 Subject: [PATCH 033/215] Publish source Amazon SP: fix source only pulls first 100 orders (#9294) * fixed reading only 100 records in orders * removed if condition * changed docker image version and catalog * fixed readme file * fixed issues with 100 orders * fix: bump version * chore: spec changes Co-authored-by: prudhvi85 Co-authored-by: Prudhvi Raj --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../source-amazon-seller-partner/Dockerfile | 2 +- .../source_amazon_seller_partner/streams.py | 7 +- .../sources/amazon-seller-partner.md | 98 +++++++++---------- 5 files changed, 55 insertions(+), 56 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 9e81596aef35b..973f9fdee9f1e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -22,7 +22,7 @@ - name: Amazon Seller Partner sourceDefinitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 dockerRepository: airbyte/source-amazon-seller-partner - dockerImageTag: 0.2.10 + dockerImageTag: 0.2.11 sourceType: api documentationUrl: https://docs.airbyte.io/integrations/sources/amazon-seller-partner icon: amazonsellerpartner.svg diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 3ee4c7c5c8d58..0195aa75a728d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -157,7 +157,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amazon-seller-partner:0.2.10" +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile index 0b67d75a64e70..485b51f7050cc 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.10 +LABEL io.airbyte.version=0.2.11 LABEL io.airbyte.name=airbyte/source-amazon-seller-partner diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index c92a199217459..bcf2569a67795 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -103,7 +103,7 @@ def request_params( def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: stream_data = response.json() - next_page_token = stream_data.get(self.next_page_token_field) + next_page_token = stream_data.get("payload").get(self.next_page_token_field) if next_page_token: return {self.next_page_token_field: next_page_token} @@ -602,8 +602,7 @@ def request_params( self, stream_state: Mapping[str, Any], next_page_token: Mapping[str, Any] = None, **kwargs ) -> MutableMapping[str, Any]: params = super().request_params(stream_state=stream_state, next_page_token=next_page_token, **kwargs) - if not next_page_token: - params.update({"MarketplaceIds": self.marketplace_id}) + params.update({"MarketplaceIds": self.marketplace_id}) return params def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]: @@ -645,4 +644,4 @@ def request_params( return params def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]: - yield from response.json().get(self.data_field, {}).get("shippingLabels", []) + yield from response.json().get(self.data_field, {}).get("shippingLabels", []) \ No newline at end of file diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index b5f89136572b6..e3b9c272fc332 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -2,10 +2,10 @@ ## Features -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental Sync | Yes | | +| Feature | Supported?\(Yes/No\) | Notes | +| :---------------- | :------------------- | :---- | +| Full Refresh Sync | Yes | | +| Incremental Sync | Yes | | This source syncs data from the [Amazon Seller Partner API](https://github.com/amzn/selling-partner-api-docs/blob/main/guides/en-US/developer-guide/SellingPartnerApiDeveloperGuide.md). 
@@ -13,32 +13,32 @@ This source syncs data from the [Amazon Seller Partner API](https://github.com/a This source is capable of syncing the following streams: -* [Order Reports](https://sellercentral.amazon.com/gp/help/help.html?itemID=201648780) -* [All Listings](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#inventory-reports) -* [FBA Inventory Reports](https://sellercentral.amazon.com/gp/help/200740930) -* [Amazon-Fulfilled Shipments Report](https://sellercentral.amazon.com/gp/help/help.html?itemID=200453120) -* [Open Listings Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#inventory-reports) -* [Removal Order Detail Report (overview)](https://sellercentral.amazon.com/gp/help/help.html?itemID=200989110) -* [Removal Shipment Detail Report](https://sellercentral.amazon.com/gp/help/help.html?itemID=200989100) -* [Inventory Health & Planning Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#vendor-retail-analytics-reports) -* [Orders](https://github.com/amzn/selling-partner-api-docs/blob/main/references/orders-api/ordersV0.md) \(incremental\) -* [VendorDirectFulfillmentShipping](https://github.com/amzn/selling-partner-api-docs/blob/main/references/vendor-direct-fulfillment-shipping-api/vendorDirectFulfillmentShippingV1.md) -* [Seller Feedback Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#performance-reports) -* [Brand Analytics Search Terms Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#brand-analytics-reports) +- [Order Reports](https://sellercentral.amazon.com/gp/help/help.html?itemID=201648780) +- [All Listings](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#inventory-reports) +- [FBA Inventory Reports](https://sellercentral.amazon.com/gp/help/200740930) +- [Amazon-Fulfilled Shipments Report](https://sellercentral.amazon.com/gp/help/help.html?itemID=200453120) +- [Open Listings Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#inventory-reports) +- [Removal Order Detail Report (overview)](https://sellercentral.amazon.com/gp/help/help.html?itemID=200989110) +- [Removal Shipment Detail Report](https://sellercentral.amazon.com/gp/help/help.html?itemID=200989100) +- [Inventory Health & Planning Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#vendor-retail-analytics-reports) +- [Orders](https://github.com/amzn/selling-partner-api-docs/blob/main/references/orders-api/ordersV0.md) \(incremental\) +- [VendorDirectFulfillmentShipping](https://github.com/amzn/selling-partner-api-docs/blob/main/references/vendor-direct-fulfillment-shipping-api/vendorDirectFulfillmentShippingV1.md) +- [Seller Feedback Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#performance-reports) +- [Brand Analytics Search Terms Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#brand-analytics-reports) ## Getting started **Requirements** -* replication\_start\_date -* refresh\_token -* lwa\_app\_id -* lwa\_client\_secret -* aws\_access\_key -* aws\_secret\_key -* role\_arn -* 
aws\_environment -* region +- replication_start_date +- refresh_token +- lwa_app_id +- lwa_client_secret +- aws_access_key +- aws_secret_key +- role_arn +- aws_environment +- region **Setup guide** @@ -46,34 +46,34 @@ Information about how to get credentials you may find [here](https://github.com/ ## Data type mapping -| Integration Type | Airbyte Type | Notes | -| :--- | :--- | :--- | -| `string` | `string` | | -| `int`, `float`, `number` | `number` | | -| `date` | `date` | | -| `datetime` | `datetime` | | -| `array` | `array` | | -| `object` | `object` | | +| Integration Type | Airbyte Type | Notes | +| :----------------------- | :----------- | :---- | +| `string` | `string` | | +| `int`, `float`, `number` | `number` | | +| `date` | `date` | | +| `datetime` | `datetime` | | +| `array` | `array` | | +| `object` | `object` | | ### Performance Considerations (Airbyte Open-Source) Information about rate limits you may find [here](https://github.com/amzn/selling-partner-api-docs/blob/main/guides/en-US/usage-plans-rate-limits/Usage-Plans-and-Rate-Limits.md). - ## CHANGELOG -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| `0.2.10` | 2021-12-31 | [\#9236](https://github.com/airbytehq/airbyte/pull/9236) | Fix NoAuth deprecation warning | -| `0.2.9` | 2021-12-30 | [\#9212](https://github.com/airbytehq/airbyte/pull/9212) | Normalize GET_SELLER_FEEDBACK_DATA header field names | -| `0.2.8` | 2021-12-22 | [\#8810](https://github.com/airbytehq/airbyte/pull/8810) | Fix GET_SELLER_FEEDBACK_DATA Date cursor field format | -| `0.2.7` | 2021-12-21 | [\#9002](https://github.com/airbytehq/airbyte/pull/9002) | Extract REPORTS_MAX_WAIT_SECONDS to configurable parameter | -| `0.2.6` | 2021-12-10 | [\#8179](https://github.com/airbytehq/airbyte/pull/8179) | Add GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT report | -| `0.2.5` | 2021-12-06 | [\#8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | -| `0.2.4` | 2021-11-08 | [\#8021](https://github.com/airbytehq/airbyte/pull/8021) | Added GET_SELLER_FEEDBACK_DATA report with incremental sync capability | -| `0.2.3` | 2021-11-08 | [\#7828](https://github.com/airbytehq/airbyte/pull/7828) | Remove datetime format from all streams | -| `0.2.2` | 2021-11-08 | [\#7752](https://github.com/airbytehq/airbyte/pull/7752) | Change `check_connection` function to use stream Orders | -| `0.2.1` | 2021-09-17 | [\#5248](https://github.com/airbytehq/airbyte/pull/5248) | `Added extra stream support. 
Updated reports streams logics` | -| `0.2.0` | 2021-08-06 | [\#4863](https://github.com/airbytehq/airbyte/pull/4863) | `Rebuild source with airbyte-cdk` | -| `0.1.3` | 2021-06-23 | [\#4288](https://github.com/airbytehq/airbyte/pull/4288) | `Bugfix failing connection check` | -| `0.1.2` | 2021-06-15 | [\#4108](https://github.com/airbytehq/airbyte/pull/4108) | `Fixed: Sync fails with timeout when create report is CANCELLED` | +| Version | Date | Pull Request | Subject | +| :------- | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------- | +| `0.2.11` | 2022-01-05 | [\#9115](https://github.com/airbytehq/airbyte/pull/9115) | Fix reading only 100 orders | +| `0.2.10` | 2021-12-31 | [\#9236](https://github.com/airbytehq/airbyte/pull/9236) | Fix NoAuth deprecation warning | +| `0.2.9` | 2021-12-30 | [\#9212](https://github.com/airbytehq/airbyte/pull/9212) | Normalize GET_SELLER_FEEDBACK_DATA header field names | +| `0.2.8` | 2021-12-22 | [\#8810](https://github.com/airbytehq/airbyte/pull/8810) | Fix GET_SELLER_FEEDBACK_DATA Date cursor field format | +| `0.2.7` | 2021-12-21 | [\#9002](https://github.com/airbytehq/airbyte/pull/9002) | Extract REPORTS_MAX_WAIT_SECONDS to configurable parameter | +| `0.2.6` | 2021-12-10 | [\#8179](https://github.com/airbytehq/airbyte/pull/8179) | Add GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT report | +| `0.2.5` | 2021-12-06 | [\#8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | +| `0.2.4` | 2021-11-08 | [\#8021](https://github.com/airbytehq/airbyte/pull/8021) | Added GET_SELLER_FEEDBACK_DATA report with incremental sync capability | +| `0.2.3` | 2021-11-08 | [\#7828](https://github.com/airbytehq/airbyte/pull/7828) | Remove datetime format from all streams | +| `0.2.2` | 2021-11-08 | [\#7752](https://github.com/airbytehq/airbyte/pull/7752) | Change `check_connection` function to use stream Orders | +| `0.2.1` | 2021-09-17 | [\#5248](https://github.com/airbytehq/airbyte/pull/5248) | `Added extra stream support. 
Updated reports streams logics` | +| `0.2.0` | 2021-08-06 | [\#4863](https://github.com/airbytehq/airbyte/pull/4863) | `Rebuild source with airbyte-cdk` | +| `0.1.3` | 2021-06-23 | [\#4288](https://github.com/airbytehq/airbyte/pull/4288) | `Bugfix failing connection check` | +| `0.1.2` | 2021-06-15 | [\#4108](https://github.com/airbytehq/airbyte/pull/4108) | `Fixed: Sync fails with timeout when create report is CANCELLED` | From bd71999a457a25c3d9a5063950ff290417904271 Mon Sep 17 00:00:00 2001 From: jdclarke5 Date: Wed, 5 Jan 2022 12:56:47 +1100 Subject: [PATCH 034/215] Source Gitlab: fix typo in specification (#9297) --- .../connectors/source-gitlab/source_gitlab/spec.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-gitlab/source_gitlab/spec.json b/airbyte-integrations/connectors/source-gitlab/source_gitlab/spec.json index f080158cb3dd6..8ea35a964b8b0 100644 --- a/airbyte-integrations/connectors/source-gitlab/source_gitlab/spec.json +++ b/airbyte-integrations/connectors/source-gitlab/source_gitlab/spec.json @@ -15,7 +15,7 @@ }, "private_token": { "type": "string", - "title": "Privat Token", + "title": "Private Token", "description": "Log into your GitLab account and then generate a personal Access Token.", "airbyte_secret": true }, From de56d4713cf15af42a4cd24e5744e790d9a98dd2 Mon Sep 17 00:00:00 2001 From: Marcos Marx Date: Tue, 4 Jan 2022 23:28:14 -0300 Subject: [PATCH 035/215] Publish PR 9029: clickhouse normalization (#9072) * add normalization-clickhouse docker build step * bump normalization version * small changes gradle * fix settings gradle * fix eof file * correct clickhouse normalization * Refactor jinja template for scd (#9278) * merge chris code and regenerate sql files Co-authored-by: James Zhao Co-authored-by: Edward Gao Co-authored-by: Christophe Duong --- .../bases/base-normalization/.dockerignore | 1 + .../bases/base-normalization/Dockerfile | 2 +- .../bases/base-normalization/build.gradle | 6 + .../base-normalization/docker-compose.yaml | 2 + .../dedup_cdc_excluded_scd.sql | 9 +- .../dedup_exchange_rate_scd.sql | 9 +- .../renamed_dedup_cdc_excluded_scd.sql | 9 +- .../test_normalization/dedup_cdc_excluded.sql | 1 + .../dedup_exchange_rate.sql | 1 + .../renamed_dedup_cdc_excluded.sql | 1 + .../test_normalization/exchange_rate.sql | 4 + .../dedup_cdc_excluded_ab3.sql | 71 ----- .../dedup_exchange_rate_stg.sql} | 5 +- .../test_normalization/pos_dedup_cdcx_ab3.sql | 78 ------ .../renamed_dedup_cdc_excluded_ab3.sql | 45 ---- .../dedup_cdc_excluded_ab1.sql | 1 + .../dedup_cdc_excluded_ab2.sql | 1 + .../dedup_exchange_rate_ab1.sql | 1 + .../dedup_exchange_rate_ab2.sql | 1 + .../test_normalization/exchange_rate_ab1.sql | 1 + .../test_normalization/exchange_rate_ab2.sql | 1 + .../test_normalization/exchange_rate_ab3.sql | 1 + .../test_normalization/pos_dedup_cdcx_ab1.sql | 1 + .../test_normalization/pos_dedup_cdcx_ab2.sql | 1 + .../renamed_dedup_cdc_excluded_ab1.sql | 1 + .../renamed_dedup_cdc_excluded_ab2.sql | 1 + .../dedup_cdc_excluded_scd.sql | 26 +- .../dedup_exchange_rate_scd.sql | 26 +- .../renamed_dedup_cdc_excluded_scd.sql | 26 +- .../test_normalization/dedup_cdc_excluded.sql | 1 + .../dedup_exchange_rate.sql | 1 + .../renamed_dedup_cdc_excluded.sql | 1 + .../test_normalization/exchange_rate.sql | 1 + .../dedup_cdc_excluded_ab3.sql | 20 -- ...te_ab3.sql => dedup_exchange_rate_stg.sql} | 1 + .../test_normalization/pos_dedup_cdcx_ab3.sql | 21 -- .../renamed_dedup_cdc_excluded_ab3.sql | 16 -- 
.../test_normalization/exchange_rate.sql | 4 + .../dedup_cdc_excluded_ab3.sql | 71 ----- .../dedup_exchange_rate_stg.sql} | 5 +- .../test_normalization/pos_dedup_cdcx_ab3.sql | 78 ------ .../renamed_dedup_cdc_excluded_ab3.sql | 45 ---- .../transform_catalog/stream_processor.py | 253 ++++++++++-------- .../NormalizationRunnerFactory.java | 2 +- build.gradle | 1 + settings.gradle | 1 + 46 files changed, 254 insertions(+), 601 deletions(-) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/{second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql => first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql} (91%) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/{dedup_exchange_rate_ab3.sql => dedup_exchange_rate_stg.sql} (92%) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/{first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql => second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql} (91%) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql diff --git a/airbyte-integrations/bases/base-normalization/.dockerignore b/airbyte-integrations/bases/base-normalization/.dockerignore index 2e284d6c45646..e6fbfb3101a91 100644 --- a/airbyte-integrations/bases/base-normalization/.dockerignore +++ b/airbyte-integrations/bases/base-normalization/.dockerignore @@ -8,3 +8,4 @@ 
!dbt-project-template-mssql !dbt-project-template-mysql !dbt-project-template-oracle +!dbt-project-template-clickhouse diff --git a/airbyte-integrations/bases/base-normalization/Dockerfile b/airbyte-integrations/bases/base-normalization/Dockerfile index a9efa7a9379f6..be915f01ab0d0 100644 --- a/airbyte-integrations/bases/base-normalization/Dockerfile +++ b/airbyte-integrations/bases/base-normalization/Dockerfile @@ -28,5 +28,5 @@ WORKDIR /airbyte ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" ENTRYPOINT ["/airbyte/entrypoint.sh"] -LABEL io.airbyte.version=0.1.61 +LABEL io.airbyte.version=0.1.62 LABEL io.airbyte.name=airbyte/normalization diff --git a/airbyte-integrations/bases/base-normalization/build.gradle b/airbyte-integrations/bases/base-normalization/build.gradle index ccdda2ce39dd4..eff597c11a3e1 100644 --- a/airbyte-integrations/bases/base-normalization/build.gradle +++ b/airbyte-integrations/bases/base-normalization/build.gradle @@ -69,10 +69,15 @@ task airbyteDockerOracle(type: Exec, dependsOn: checkSshScriptCopy) { configure buildAirbyteDocker('oracle') dependsOn assemble } +task airbyteDockerClickhouse(type: Exec, dependsOn: checkSshScriptCopy) { + configure buildAirbyteDocker('clickhouse') + dependsOn assemble +} airbyteDocker.dependsOn(airbyteDockerMSSql) airbyteDocker.dependsOn(airbyteDockerMySql) airbyteDocker.dependsOn(airbyteDockerOracle) +airbyteDocker.dependsOn(airbyteDockerClickhouse) task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs) { module = "pytest" @@ -86,6 +91,7 @@ task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs dependsOn ':airbyte-integrations:connectors:destination-snowflake:airbyteDocker' dependsOn ':airbyte-integrations:connectors:destination-oracle:airbyteDocker' dependsOn ':airbyte-integrations:connectors:destination-mssql:airbyteDocker' + dependsOn ':airbyte-integrations:connectors:destination-clickhouse:airbyteDocker' } integrationTest.dependsOn("customIntegrationTestPython") diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.yaml index ced1d036d206f..8dd94275765be 100644 --- a/airbyte-integrations/bases/base-normalization/docker-compose.yaml +++ b/airbyte-integrations/bases/base-normalization/docker-compose.yaml @@ -10,3 +10,5 @@ services: image: airbyte/normalization-mysql:${VERSION} normalization-oracle: image: airbyte/normalization-oracle:${VERSION} + normalization-clickhouse: + image: airbyte/normalization-clickhouse:${VERSION} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 749e5b38562b4..99e574c63fda6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -12,11 +12,12 @@ as ( +-- depends_on: ref('dedup_cdc_excluded_stg') with input_data as ( select * - 
from _airbyte_test_normalization.dedup_cdc_excluded_ab3 + from _airbyte_test_normalization.dedup_cdc_excluded_stg -- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded ), @@ -45,15 +46,15 @@ scd_data as ( _ab_cdc_updated_at, _ab_cdc_deleted_at, _airbyte_emitted_at as _airbyte_start_at, + case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, anyOrNull(_airbyte_emitted_at) over ( partition by id order by _airbyte_emitted_at is null asc, _airbyte_emitted_at desc, _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING ) as _airbyte_end_at, - case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_cdc_excluded_hashid @@ -65,7 +66,7 @@ dedup_data as ( -- additionally, we generate a unique key for the scd table row_number() over ( partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') - order by _airbyte_ab_id + order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 49c1843204fee..2486691308c65 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -12,11 +12,12 @@ as ( +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( select * - from _airbyte_test_normalization.dedup_exchange_rate_ab3 + from _airbyte_test_normalization.dedup_exchange_rate_stg -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate ), @@ -54,15 +55,15 @@ scd_data as ( NZD, USD, date as _airbyte_start_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, anyOrNull(date) over ( partition by id, currency, cast(NZD as String) order by date is null asc, date desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING ) as _airbyte_end_at, - case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_exchange_rate_hashid @@ -74,7 +75,7 @@ dedup_data as ( -- additionally, we generate a unique key for the scd table row_number() over ( partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at - order by _airbyte_ab_id + order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 61f46aa4665c4..4fa7b03259e21 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -12,11 +12,12 @@ as ( +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') with input_data as ( select * - from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3 + from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg -- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded ), @@ -41,15 +42,15 @@ scd_data as ( ))) as _airbyte_unique_key, id, _airbyte_emitted_at as _airbyte_start_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, anyOrNull(_airbyte_emitted_at) over ( partition by id order by _airbyte_emitted_at is null asc, _airbyte_emitted_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING ) as _airbyte_end_at, - case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_renamed_dedup_cdc_excluded_hashid @@ -61,7 +62,7 @@ dedup_data as ( -- additionally, we generate a unique key for the scd table row_number() over ( partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at - order by _airbyte_ab_id + order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql index 4b2055de8600d..8aea31930d35c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -13,6 +13,7 @@ as ( -- Final base SQL model +-- depends_on: test_normalization.dedup_cdc_excluded_scd select _airbyte_unique_key, id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index d1dc1abdc7142..28204615e97cf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -13,6 +13,7 @@ as ( -- Final base SQL model +-- depends_on: test_normalization.dedup_exchange_rate_scd select _airbyte_unique_key, id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index 5295b9baa8dae..b16b5361120f0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -13,6 +13,7 @@ as ( -- Final base SQL model +-- depends_on: test_normalization.renamed_dedup_cdc_excluded_scd select _airbyte_unique_key, id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index 0eb15bc43e455..2ee3d293b8403 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -14,6 +14,7 @@ with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_exchange_rate select JSONExtractRaw(_airbyte_data, 'id') as id, JSONExtractRaw(_airbyte_data, 'currency') as currency, @@ -33,6 +34,7 @@ where 1 = 1 ), __dbt__cte__exchange_rate_ab2 as ( -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 select accurateCastOrNull(id, ' BIGINT @@ -60,6 +62,7 @@ where 1 = 1 ), __dbt__cte__exchange_rate_ab3 as ( -- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 select assumeNotNull(hex(MD5( @@ -95,6 +98,7 @@ from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate where 1 = 1 )-- Final base SQL 
model +-- depends_on: __dbt__cte__exchange_rate_ab3 select id, currency, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql deleted file mode 100644 index fe2bf632dbf20..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_cdc_excluded_ab3__dbt_tmp - - as ( - -with __dbt__cte__dedup_cdc_excluded_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'name') as name, - JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias --- dedup_cdc_excluded -where 1 = 1 - -), __dbt__cte__dedup_cdc_excluded_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, ' - Float64 -') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, ' - Float64 -') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, ' - Float64 -') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__dedup_cdc_excluded_ab1 --- dedup_cdc_excluded -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(name) || '~' || - - - toString(_ab_cdc_lsn) || '~' || - - - toString(_ab_cdc_updated_at) || '~' || - - - toString(_ab_cdc_deleted_at) - - ))) as _airbyte_dedup_cdc_excluded_hashid, - tmp.* -from __dbt__cte__dedup_cdc_excluded_ab2 tmp --- dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql similarity index 91% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 28abd1a79a7f2..799af4ec78aba 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -1,12 +1,13 @@ - create view _airbyte_test_normalization.dedup_exchange_rate_ab3__dbt_tmp + create view _airbyte_test_normalization.dedup_exchange_rate_stg__dbt_tmp as ( with __dbt__cte__dedup_exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate select JSONExtractRaw(_airbyte_data, 'id') as id, JSONExtractRaw(_airbyte_data, 'currency') as currency, @@ -26,6 +27,7 @@ where 1 = 1 ), __dbt__cte__dedup_exchange_rate_ab2 as ( -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 select accurateCastOrNull(id, ' BIGINT @@ -51,6 +53,7 @@ from __dbt__cte__dedup_exchange_rate_ab1 where 1 = 1 )-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 select assumeNotNull(hex(MD5( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql deleted file mode 100644 index 9f515f09a4a44..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql +++ /dev/null @@ -1,78 +0,0 @@ - - - create view _airbyte_test_normalization.pos_dedup_cdcx_ab3__dbt_tmp - - as ( - -with __dbt__cte__pos_dedup_cdcx_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'name') as name, - JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias --- pos_dedup_cdcx -where 1 = 1 - -), __dbt__cte__pos_dedup_cdcx_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, ' - Float64 -') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, ' - Float64 -') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, ' - Float64 -') as _ab_cdc_deleted_at, - accurateCastOrNull(_ab_cdc_log_pos, ' - Float64 -') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as 
_airbyte_normalized_at -from __dbt__cte__pos_dedup_cdcx_ab1 --- pos_dedup_cdcx -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(name) || '~' || - - - toString(_ab_cdc_lsn) || '~' || - - - toString(_ab_cdc_updated_at) || '~' || - - - toString(_ab_cdc_deleted_at) || '~' || - - - toString(_ab_cdc_log_pos) - - ))) as _airbyte_pos_dedup_cdcx_hashid, - tmp.* -from __dbt__cte__pos_dedup_cdcx_ab2 tmp --- pos_dedup_cdcx -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql deleted file mode 100644 index 43c5b8ad9e18a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql +++ /dev/null @@ -1,45 +0,0 @@ - - - create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3__dbt_tmp - - as ( - -with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias --- renamed_dedup_cdc_excluded -where 1 = 1 - -), __dbt__cte__renamed_dedup_cdc_excluded_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__renamed_dedup_cdc_excluded_ab1 --- renamed_dedup_cdc_excluded -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record -select - assumeNotNull(hex(MD5( - - toString(id) - - ))) as _airbyte_renamed_dedup_cdc_excluded_hashid, - tmp.* -from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp --- renamed_dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql index ad250a2de1969..5b9ee4b6b6820 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by 
the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as name, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql index b4921f53776b7..6f7e747a0699a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_cdc_excluded_ab1') }} select accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, nullif(accurateCastOrNull(trim(BOTH '"' from name), '{{ dbt_utils.type_string() }}'), 'null') as name, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index bdfc716769aee..6e998ca141418 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 351ccad8f300e..ee41ee94585ee 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} select accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql index 59f1c4bcfba0c..f9b9da32d25d1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql index a48a14a7aecc3..49cb5ea4c759b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('exchange_rate_ab1') }} select accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql index d6593d4eb8f1a..c45103fae85c5 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('exchange_rate_ab2') }} select {{ dbt_utils.surrogate_key([ 'id', diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql index 182bcd7dbfd4f..909b7bd2366b6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as name, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql index 3769adf4d02e0..0b9192b2620a4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('pos_dedup_cdcx_ab1') }} select accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, nullif(accurateCastOrNull(trim(BOTH '"' from name), '{{ dbt_utils.type_string() }}'), 'null') as name, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index 4504a7bbffa32..a09668e69387e 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, _airbyte_ab_id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index 8b248db9590f7..2fd528509bc5a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} select accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, _airbyte_ab_id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index eca9d38763a06..eedb5184f0a89 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -1,15 +1,17 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", + post_hook = ['drop view _airbyte_test_normalization.dedup_cdc_excluded_stg'], tags = [ "top-level" ] ) }} +-- depends_on: ref('dedup_cdc_excluded_stg') with {% if is_incremental() %} new_data as ( -- retrieve incremental "new" data select * - from {{ ref('dedup_cdc_excluded_ab3') }} + from {{ ref('dedup_cdc_excluded_stg') }} -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} where 1 = 1 {{ incremental_clause('_airbyte_emitted_at') }} @@ -22,26 +24,30 @@ new_data_ids as ( ]) }} as _airbyte_unique_key from new_data ), 
+empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes select - {{ star_intersect(ref('dedup_cdc_excluded_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + {{ star_intersect(ref('dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} from {{ this }} as this_data -- make a join with new_data using primary key to filter active data that need to be updated only join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) - --left join {{ ref('dedup_cdc_excluded_ab3') }} as inc_data on 1 = 0 + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + --left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_cdc_excluded_ab3')) }} from new_data + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data union all - select {{ dbt_utils.star(ref('dedup_cdc_excluded_ab3')) }} from previous_active_scd_data + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), {% else %} input_data as ( select * - from {{ ref('dedup_cdc_excluded_ab3') }} + from {{ ref('dedup_cdc_excluded_stg') }} -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} ), {% endif %} @@ -68,15 +74,15 @@ scd_data as ( _ab_cdc_updated_at, _ab_cdc_deleted_at, _airbyte_emitted_at as _airbyte_start_at, + case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, anyOrNull(_airbyte_emitted_at) over ( partition by id order by _airbyte_emitted_at is null asc, _airbyte_emitted_at desc, _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING ) as _airbyte_end_at, - case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_cdc_excluded_hashid @@ -88,7 +94,7 @@ dedup_data as ( -- additionally, we generate a unique key for the scd table row_number() over ( partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') - order by _airbyte_ab_id + order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ '_airbyte_unique_key', diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 97757d03ce77d..13744503505c2 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,15 +1,17 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", + post_hook = ['drop view _airbyte_test_normalization.dedup_exchange_rate_stg'], tags = [ "top-level" ] ) }} +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( -- retrieve incremental "new" data select * - from {{ ref('dedup_exchange_rate_ab3') }} + from {{ ref('dedup_exchange_rate_stg') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 {{ incremental_clause('_airbyte_emitted_at') }} @@ -24,26 +26,30 @@ new_data_ids as ( ]) }} as _airbyte_unique_key from new_data ), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes select - {{ star_intersect(ref('dedup_exchange_rate_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} from {{ this }} as this_data -- make a join with new_data using primary key to filter active data that need to be updated only join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) - --left join {{ ref('dedup_exchange_rate_ab3') }} as inc_data on 1 = 0 + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + --left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_ab3')) }} from new_data + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_ab3')) }} from previous_active_scd_data + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), {% else %} input_data as ( select * - from {{ ref('dedup_exchange_rate_ab3') }} + from {{ ref('dedup_exchange_rate_stg') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} ), {% endif %} @@ -75,15 +81,15 @@ scd_data as ( NZD, USD, date as _airbyte_start_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, anyOrNull(date) over ( partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) order by date is null asc, date desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING ) as _airbyte_end_at, - case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_exchange_rate_hashid @@ -95,7 +101,7 @@ dedup_data as ( -- additionally, we generate a unique key for the scd table row_number() over ( partition by 
_airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at - order by _airbyte_ab_id + order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ '_airbyte_unique_key', diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index c0dcee2b2ccbb..525bee19a04f1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -1,15 +1,17 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", + post_hook = ['drop view _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg'], tags = [ "top-level" ] ) }} +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') with {% if is_incremental() %} new_data as ( -- retrieve incremental "new" data select * - from {{ ref('renamed_dedup_cdc_excluded_ab3') }} + from {{ ref('renamed_dedup_cdc_excluded_stg') }} -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} where 1 = 1 {{ incremental_clause('_airbyte_emitted_at') }} @@ -22,26 +24,30 @@ new_data_ids as ( ]) }} as _airbyte_unique_key from new_data ), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes select - {{ star_intersect(ref('renamed_dedup_cdc_excluded_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + {{ star_intersect(ref('renamed_dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} from {{ this }} as this_data -- make a join with new_data using primary key to filter active data that need to be updated only join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) - --left join {{ ref('renamed_dedup_cdc_excluded_ab3') }} as inc_data on 1 = 0 + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + --left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_ab3')) }} from new_data + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data union all - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_ab3')) }} from previous_active_scd_data + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), {% else %} input_data as ( select * - from {{ ref('renamed_dedup_cdc_excluded_ab3') }} + from {{ ref('renamed_dedup_cdc_excluded_stg') }} 
-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} ), {% endif %} @@ -64,15 +70,15 @@ scd_data as ( ]) }} as _airbyte_unique_key, id, _airbyte_emitted_at as _airbyte_start_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, anyOrNull(_airbyte_emitted_at) over ( partition by id order by _airbyte_emitted_at is null asc, _airbyte_emitted_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING ) as _airbyte_end_at, - case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_renamed_dedup_cdc_excluded_hashid @@ -84,7 +90,7 @@ dedup_data as ( -- additionally, we generate a unique key for the scd table row_number() over ( partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at - order by _airbyte_ab_id + order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ '_airbyte_unique_key', diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql index 086676f173719..6a6248e7cb6a8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -4,6 +4,7 @@ tags = [ "top-level" ] ) }} -- Final base SQL model +-- depends_on: {{ ref('dedup_cdc_excluded_scd') }} select _airbyte_unique_key, id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 3fb8238f2479b..180310a437ff6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -4,6 +4,7 @@ tags = [ "top-level" ] ) }} -- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} select _airbyte_unique_key, id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql 
index 8a8ff85f59024..d9f20813f833e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -4,6 +4,7 @@ tags = [ "top-level" ] ) }} -- Final base SQL model +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }} select _airbyte_unique_key, id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql index c370f10264a3f..77fba32c34999 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -4,6 +4,7 @@ tags = [ "top-level" ] ) }} -- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} select id, currency, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql deleted file mode 100644 index 446204f691ebc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record -select - {{ dbt_utils.surrogate_key([ - 'id', - 'name', - '_ab_cdc_lsn', - '_ab_cdc_updated_at', - '_ab_cdc_deleted_at', - ]) }} as _airbyte_dedup_cdc_excluded_hashid, - tmp.* -from {{ ref('dedup_cdc_excluded_ab2') }} tmp --- dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql similarity index 92% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql rename to 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index b14a1fb639b1b..0b4900731039d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -4,6 +4,7 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} select {{ dbt_utils.surrogate_key([ 'id', diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql deleted file mode 100644 index dbe0c313b238b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record -select - {{ dbt_utils.surrogate_key([ - 'id', - 'name', - '_ab_cdc_lsn', - '_ab_cdc_updated_at', - '_ab_cdc_deleted_at', - '_ab_cdc_log_pos', - ]) }} as _airbyte_pos_dedup_cdcx_hashid, - tmp.* -from {{ ref('pos_dedup_cdcx_ab2') }} tmp --- pos_dedup_cdcx -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql deleted file mode 100644 index 2356b929f1f38..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql +++ /dev/null @@ -1,16 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record -select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, - tmp.* -from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index 0eb15bc43e455..2ee3d293b8403 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -14,6 +14,7 @@ with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_exchange_rate select JSONExtractRaw(_airbyte_data, 'id') as id, JSONExtractRaw(_airbyte_data, 'currency') as currency, @@ -33,6 +34,7 @@ where 1 = 1 ), __dbt__cte__exchange_rate_ab2 as ( -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 select accurateCastOrNull(id, ' BIGINT @@ -60,6 +62,7 @@ where 1 = 1 ), __dbt__cte__exchange_rate_ab3 as ( -- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 select assumeNotNull(hex(MD5( @@ -95,6 +98,7 @@ from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate where 1 = 1 )-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 select id, currency, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql deleted file mode 100644 index fe2bf632dbf20..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_cdc_excluded_ab3__dbt_tmp - - as ( - -with __dbt__cte__dedup_cdc_excluded_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'name') as name, - JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias --- dedup_cdc_excluded -where 1 = 1 - -), __dbt__cte__dedup_cdc_excluded_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, ' - Float64 -') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, ' - Float64 -') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, ' - 
Float64 -') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__dedup_cdc_excluded_ab1 --- dedup_cdc_excluded -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(name) || '~' || - - - toString(_ab_cdc_lsn) || '~' || - - - toString(_ab_cdc_updated_at) || '~' || - - - toString(_ab_cdc_deleted_at) - - ))) as _airbyte_dedup_cdc_excluded_hashid, - tmp.* -from __dbt__cte__dedup_cdc_excluded_ab2 tmp --- dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql similarity index 91% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 28abd1a79a7f2..799af4ec78aba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -1,12 +1,13 @@ - create view _airbyte_test_normalization.dedup_exchange_rate_ab3__dbt_tmp + create view _airbyte_test_normalization.dedup_exchange_rate_stg__dbt_tmp as ( with __dbt__cte__dedup_exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate select JSONExtractRaw(_airbyte_data, 'id') as id, JSONExtractRaw(_airbyte_data, 'currency') as currency, @@ -26,6 +27,7 @@ where 1 = 1 ), __dbt__cte__dedup_exchange_rate_ab2 as ( -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 select accurateCastOrNull(id, ' BIGINT @@ -51,6 +53,7 @@ from __dbt__cte__dedup_exchange_rate_ab1 where 1 = 1 )-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 select assumeNotNull(hex(MD5( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql deleted file mode 100644 index 9f515f09a4a44..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql +++ /dev/null @@ -1,78 +0,0 @@ - - - create view _airbyte_test_normalization.pos_dedup_cdcx_ab3__dbt_tmp - - as ( - -with __dbt__cte__pos_dedup_cdcx_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'name') as name, - JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias --- pos_dedup_cdcx -where 1 = 1 - -), __dbt__cte__pos_dedup_cdcx_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, ' - Float64 -') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, ' - Float64 -') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, ' - Float64 -') as _ab_cdc_deleted_at, - accurateCastOrNull(_ab_cdc_log_pos, ' - Float64 -') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__pos_dedup_cdcx_ab1 --- pos_dedup_cdcx -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(name) || '~' || - - - toString(_ab_cdc_lsn) || '~' || - - - toString(_ab_cdc_updated_at) || '~' || - - - toString(_ab_cdc_deleted_at) || '~' || - - - toString(_ab_cdc_log_pos) - - ))) as _airbyte_pos_dedup_cdcx_hashid, - tmp.* -from __dbt__cte__pos_dedup_cdcx_ab2 tmp --- pos_dedup_cdcx -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql deleted file mode 100644 index 43c5b8ad9e18a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql +++ /dev/null @@ -1,45 +0,0 @@ - - - create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3__dbt_tmp - - as ( - -with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias --- renamed_dedup_cdc_excluded -where 1 = 1 - -), 
__dbt__cte__renamed_dedup_cdc_excluded_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__renamed_dedup_cdc_excluded_ab1 --- renamed_dedup_cdc_excluded -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record -select - assumeNotNull(hex(MD5( - - toString(id) - - ))) as _airbyte_renamed_dedup_cdc_excluded_hashid, - tmp.* -from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp --- renamed_dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index e41b683b6b1bc..1a8993ddf8cc3 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -649,7 +649,144 @@ def safe_cast_to_string(definition: Dict, column_name: str, destination_type: De return col def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> str: - scd_sql_template = """ + order_null = "is null asc" + if self.destination_type.value == DestinationType.ORACLE.value: + order_null = "asc nulls last" + if self.destination_type.value == DestinationType.MSSQL.value: + # SQL Server treats NULL values as the lowest values, then sorted in ascending order, NULLs come first. + order_null = "desc" + + lag_begin = "lag" + lag_end = "" + input_data_table = "input_data" + if self.destination_type == DestinationType.CLICKHOUSE: + # ClickHouse doesn't support lag() yet, this is a workaround solution + # Ref: https://clickhouse.com/docs/en/sql-reference/window-functions/ + lag_begin = "anyOrNull" + lag_end = "ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" + input_data_table = "input_data_with_active_row_num" + + enable_left_join_null = "" + cast_begin = "cast(" + cast_as = " as " + cast_end = ")" + if self.destination_type == DestinationType.CLICKHOUSE: + enable_left_join_null = "--" + cast_begin = "accurateCastOrNull(" + cast_as = ", '" + cast_end = "')" + + # TODO move all cdc columns out of scd models + cdc_active_row_pattern = "" + cdc_updated_order_pattern = "" + cdc_cols = "" + quoted_cdc_cols = "" + if "_ab_cdc_deleted_at" in column_names.keys(): + col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at") + col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at") + quoted_col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at", in_jinja=True) + quoted_col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at", in_jinja=True) + cdc_active_row_pattern = f" and {col_cdc_deleted_at} is null" + cdc_updated_order_pattern = f", {col_cdc_updated_at} desc" + cdc_cols = ( + f", {cast_begin}{col_cdc_deleted_at}{cast_as}" + + "{{ dbt_utils.type_string() }}" + + f"{cast_end}" + + f", {cast_begin}{col_cdc_updated_at}{cast_as}" + + "{{ dbt_utils.type_string() }}" + + f"{cast_end}" + ) + quoted_cdc_cols = f", {quoted_col_cdc_deleted_at}, {quoted_col_cdc_updated_at}" + + if "_ab_cdc_log_pos" in column_names.keys(): + col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos") + 
quoted_col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos", in_jinja=True) + cdc_updated_order_pattern += f", {col_cdc_log_pos} desc" + cdc_cols += f", {cast_begin}{col_cdc_log_pos}{cast_as}" + "{{ dbt_utils.type_string() }}" + f"{cast_end}" + quoted_cdc_cols += f", {quoted_col_cdc_log_pos}" + + jinja_variables = { + "active_row": self.name_transformer.normalize_column_name("_airbyte_active_row"), + "airbyte_end_at": self.name_transformer.normalize_column_name("_airbyte_end_at"), + "airbyte_row_num": self.name_transformer.normalize_column_name("_airbyte_row_num"), + "airbyte_start_at": self.name_transformer.normalize_column_name("_airbyte_start_at"), + "airbyte_unique_key_scd": self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), + "cdc_active_row": cdc_active_row_pattern, + "cdc_cols": cdc_cols, + "cdc_updated_at_order": cdc_updated_order_pattern, + "col_ab_id": self.get_ab_id(), + "col_emitted_at": self.get_emitted_at(), + "col_normalized_at": self.get_normalized_at(), + "cursor_field": self.get_cursor_field(column_names), + "enable_left_join_null": enable_left_join_null, + "fields": self.list_fields(column_names), + "from_table": from_table, + "hash_id": self.hash_id(), + "input_data_table": input_data_table, + "lag_begin": lag_begin, + "lag_end": lag_end, + "order_null": order_null, + "parent_hash_id": self.parent_hash_id(), + "primary_key_partition": self.get_primary_key_partition(column_names), + "primary_keys": self.list_primary_keys(column_names), + "quoted_airbyte_row_num": self.name_transformer.normalize_column_name("_airbyte_row_num", in_jinja=True), + "quoted_airbyte_start_at": self.name_transformer.normalize_column_name("_airbyte_start_at", in_jinja=True), + "quoted_cdc_cols": quoted_cdc_cols, + "quoted_col_emitted_at": self.get_emitted_at(in_jinja=True), + "quoted_unique_key": self.get_unique_key(in_jinja=True), + "sql_table_comment": self.sql_table_comment(include_from_table=True), + "unique_key": self.get_unique_key(), + } + if self.destination_type == DestinationType.CLICKHOUSE: + clickhouse_active_row_sql = Template( + """ +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }}, + {{ cursor_field }} desc, + {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + ) as _airbyte_active_row_num + from input_data +),""" + ).render(jinja_variables) + jinja_variables["clickhouse_active_row_sql"] = clickhouse_active_row_sql + scd_columns_sql = Template( + """ + case when _airbyte_active_row_num = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}, + {{ lag_begin }}({{ cursor_field }}) over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }}, + {{ cursor_field }} desc, + {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ lag_end }} + ) as {{ airbyte_end_at }}""" + ).render(jinja_variables) + jinja_variables["scd_columns_sql"] = scd_columns_sql + else: + scd_columns_sql = Template( + """ + lag({{ cursor_field }}) over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }}, + {{ cursor_field }} desc, + {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + ) as {{ airbyte_end_at }}, + case when row_number() over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }}, + {{ cursor_field }} desc, + {{ col_emitted_at }} desc{{ cdc_updated_at_order 
}} + ) = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}""" + ).render(jinja_variables) + jinja_variables["scd_columns_sql"] = scd_columns_sql + sql = Template( + """ -- depends_on: {{ from_table }} with {{ '{% if is_incremental() %}' }} @@ -699,17 +836,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ sql_table_comment }} ), {{ '{% endif %}' }} -input_data_with_active_row_num as ( - select *, - row_number() over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} - ) as _airbyte_active_row_num - from input_data -), +{{ clickhouse_active_row_sql }} scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select @@ -725,19 +852,11 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ field }}, {%- endfor %} {{ cursor_field }} as {{ airbyte_start_at }}, - {{ lag_begin }}({{ cursor_field }}) over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} - {{ lag_end }} - ) as {{ airbyte_end_at }}, - case when _airbyte_active_row_num = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}, + {{ scd_columns_sql }}, {{ col_ab_id }}, {{ col_emitted_at }}, {{ hash_id }} - from input_data_with_active_row_num + from {{ input_data_table }} ), dedup_data as ( select @@ -772,94 +891,8 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}, {{ hash_id }} from dedup_data where {{ airbyte_row_num }} = 1 - """ - template = Template(scd_sql_template) - - order_null = "is null asc" - if self.destination_type.value == DestinationType.ORACLE.value: - order_null = "asc nulls last" - if self.destination_type.value == DestinationType.MSSQL.value: - # SQL Server treats NULL values as the lowest values, then sorted in ascending order, NULLs come first. 
- order_null = "desc" - - lag_begin = "lag" - lag_end = "" - if self.destination_type == DestinationType.CLICKHOUSE: - # ClickHouse doesn't support lag() yet, this is a workaround solution - # Ref: https://clickhouse.com/docs/en/sql-reference/window-functions/ - lag_begin = "anyOrNull" - lag_end = "ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" - - enable_left_join_null = "" - cast_begin = "cast(" - cast_as = " as " - cast_end = ")" - if self.destination_type == DestinationType.CLICKHOUSE: - enable_left_join_null = "--" - cast_begin = "accurateCastOrNull(" - cast_as = ", '" - cast_end = "')" - - # TODO move all cdc columns out of scd models - cdc_active_row_pattern = "" - cdc_updated_order_pattern = "" - cdc_cols = "" - quoted_cdc_cols = "" - if "_ab_cdc_deleted_at" in column_names.keys(): - col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at") - col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at") - quoted_col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at", in_jinja=True) - quoted_col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at", in_jinja=True) - cdc_active_row_pattern = f" and {col_cdc_deleted_at} is null" - cdc_updated_order_pattern = f", {col_cdc_updated_at} desc" - cdc_cols = ( - f", {cast_begin}{col_cdc_deleted_at}{cast_as}" - + "{{ dbt_utils.type_string() }}" - + f"{cast_end}" - + f", {cast_begin}{col_cdc_updated_at}{cast_as}" - + "{{ dbt_utils.type_string() }}" - + f"{cast_end}" - ) - quoted_cdc_cols = f", {quoted_col_cdc_deleted_at}, {quoted_col_cdc_updated_at}" - - if "_ab_cdc_log_pos" in column_names.keys(): - col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos") - quoted_col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos", in_jinja=True) - cdc_updated_order_pattern += f", {col_cdc_log_pos} desc" - cdc_cols += f", {cast_begin}{col_cdc_log_pos}{cast_as}" + "{{ dbt_utils.type_string() }}" + f"{cast_end}" - quoted_cdc_cols += f", {quoted_col_cdc_log_pos}" - - sql = template.render( - order_null=order_null, - airbyte_start_at=self.name_transformer.normalize_column_name("_airbyte_start_at"), - quoted_airbyte_start_at=self.name_transformer.normalize_column_name("_airbyte_start_at", in_jinja=True), - airbyte_end_at=self.name_transformer.normalize_column_name("_airbyte_end_at"), - active_row=self.name_transformer.normalize_column_name("_airbyte_active_row"), - airbyte_row_num=self.name_transformer.normalize_column_name("_airbyte_row_num"), - quoted_airbyte_row_num=self.name_transformer.normalize_column_name("_airbyte_row_num", in_jinja=True), - airbyte_unique_key_scd=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), - unique_key=self.get_unique_key(), - quoted_unique_key=self.get_unique_key(in_jinja=True), - col_ab_id=self.get_ab_id(), - col_emitted_at=self.get_emitted_at(), - quoted_col_emitted_at=self.get_emitted_at(in_jinja=True), - col_normalized_at=self.get_normalized_at(), - parent_hash_id=self.parent_hash_id(), - fields=self.list_fields(column_names), - cursor_field=self.get_cursor_field(column_names), - primary_keys=self.list_primary_keys(column_names), - primary_key_partition=self.get_primary_key_partition(column_names), - hash_id=self.hash_id(), - from_table=from_table, - sql_table_comment=self.sql_table_comment(include_from_table=True), - cdc_active_row=cdc_active_row_pattern, - cdc_updated_at_order=cdc_updated_order_pattern, - cdc_cols=cdc_cols, - 
quoted_cdc_cols=quoted_cdc_cols, - lag_begin=lag_begin, - lag_end=lag_end, - enable_left_join_null=enable_left_join_null, - ) +""" + ).render(jinja_variables) return sql def get_cursor_field(self, column_names: Dict[str, Tuple[str, str]], in_jinja: bool = False) -> str: diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index c2454395b6501..adcee9eb3b7a4 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -14,7 +14,7 @@ public class NormalizationRunnerFactory { public static final String BASE_NORMALIZATION_IMAGE_NAME = "airbyte/normalization"; - public static final String NORMALIZATION_VERSION = "0.1.61"; + public static final String NORMALIZATION_VERSION = "0.1.62"; static final Map> NORMALIZATION_MAPPING = ImmutableMap.>builder() diff --git a/build.gradle b/build.gradle index a6e51434a74c6..91393c72c57f0 100644 --- a/build.gradle +++ b/build.gradle @@ -82,6 +82,7 @@ def createSpotlessTarget = { pattern -> 'dbt-project-template-mssql', 'dbt-project-template-mysql', 'dbt-project-template-oracle', + 'dbt-project-template-clickhouse', 'dbt_test_config', 'normalization_test_output', 'tools', diff --git a/settings.gradle b/settings.gradle index 56da8a917448b..b50a3f1933dea 100644 --- a/settings.gradle +++ b/settings.gradle @@ -97,6 +97,7 @@ if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD" include ':airbyte-integrations:connectors:destination-snowflake' include ':airbyte-integrations:connectors:destination-oracle' include ':airbyte-integrations:connectors:destination-mssql' + include ':airbyte-integrations:connectors:destination-clickhouse' //Needed by destination-bigquery include ':airbyte-integrations:connectors:destination-s3' From 680e74cee9111fd36046a5c061570419396f7de5 Mon Sep 17 00:00:00 2001 From: ycherniaiev <94798230+ycherniaiev@users.noreply.github.com> Date: Wed, 5 Jan 2022 13:45:08 +0200 Subject: [PATCH 036/215] Update fields in source-connectors specifications: freshservice (#9143) --- .gitignore | 1 + .../src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 13 +++++++------ .../connectors/source-freshservice/Dockerfile | 2 +- .../source_freshservice/spec.json | 11 ++++++----- docs/integrations/sources/freshservice.md | 1 + 6 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 17b544c1d7e26..bacf3dc141f8d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ data .settings **/gmon.out static_checker_reports/ +.vscode # Logs acceptance_tests_logs/ diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 973f9fdee9f1e..7a288e95b2d9a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -210,7 +210,7 @@ - name: Freshservice sourceDefinitionId: 9bb85338-ea95-4c93-b267-6be89125b267 dockerRepository: airbyte/source-freshservice - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.io/integrations/sources/freshservice icon: freshdesk.svg sourceType: api diff --git 
a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 0195aa75a728d..696cdec25608f 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1947,9 +1947,9 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-freshservice:0.1.0" +- dockerImage: "airbyte/source-freshservice:0.1.1" spec: - documentationUrl: "https://hub.docker.com/r/airbyte/source-freshservice" + documentationUrl: "https://docs.airbyte.io/integrations/sources/freshservice" connectionSpecification: $schema: "http://json-schema.org/draft-07/schema#" title: "Freshservice Spec" @@ -1962,17 +1962,18 @@ properties: domain_name: type: "string" - description: "Freshservice domain" + title: "Domain Name" + description: "The name of your Freshservice domain" examples: - "mydomain.freshservice.com" api_key: - title: "Api Key" + title: "API Key" type: "string" - description: "Your API Access Key. See here. The key is case sensitive." airbyte_secret: true start_date: - title: "Replication Start Date" + title: "Start Date" type: "string" description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ \ data before this date will not be replicated." diff --git a/airbyte-integrations/connectors/source-freshservice/Dockerfile b/airbyte-integrations/connectors/source-freshservice/Dockerfile index 0d92e17cda097..857bf236df892 100644 --- a/airbyte-integrations/connectors/source-freshservice/Dockerfile +++ b/airbyte-integrations/connectors/source-freshservice/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-freshservice diff --git a/airbyte-integrations/connectors/source-freshservice/source_freshservice/spec.json b/airbyte-integrations/connectors/source-freshservice/source_freshservice/spec.json index 3bd93bc087c05..05f99811d50b2 100644 --- a/airbyte-integrations/connectors/source-freshservice/source_freshservice/spec.json +++ b/airbyte-integrations/connectors/source-freshservice/source_freshservice/spec.json @@ -1,5 +1,5 @@ { - "documentationUrl": "https://hub.docker.com/r/airbyte/source-freshservice", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/freshservice", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Freshservice Spec", @@ -9,17 +9,18 @@ "properties": { "domain_name": { "type": "string", - "description": "Freshservice domain", + "title": "Domain Name", + "description": "The name of your Freshservice domain", "examples": ["mydomain.freshservice.com"] }, "api_key": { - "title": "Api Key", + "title": "API Key", "type": "string", - "description": "Your API Access Key. See here. The key is case sensitive.", + "description": "Freshservice API Key. See here. The key is case sensitive.", "airbyte_secret": true }, "start_date": { - "title": "Replication Start Date", + "title": "Start Date", "type": "string", "description": "UTC date and time in the format 2020-10-01T00:00:00Z. 
Any data before this date will not be replicated.", "examples": ["2020-10-01T00:00:00Z"], diff --git a/docs/integrations/sources/freshservice.md b/docs/integrations/sources/freshservice.md index 10b908c43a231..0133f3244a3d5 100644 --- a/docs/integrations/sources/freshservice.md +++ b/docs/integrations/sources/freshservice.md @@ -53,4 +53,5 @@ Please read [How to find your API key](https://api.freshservice.com/#authenticat | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.1 | 2021-12-28 | [9143](https://github.com/airbytehq/airbyte/pull/9143) | Update titles and descriptions | | 0.1.0 | 2021-10-29 | [6967](https://github.com/airbytehq/airbyte/pull/6967) | 🎉 New Source: Freshservice | From 74cb28e0f6aa037c48cb520911e3780107fb18dd Mon Sep 17 00:00:00 2001 From: Iryna Grankova <87977540+igrankova@users.noreply.github.com> Date: Wed, 5 Jan 2022 15:47:41 +0200 Subject: [PATCH 037/215] Update fields in source-connectors specifications: mssql, mssql-strict-encrypt (#9206) --- .../b5ea17b1-f170-46dc-bc31-cc744ca984c1.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 28 +++++++++++-------- .../source-mssql-strict-encrypt/Dockerfile | 2 +- .../src/test/resources/expected_spec.json | 23 +++++++++------ .../connectors/source-mssql/Dockerfile | 2 +- .../source-mssql/src/main/resources/spec.json | 23 +++++++++------ docs/integrations/sources/mssql.md | 1 + 8 files changed, 50 insertions(+), 33 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json index 3c3036096d35e..0e8eaa23e5237 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "b5ea17b1-f170-46dc-bc31-cc744ca984c1", "name": "Microsoft SQL Server (MSSQL)", "dockerRepository": "airbyte/source-mssql", - "dockerImageTag": "0.3.11", + "dockerImageTag": "0.3.12", "documentationUrl": "https://docs.airbyte.io/integrations/sources/mssql", "icon": "mssql.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7a288e95b2d9a..baceb95dab680 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -398,7 +398,7 @@ - name: Microsoft SQL Server (MSSQL) sourceDefinitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 dockerRepository: airbyte/source-mssql - dockerImageTag: 0.3.11 + dockerImageTag: 0.3.12 documentationUrl: https://docs.airbyte.io/integrations/sources/mssql icon: mssql.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 696cdec25608f..34760856f74b4 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3841,7 +3841,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mssql:0.3.11" +- dockerImage: "airbyte/source-mssql:0.3.12" spec: documentationUrl: 
"https://docs.airbyte.io/integrations/destinations/mssql" connectionSpecification: @@ -3856,31 +3856,37 @@ additionalProperties: false properties: host: - description: "Hostname of the database." + description: "The hostname of the database." + title: "Host" type: "string" port: - description: "Port of the database." + description: "The port of the database." + title: "Port" type: "integer" minimum: 0 maximum: 65536 examples: - "1433" database: - description: "Name of the database." + description: "The name of the database." + title: "Database" type: "string" examples: - "master" username: - description: "Username to use to access the database." + description: "The username which is used to access the database." + title: "Username" type: "string" password: - description: "Password associated with the username." + description: "The password associated with the username." + title: "Password" type: "string" airbyte_secret: true ssl_method: title: "SSL Method" type: "object" - description: "Encryption method to use when communicating with the database" + description: "The encryption method which is used when communicating with\ + \ the database." order: 6 oneOf: - title: "Unencrypted" @@ -3897,8 +3903,8 @@ default: "unencrypted" - title: "Encrypted (trust server certificate)" additionalProperties: false - description: "Use the cert provided by the server without verification.\ - \ (For testing purposes only!)" + description: "Use the certificate provided by the server without verification.\ + \ (For testing purposes only!)" required: - "ssl_method" properties: @@ -3910,7 +3916,7 @@ default: "encrypted_trust_server_certificate" - title: "Encrypted (verify certificate)" additionalProperties: false - description: "Verify and use the cert provided by the server." + description: "Verify and use the certificate provided by the server." required: - "ssl_method" - "trustStoreName" @@ -3931,7 +3937,7 @@ replication_method: type: "string" title: "Replication Method" - description: "Replication method to use for extracting data from the database.\ + description: "The replication method used for extracting data from the database.\ \ STANDARD replication requires no setup on the DB side but will not be\ \ able to represent deletions incrementally. CDC uses {TBC} to detect\ \ inserts, updates, and deletes. 
This needs to be configured on the source\ diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile index f1a9b336fb554..daaf6dd152332 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-mssql-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.4 +LABEL io.airbyte.version=0.1.5 LABEL io.airbyte.name=airbyte/source-mssql-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/resources/expected_spec.json index 87fff41abf97d..46ae43cf7426d 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/resources/expected_spec.json +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/resources/expected_spec.json @@ -8,40 +8,45 @@ "additionalProperties": false, "properties": { "host": { - "description": "Hostname of the database.", + "description": "The hostname of the database.", + "title": "Host", "type": "string" }, "port": { - "description": "Port of the database.", + "description": "The port of the database.", + "title": "Port", "type": "integer", "minimum": 0, "maximum": 65536, "examples": ["1433"] }, "database": { - "description": "Name of the database.", + "description": "The name of the database.", + "title": "Database", "type": "string", "examples": ["master"] }, "username": { - "description": "Username to use to access the database.", + "description": "The username which is used to access the database.", + "title": "Username", "type": "string" }, "password": { - "description": "Password associated with the username.", + "description": "The password associated with the username.", + "title": "Password", "type": "string", "airbyte_secret": true }, "ssl_method": { "title": "SSL Method", "type": "object", - "description": "Encryption method to use when communicating with the database", + "description": "The encryption method which is used when communicating with the database.", "order": 6, "oneOf": [ { "title": "Encrypted (trust server certificate)", "additionalProperties": false, - "description": "Use the cert provided by the server without verification. (For testing purposes only!)", + "description": "Use the certificate provided by the server without verification. (For testing purposes only!)", "required": ["ssl_method"], "properties": { "ssl_method": { @@ -55,7 +60,7 @@ { "title": "Encrypted (verify certificate)", "additionalProperties": false, - "description": "Verify and use the cert provided by the server.", + "description": "Verify and use the certificate provided by the server.", "required": ["ssl_method", "trustStoreName", "trustStorePassword"], "properties": { "ssl_method": { @@ -77,7 +82,7 @@ "replication_method": { "type": "string", "title": "Replication Method", - "description": "Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses {TBC} to detect inserts, updates, and deletes. This needs to be configured on the source database itself.", + "description": "The replication method used for extracting data from the database. 
STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses {TBC} to detect inserts, updates, and deletes. This needs to be configured on the source database itself.", "default": "STANDARD", "enum": ["STANDARD", "CDC"] } diff --git a/airbyte-integrations/connectors/source-mssql/Dockerfile b/airbyte-integrations/connectors/source-mssql/Dockerfile index 344279f0d5e49..8b6be50732f10 100644 --- a/airbyte-integrations/connectors/source-mssql/Dockerfile +++ b/airbyte-integrations/connectors/source-mssql/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-mssql COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.3.11 +LABEL io.airbyte.version=0.3.12 LABEL io.airbyte.name=airbyte/source-mssql diff --git a/airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json b/airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json index 26318a1beb39a..bfbd1d5276875 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json @@ -8,34 +8,39 @@ "additionalProperties": false, "properties": { "host": { - "description": "Hostname of the database.", + "description": "The hostname of the database.", + "title": "Host", "type": "string" }, "port": { - "description": "Port of the database.", + "description": "The port of the database.", + "title": "Port", "type": "integer", "minimum": 0, "maximum": 65536, "examples": ["1433"] }, "database": { - "description": "Name of the database.", + "description": "The name of the database.", + "title": "Database", "type": "string", "examples": ["master"] }, "username": { - "description": "Username to use to access the database.", + "description": "The username which is used to access the database.", + "title": "Username", "type": "string" }, "password": { - "description": "Password associated with the username.", + "description": "The password associated with the username.", + "title": "Password", "type": "string", "airbyte_secret": true }, "ssl_method": { "title": "SSL Method", "type": "object", - "description": "Encryption method to use when communicating with the database", + "description": "The encryption method which is used when communicating with the database.", "order": 6, "oneOf": [ { @@ -55,7 +60,7 @@ { "title": "Encrypted (trust server certificate)", "additionalProperties": false, - "description": "Use the cert provided by the server without verification. (For testing purposes only!)", + "description": "Use the certificate provided by the server without verification. (For testing purposes only!)", "required": ["ssl_method"], "properties": { "ssl_method": { @@ -69,7 +74,7 @@ { "title": "Encrypted (verify certificate)", "additionalProperties": false, - "description": "Verify and use the cert provided by the server.", + "description": "Verify and use the certificate provided by the server.", "required": ["ssl_method", "trustStoreName", "trustStorePassword"], "properties": { "ssl_method": { @@ -91,7 +96,7 @@ "replication_method": { "type": "string", "title": "Replication Method", - "description": "Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses {TBC} to detect inserts, updates, and deletes. 
This needs to be configured on the source database itself.", + "description": "The replication method used for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses {TBC} to detect inserts, updates, and deletes. This needs to be configured on the source database itself.", "default": "STANDARD", "enum": ["STANDARD", "CDC"] } diff --git a/docs/integrations/sources/mssql.md b/docs/integrations/sources/mssql.md index 4a38f515c1d85..fc7236880cae0 100644 --- a/docs/integrations/sources/mssql.md +++ b/docs/integrations/sources/mssql.md @@ -294,6 +294,7 @@ If you do not see a type in this list, assume that it is coerced into a string. | Version | Date | Pull Request | Subject | | |:--------| :--- | :--- | :--- | :-- | +| 0.3.12 | 2021-12-30 | [9206](https://github.com/airbytehq/airbyte/pull/9206) | Update connector fields title/description | | 0.3.11 | 2021-12-24 | [8958](https://github.com/airbytehq/airbyte/pull/8958) | Add support for JdbcType.ARRAY | | 0.3.10 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | | | 0.3.9 | 2021-11-09 | [7386](https://github.com/airbytehq/airbyte/pull/7386) | Improve support for binary and varbinary data types | | From d418f9cd4b84924937af8fcaf2e75c038cbe88be Mon Sep 17 00:00:00 2001 From: Iryna Grankova <87977540+igrankova@users.noreply.github.com> Date: Wed, 5 Jan 2022 16:17:00 +0200 Subject: [PATCH 038/215] Update fields in source-connectors specifications: notion (#9207) --- .../6e00b415-b02e-4160-bf02-58176a0ae687.json | 2 +- .../src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 4 +++- .../connectors/source-notion/Dockerfile | 2 +- .../connectors/source-notion/README.md | 3 ++- .../source-notion/source_notion/schemas/pages.json | 12 ++++++++++-- .../source_notion/schemas/shared/date.json | 7 ++++--- .../connectors/source-notion/source_notion/spec.json | 2 ++ docs/integrations/sources/notion.md | 1 + 9 files changed, 25 insertions(+), 10 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/6e00b415-b02e-4160-bf02-58176a0ae687.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/6e00b415-b02e-4160-bf02-58176a0ae687.json index a788dd9dc6f32..59bee7cfe8fb0 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/6e00b415-b02e-4160-bf02-58176a0ae687.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/6e00b415-b02e-4160-bf02-58176a0ae687.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "6e00b415-b02e-4160-bf02-58176a0ae687", "name": "Notion", "dockerRepository": "airbyte/source-notion", - "dockerImageTag": "0.1.0", + "dockerImageTag": "0.1.1", "documentationUrl": "https://hub.docker.com/r/airbyte/source-notion", "icon": "notion.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index baceb95dab680..71f2ec237cb68 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -447,7 +447,7 @@ - name: Notion sourceDefinitionId: 6e00b415-b02e-4160-bf02-58176a0ae687 dockerRepository: airbyte/source-notion - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: 
https://docs.airbyte.io/integrations/sources/notion icon: notion.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 34760856f74b4..01437c10c03d1 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4716,7 +4716,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-notion:0.1.0" +- dockerImage: "airbyte/source-notion:0.1.1" spec: documentationUrl: "https://docsurl.com" connectionSpecification: @@ -4730,11 +4730,13 @@ properties: access_token: type: "string" + title: "Access Token" description: "Notion API access token, see the docs for more information on how to obtain this token." airbyte_secret: true start_date: type: "string" + title: "Start Date" description: "The date from which you'd like to replicate data for Notion\ \ API, in the format YYYY-MM-DDT00:00:00.000Z. All data generated after\ \ this date will be replicated." diff --git a/airbyte-integrations/connectors/source-notion/Dockerfile b/airbyte-integrations/connectors/source-notion/Dockerfile index c5a925d1a3ed1..9e121a38fd34d 100644 --- a/airbyte-integrations/connectors/source-notion/Dockerfile +++ b/airbyte-integrations/connectors/source-notion/Dockerfile @@ -34,5 +34,5 @@ COPY source_notion ./source_notion ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-notion diff --git a/airbyte-integrations/connectors/source-notion/README.md b/airbyte-integrations/connectors/source-notion/README.md index 8d003a2c81f69..1ba66cbf88fb1 100644 --- a/airbyte-integrations/connectors/source-notion/README.md +++ b/airbyte-integrations/connectors/source-notion/README.md @@ -102,7 +102,8 @@ Customize `acceptance-test-config.yml` file to configure tests. See [Source Acce If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. To run your integration tests with acceptance tests, from the connector root, run ``` -python -m pytest integration_tests -p integration_tests.acceptance +docker build . 
--no-cache -t airbyte/source-notion:dev \ +&& python -m pytest -p source_acceptance_test.plugin ``` To run your integration tests with docker diff --git a/airbyte-integrations/connectors/source-notion/source_notion/schemas/pages.json b/airbyte-integrations/connectors/source-notion/source_notion/schemas/pages.json index ce59b0037eaed..be5124292e20a 100644 --- a/airbyte-integrations/connectors/source-notion/source_notion/schemas/pages.json +++ b/airbyte-integrations/connectors/source-notion/source_notion/schemas/pages.json @@ -29,13 +29,13 @@ }, "properties": { "type": "object", - "additionalProperties": false, + "additionalProperties": true, "patternProperties": { ".*": { "anyOf": [ { "type": "object", - "additionalProperties": false, + "additionalProperties": true, "patternProperties": { "^id$": { "type": "string" }, "^type$": { "enum": ["title", "rich_text"] }, @@ -80,6 +80,14 @@ "date": { "$ref": "date.json" } } }, + { + "type": "object", + "properties": { + "id": { "type": "string" }, + "type": { "enum": ["date"] }, + "date": { "$ref": "date.json" } + } + }, { "type": "object", "properties": { diff --git a/airbyte-integrations/connectors/source-notion/source_notion/schemas/shared/date.json b/airbyte-integrations/connectors/source-notion/source_notion/schemas/shared/date.json index 94f1e299c8bcd..18a4f13debaee 100644 --- a/airbyte-integrations/connectors/source-notion/source_notion/schemas/shared/date.json +++ b/airbyte-integrations/connectors/source-notion/source_notion/schemas/shared/date.json @@ -1,9 +1,10 @@ { "$schema": "http://json-schema.org/draft-04/schema#", "type": ["null", "object"], - "additionalProperties": false, + "additionalProperties": true, "properties": { - "start": { "type": "string" }, - "end": { "type": ["null", "string"] } + "start": { "type": ["null", "string"] }, + "end": { "type": ["null", "string"] }, + "time_zone": { "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-notion/source_notion/spec.json b/airbyte-integrations/connectors/source-notion/source_notion/spec.json index 4ab7b6db4d20a..b3029b1d5fbfd 100644 --- a/airbyte-integrations/connectors/source-notion/source_notion/spec.json +++ b/airbyte-integrations/connectors/source-notion/source_notion/spec.json @@ -9,11 +9,13 @@ "properties": { "access_token": { "type": "string", + "title": "Access Token", "description": "Notion API access token, see the docs for more information on how to obtain this token.", "airbyte_secret": true }, "start_date": { "type": "string", + "title": "Start Date", "description": "The date from which you'd like to replicate data for Notion API, in the format YYYY-MM-DDT00:00:00.000Z. 
All data generated after this date will be replicated.", "examples": ["2020-11-16T00:00:00.000Z"], "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}Z$" diff --git a/docs/integrations/sources/notion.md b/docs/integrations/sources/notion.md index 006bf9a024767..474396e1a6211 100644 --- a/docs/integrations/sources/notion.md +++ b/docs/integrations/sources/notion.md @@ -57,6 +57,7 @@ Please register on Notion and follow this [docs](https://developers.notion.com/d | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.1 | 2021-12-30 | [9207](https://github.com/airbytehq/airbyte/pull/9207) | Update connector fields title/description | | 0.1.0 | 2021-10-17 | [7092](https://github.com/airbytehq/airbyte/pull/7092) | Initial Release | From bbd5fec7ed66edee995ca06f2c3d8e40fdd02534 Mon Sep 17 00:00:00 2001 From: ycherniaiev <94798230+ycherniaiev@users.noreply.github.com> Date: Wed, 5 Jan 2022 17:06:41 +0200 Subject: [PATCH 039/215] Update fields in source-connectors specifications: freshsales (#9101) --- .../eca08d79-7b92-4065-b7f3-79c14836ebe7.json | 2 +- .../src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 10 ++++++---- .../connectors/source-freshsales/Dockerfile | 2 +- .../source-freshsales/source_freshsales/spec.json | 8 +++++--- .../source-freshsales/unit_tests/test_source.py | 10 ++++++---- docs/integrations/sources/freshsales.md | 1 + 7 files changed, 21 insertions(+), 14 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json index ca06a161adf3f..015d901132bc0 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "eca08d79-7b92-4065-b7f3-79c14836ebe7", "name": "Freshsales", "dockerRepository": "airbyte/source-freshsales", - "dockerImageTag": "0.1.0", + "dockerImageTag": "0.1.1", "documentationUrl": "https://docs.airbyte.io/integrations/sources/freshsales", "icon": "freshsales.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 71f2ec237cb68..2cd80d54091dd 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -203,7 +203,7 @@ - name: Freshsales sourceDefinitionId: eca08d79-7b92-4065-b7f3-79c14836ebe7 dockerRepository: airbyte/source-freshsales - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.io/integrations/sources/freshsales icon: freshsales.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 01437c10c03d1..d3cd840965729 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1922,9 +1922,9 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-freshsales:0.1.0" +- dockerImage: "airbyte/source-freshsales:0.1.1" spec: - documentationUrl: 
"https://docsurl.com" + documentationUrl: "https://docs.airbyte.io/integrations/sources/freshsales" connectionSpecification: $schema: "http://json-schema.org/draft-07/schema#" title: "Freshsales Spec" @@ -1936,12 +1936,14 @@ properties: domain_name: type: "string" - description: "Freshsales domain" + title: "Domain Name" + description: "The Name of your Freshsales domain" examples: - "mydomain.myfreshworks.com" api_key: type: "string" - description: "Your API Access Key. See here. The key is case sensitive." airbyte_secret: true supportsNormalization: false diff --git a/airbyte-integrations/connectors/source-freshsales/Dockerfile b/airbyte-integrations/connectors/source-freshsales/Dockerfile index d7e7bc9102319..04a39200b2b18 100644 --- a/airbyte-integrations/connectors/source-freshsales/Dockerfile +++ b/airbyte-integrations/connectors/source-freshsales/Dockerfile @@ -34,5 +34,5 @@ COPY source_freshsales ./source_freshsales ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-freshsales diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json index f4155198bc275..fee78c2b571c0 100644 --- a/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json @@ -1,5 +1,5 @@ { - "documentationUrl": "https://docsurl.com", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/freshsales", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Freshsales Spec", @@ -9,12 +9,14 @@ "properties": { "domain_name": { "type": "string", - "description": "Freshsales domain", + "title": "Domain Name", + "description": "The Name of your Freshsales domain", "examples": ["mydomain.myfreshworks.com"] }, "api_key": { "type": "string", - "description": "Your API Access Key. See here. The key is case sensitive.", + "title": "API Key", + "description": "Freshsales API Key. See here. The key is case sensitive.", "airbyte_secret": true } } diff --git a/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py b/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py index 132f3c417ad17..0a07fc972adf0 100644 --- a/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py @@ -6,11 +6,13 @@ from source_freshsales.source import SourceFreshsales +# Test `test_check_connection` has been commented out, due to innactive test account. +# TODO: please uncomment this test, once the integration test account works again. 
-def test_check_connection(mocker, config): - source = SourceFreshsales() - logger_mock = MagicMock() - assert source.check_connection(logger_mock, config) == (True, None) +# def test_check_connection(mocker, config): +# source = SourceFreshsales() +# logger_mock = MagicMock() +# assert source.check_connection(logger_mock, config) == (True, None) def test_count_streams(mocker): diff --git a/docs/integrations/sources/freshsales.md b/docs/integrations/sources/freshsales.md index 62ef70ff7b457..2f6a3e92187ec 100644 --- a/docs/integrations/sources/freshsales.md +++ b/docs/integrations/sources/freshsales.md @@ -49,4 +49,5 @@ Please read [How to find your API key](https://crmsupport.freshworks.com/support | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.1 | 2021-12-24 | [9101](https://github.com/airbytehq/airbyte/pull/9101) | Update fields and descriptions | | 0.1.0 | 2021-11-03 | [6963](https://github.com/airbytehq/airbyte/pull/6963) | 🎉 New Source: Freshsales | From 0161dd673caeac56a0cb28c3f6df7d81e5cb3228 Mon Sep 17 00:00:00 2001 From: ycherniaiev <94798230+ycherniaiev@users.noreply.github.com> Date: Wed, 5 Jan 2022 17:49:42 +0200 Subject: [PATCH 040/215] Update fields in source-connectors specifications: google-search-console (#9186) Signed-off-by: Sergey Chvalyuk Co-authored-by: Sergey Chvalyuk --- .../main/resources/seed/source_definitions.yaml | 2 +- .../source-google-search-console/Dockerfile | 2 +- .../source_google_search_console/spec.json | 15 +++++++++------ .../integrations/sources/google-search-console.md | 1 + 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 2cd80d54091dd..1d360c7fca4de 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -252,7 +252,7 @@ - name: Google Search Console sourceDefinitionId: eb4c9e00-db83-4d63-a386-39cfa91012a8 dockerRepository: airbyte/source-google-search-console - dockerImageTag: 0.1.10 + dockerImageTag: 0.1.11 documentationUrl: https://docs.airbyte.io/integrations/sources/google-search-console icon: googlesearchconsole.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-google-search-console/Dockerfile b/airbyte-integrations/connectors/source-google-search-console/Dockerfile index c14914f5360ea..ac0985737b872 100755 --- a/airbyte-integrations/connectors/source-google-search-console/Dockerfile +++ b/airbyte-integrations/connectors/source-google-search-console/Dockerfile @@ -13,5 +13,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENV SENTRY_DSN "https://d4b03de0c4574c78999b8d58e55243dc@o1009025.ingest.sentry.io/6102835" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.10 +LABEL io.airbyte.version=0.1.11 LABEL io.airbyte.name=airbyte/source-google-search-console diff --git a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/spec.json b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/spec.json index 26e659b8a80f3..6951a331f66ff 100755 --- a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/spec.json +++ b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/spec.json @@ -12,20 +12,23 @@ "items": { "type": "string" }, 
+ "title": "Site URLs", "description": "Website URLs property; do not include the domain-level property in the list", "examples": ["https://example1.com", "https://example2.com"], "order": 0 }, "start_date": { "type": "string", - "description": "The date from which you'd like to replicate data in the format YYYY-MM-DD.", + "title": "Start Date", + "description": "UTC date in the format 2017-01-25. Any data before this date will not be replicated.", "examples": ["2021-01-01"], "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", "order": 1 }, "end_date": { "type": "string", - "description": "The date from which you'd like to replicate data in the format YYYY-MM-DD. Must be greater or equal start_date field", + "title": "End Date", + "description": "UTC date in the format 2017-01-25. Any data after this date will not be replicated. Must be greater or equal to the Start Date field.", "examples": ["2021-12-12"], "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", "order": 2 @@ -55,25 +58,25 @@ "client_id": { "title": "Client ID", "type": "string", - "description": "The Client ID of your developer application", + "description": "The Client ID of your Google Search Console developer application.", "airbyte_secret": true }, "client_secret": { "title": "Client Secret", "type": "string", - "description": "The client secret of your developer application", + "description": "The Client Secret of your Google Search Console developer application.", "airbyte_secret": true }, "access_token": { "title": "Access Token", "type": "string", - "description": "An access token generated using the above client ID and secret", + "description": "Access Token for making authenticated requests.", "airbyte_secret": true }, "refresh_token": { "title": "Refresh Token", "type": "string", - "description": "A refresh token generated using the above client ID and secret", + "description": "The token for obtaining new access token.", "airbyte_secret": true } } diff --git a/docs/integrations/sources/google-search-console.md b/docs/integrations/sources/google-search-console.md index adc26eced7e52..6a0eb2da4c490 100644 --- a/docs/integrations/sources/google-search-console.md +++ b/docs/integrations/sources/google-search-console.md @@ -96,6 +96,7 @@ You should now be ready to use the Google Workspace Admin Reports API connector | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.1.11` | 2022-01-05 | [9186](https://github.com/airbytehq/airbyte/pull/9186) | Update titles and descriptions | | `0.1.10` | 2021-12-23 | [9073](https://github.com/airbytehq/airbyte/pull/9073) | Add slicing by date range | | `0.1.9` | 2021-12-22 | [9047](https://github.com/airbytehq/airbyte/pull/9047) | Add 'order' to spec.json props | | `0.1.8` | 2021-12-21 | [8248](https://github.com/airbytehq/airbyte/pull/8248) | Enable Sentry for performance and errors tracking | From 8ace9eaccc3cbeeb6ed40b7cead633c517480552 Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Wed, 5 Jan 2022 18:40:13 +0200 Subject: [PATCH 041/215] =?UTF-8?q?=F0=9F=90=9BSource=20Google=20search=20?= =?UTF-8?q?console:=20Fix=20incremental=20sync:=20keep=20all=20urls=20in?= =?UTF-8?q?=20state=20object=20(#9194)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bugfix: keep all urls in state object Signed-off-by: Sergey Chvalyuk --- .../eb4c9e00-db83-4d63-a386-39cfa91012a8.json | 2 +- .../src/main/resources/seed/source_specs.yaml | 25 ++++++++------- .../source_google_search_console/streams.py | 32 +++++++++++++------ 
.../unit_tests/unit_test.py | 16 ++++++++++ .../sources/google-search-console.md | 2 +- 5 files changed, 54 insertions(+), 23 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json index 4402251686201..034d07ac76926 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eb4c9e00-db83-4d63-a386-39cfa91012a8.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "eb4c9e00-db83-4d63-a386-39cfa91012a8", "name": "Google Search Console", "dockerRepository": "airbyte/source-google-search-console", - "dockerImageTag": "0.1.9", + "dockerImageTag": "0.1.11", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-search-console", "icon": "googlesearchconsole.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index d3cd840965729..06ea8ac2abcbb 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2458,7 +2458,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-google-search-console:0.1.10" +- dockerImage: "airbyte/source-google-search-console:0.1.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" connectionSpecification: @@ -2475,6 +2475,7 @@ type: "array" items: type: "string" + title: "Site URLs" description: "Website URLs property; do not include the domain-level property\ \ in the list" examples: @@ -2483,16 +2484,18 @@ order: 0 start_date: type: "string" - description: "The date from which you'd like to replicate data in the format\ - \ YYYY-MM-DD." + title: "Start Date" + description: "UTC date in the format 2017-01-25. Any data before this date\ + \ will not be replicated." examples: - "2021-01-01" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" order: 1 end_date: type: "string" - description: "The date from which you'd like to replicate data in the format\ - \ YYYY-MM-DD. Must be greater or equal start_date field" + title: "End Date" + description: "UTC date in the format 2017-01-25. Any data after this date\ + \ will not be replicated. Must be greater or equal to the Start Date field." examples: - "2021-12-12" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" @@ -2520,24 +2523,24 @@ client_id: title: "Client ID" type: "string" - description: "The Client ID of your developer application" + description: "The Client ID of your Google Search Console developer\ + \ application." airbyte_secret: true client_secret: title: "Client Secret" type: "string" - description: "The client secret of your developer application" + description: "The Client Secret of your Google Search Console developer\ + \ application." airbyte_secret: true access_token: title: "Access Token" type: "string" - description: "An access token generated using the above client ID\ - \ and secret" + description: "Access Token for making authenticated requests." airbyte_secret: true refresh_token: title: "Refresh Token" type: "string" - description: "A refresh token generated using the above client ID\ - \ and secret" + description: "The token for obtaining new access token." 
airbyte_secret: true - type: "object" title: "Service Account Key Authentication" diff --git a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py index 1649ba58e0a6b..afc05f754b53d 100755 --- a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py +++ b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/streams.py @@ -236,6 +236,24 @@ def get_updated_state( """ With the existing nested loop implementation, we have to store a `cursor_field` for each `site_url` and `searchType`. This functionality is placed in `get_update_state`. + + { + "stream": { + "http://domain1.com": { + "web": {"date": "2022-01-03"}, + "news": {"date": "2022-01-03"}, + "image": {"date": "2022-01-03"}, + "video": {"date": "2022-01-03"} + }, + "http://domain2.com": { + "web": {"date": "2022-01-03"}, + "news": {"date": "2022-01-03"}, + "image": {"date": "2022-01-03"}, + "video": {"date": "2022-01-03"} + }, + "date": "2022-01-03", + } + } """ latest_benchmark = latest_record[self.cursor_field] @@ -243,16 +261,10 @@ def get_updated_state( site_url = latest_record.get("site_url") search_type = latest_record.get("search_type") - if current_stream_state.get(site_url, {}).get(search_type): - current_stream_state[site_url][search_type] = { - self.cursor_field: max(latest_benchmark, current_stream_state[site_url][search_type][self.cursor_field]) - } - - elif current_stream_state.get(site_url): - current_stream_state[site_url][search_type] = {self.cursor_field: latest_benchmark} - - else: - current_stream_state = {site_url: {search_type: {self.cursor_field: latest_benchmark}}} + value = current_stream_state.get(site_url, {}).get(search_type, {}).get(self.cursor_field) + if value: + latest_benchmark = max(latest_benchmark, value) + current_stream_state.setdefault(site_url, {}).setdefault(search_type, {})[self.cursor_field] = latest_benchmark # we need to get the max date over all searchTypes but the current acceptance test YAML format doesn't # support that diff --git a/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py index 9273554a9a40f..2f1c14841292c 100755 --- a/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py @@ -89,3 +89,19 @@ def test_state(current_stream_state, latest_record, expected): value = stream.get_updated_state(current_stream_state, latest_record) assert value == expected + + +def test_updated_state(): + stream = SearchAnalyticsByDate(NoAuth(), ["https://domain1.com", "https://domain2.com"], "start_date", "end_date") + + state = {} + record = {"site_url": "https://domain1.com", "search_type": "web", "date": "2022-01-01"} + state = stream.get_updated_state(state, record) + record = {"site_url": "https://domain2.com", "search_type": "web", "date": "2022-01-01"} + state = stream.get_updated_state(state, record) + + assert state == { + "https://domain1.com": {"web": {"date": "2022-01-01"}}, + "https://domain2.com": {"web": {"date": "2022-01-01"}}, + "date": "2022-01-01", + } diff --git a/docs/integrations/sources/google-search-console.md b/docs/integrations/sources/google-search-console.md index 6a0eb2da4c490..5ce46c7935bd9 100644 --- 
a/docs/integrations/sources/google-search-console.md +++ b/docs/integrations/sources/google-search-console.md @@ -96,7 +96,7 @@ You should now be ready to use the Google Workspace Admin Reports API connector | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| `0.1.11` | 2022-01-05 | [9186](https://github.com/airbytehq/airbyte/pull/9186) | Update titles and descriptions | +| `0.1.11` | 2022-01-05 | [9186](https://github.com/airbytehq/airbyte/pull/9186) [9194](https://github.com/airbytehq/airbyte/pull/9194) | Fix incremental sync: keep all urls in state object | | `0.1.10` | 2021-12-23 | [9073](https://github.com/airbytehq/airbyte/pull/9073) | Add slicing by date range | | `0.1.9` | 2021-12-22 | [9047](https://github.com/airbytehq/airbyte/pull/9047) | Add 'order' to spec.json props | | `0.1.8` | 2021-12-21 | [8248](https://github.com/airbytehq/airbyte/pull/8248) | Enable Sentry for performance and errors tracking | From 40db22c2b48ba5b64ce2497840c4286421606399 Mon Sep 17 00:00:00 2001 From: "Sherif A. Nada" Date: Wed, 5 Jan 2022 09:16:44 -0800 Subject: [PATCH 042/215] Add a note about the output of the engagements stream in Hubspot docs (#9316) --- docs/integrations/sources/hubspot.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index d1c6ccb7b5c2e..c85e7fdafcc79 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -13,7 +13,7 @@ Check out common troubleshooting issues for the HubSpot connector on our Discourse [here](https://discuss.airbyte.io/tags/c/connector/11/source-hubspot). -## Supported Tables +## Supported Streams This source is capable of syncing the following tables and their data: @@ -35,6 +35,18 @@ This source is capable of syncing the following tables and their data: * [Tickets](https://developers.hubspot.com/docs/api/crm/tickets) \(Incremental\) * [Workflows](https://legacydocs.hubspot.com/docs/methods/workflows/v3/get_workflows) +### A note on the `engagements` stream +Objects in the `engagements` stream can have one of the following types: `note`, `email`, `task`, `meeting`, `call`. + +Depending on the type of engagement, different properties will be set for that object in the `engagements_metadata` table in the destination. + +* A `call` engagement will have a corresponding `engagements_metadata` object with non-null values in the `toNumber`, `fromNumber`, `status`, `externalId`, `durationMilliseconds`, `externalAccountId`, `recordingUrl`, `body`, and `disposition` columns. +* An `email` engagement will have a corresponding `engagements_metadata` object with with non-null values in the `subject`, `html`, and `text` columns. In addition, there will be records in four related tables, `engagements_metadata_from`, `engagements_metadata_to`, `engagements_metadata_cc`, `engagements_metadata_bcc`. +* A `meeting` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body`, `startTime`, `endTime`, and `title` columns. +* A `note` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body` column. +* A `task` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body`, `status`, and `forObjectType` columns. 
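+
+To make the shape of this table more concrete, here is a purely hypothetical sketch (all field values below are invented for illustration and are not taken from the HubSpot API reference) of how a single `call` engagement might land in `engagements_metadata`, written as a Python dict:
+
+```python
+# Hypothetical illustration only -- every value below is invented.
+# A "call" engagement row in engagements_metadata would populate the
+# call-specific columns listed above and leave the other columns null.
+call_row = {
+    "toNumber": "+15555550100",
+    "fromNumber": "+15555550101",
+    "status": "COMPLETED",
+    "externalId": "example-external-id",
+    "durationMilliseconds": 60000,
+    "externalAccountId": "example-account-id",
+    "recordingUrl": "https://example.com/recordings/1",
+    "body": "Short call summary",
+    "disposition": "connected",
+}
+```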
+ + **Note**: HubSpot API currently only supports `quotes` endpoint using API Key, using Oauth it is impossible to access this stream (as reported by [community.hubspot.com](https://community.hubspot.com/t5/APIs-Integrations/Help-with-using-Feedback-CRM-API-and-Quotes-CRM-API/m-p/449104/highlight/true#M44411)). ## Getting Started \(Airbyte Open-Source / Airbyte Cloud\) From d791972278500a9ce470b9befaf0d5898bc445d8 Mon Sep 17 00:00:00 2001 From: Eugene Date: Wed, 5 Jan 2022 21:33:58 +0200 Subject: [PATCH 043/215] =?UTF-8?q?=F0=9F=90=9BSource-postgres:=20added=20?= =?UTF-8?q?materialized=20views=20processing=20(#9116)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [9012] Source-postgres: added materialized views processing --- .../decd338e-5647-4c0b-adf4-da0e75f5a750.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../source-postgres-strict-encrypt/Dockerfile | 2 +- .../connectors/source-postgres/Dockerfile | 2 +- .../source/postgres/PostgresSource.java | 82 +++++++++++++++---- .../sources/PostgresSourceAcceptanceTest.java | 11 +++ docs/integrations/sources/postgres.md | 1 + 7 files changed, 80 insertions(+), 22 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json index 58c40f87a119a..e97a5f07c782b 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "decd338e-5647-4c0b-adf4-da0e75f5a750", "name": "Postgres", "dockerRepository": "airbyte/source-postgres", - "dockerImageTag": "0.3.17", + "dockerImageTag": "0.4.1", "documentationUrl": "https://docs.airbyte.io/integrations/sources/postgres", "icon": "postgresql.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1d360c7fca4de..79ff39c818bbb 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -537,7 +537,7 @@ - name: Postgres sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750 dockerRepository: airbyte/source-postgres - dockerImageTag: 0.4.0 + dockerImageTag: 0.4.1 documentationUrl: https://docs.airbyte.io/integrations/sources/postgres icon: postgresql.svg sourceType: database diff --git a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile index 5ad1f8e4340fb..a303e5b1dd99d 100644 --- a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.5 +LABEL io.airbyte.version=0.1.6 LABEL io.airbyte.name=airbyte/source-postgres-strict-encrypt diff --git a/airbyte-integrations/connectors/source-postgres/Dockerfile b/airbyte-integrations/connectors/source-postgres/Dockerfile index 85cfddff1d742..7b76636deed89 100644 --- a/airbyte-integrations/connectors/source-postgres/Dockerfile +++ 
b/airbyte-integrations/connectors/source-postgres/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.4.0 +LABEL io.airbyte.version=0.4.1 LABEL io.airbyte.name=airbyte/source-postgres diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 4fcdc9a09bbb2..9cdc63b7a1801 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -117,8 +117,10 @@ public AirbyteCatalog discover(final JsonNode config) throws Exception { } @Override - public List> getCheckOperations(final JsonNode config) throws Exception { - final List> checkOperations = new ArrayList<>(super.getCheckOperations(config)); + public List> getCheckOperations(final JsonNode config) + throws Exception { + final List> checkOperations = new ArrayList<>( + super.getCheckOperations(config)); if (isCdc(config)) { checkOperations.add(database -> { @@ -129,21 +131,24 @@ public List> getCheckOperations(final J ps.setString(2, PostgresUtils.getPluginValue(config.get("replication_method"))); ps.setString(3, config.get("database").asText()); - LOGGER.info("Attempting to find the named replication slot using the query: " + ps.toString()); + LOGGER.info( + "Attempting to find the named replication slot using the query: " + ps.toString()); return ps; }, sourceOperations::rowToJson).collect(toList()); if (matchingSlots.size() != 1) { - throw new RuntimeException("Expected exactly one replication slot but found " + matchingSlots.size() - + ". Please read the docs and add a replication slot to your database."); + throw new RuntimeException( + "Expected exactly one replication slot but found " + matchingSlots.size() + + ". Please read the docs and add a replication slot to your database."); } }); checkOperations.add(database -> { final List matchingPublications = database.query(connection -> { - final PreparedStatement ps = connection.prepareStatement("SELECT * FROM pg_publication WHERE pubname = ?"); + final PreparedStatement ps = connection + .prepareStatement("SELECT * FROM pg_publication WHERE pubname = ?"); ps.setString(1, config.get("replication_method").get("publication").asText()); LOGGER.info("Attempting to find the publication using the query: " + ps.toString()); @@ -152,8 +157,9 @@ public List> getCheckOperations(final J }, sourceOperations::rowToJson).collect(toList()); if (matchingPublications.size() != 1) { - throw new RuntimeException("Expected exactly one publication but found " + matchingPublications.size() - + ". Please read the docs and add a publication to your database."); + throw new RuntimeException( + "Expected exactly one publication but found " + matchingPublications.size() + + ". 
Please read the docs and add a publication to your database."); } }); @@ -163,7 +169,9 @@ public List> getCheckOperations(final J } @Override - public AutoCloseableIterator read(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final JsonNode state) + public AutoCloseableIterator read(final JsonNode config, + final ConfiguredAirbyteCatalog catalog, + final JsonNode state) throws Exception { // this check is used to ensure that have the pgoutput slot available so Debezium won't attempt to // create it. @@ -177,7 +185,8 @@ public AutoCloseableIterator read(final JsonNode config, final C } @Override - public List> getIncrementalIterators(final JdbcDatabase database, + public List> getIncrementalIterators( + final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog, final Map>> tableNameToTable, final StateManager stateManager, @@ -192,10 +201,13 @@ public List> getIncrementalIterators(final */ final JsonNode sourceConfig = database.getSourceConfig(); if (isCdc(sourceConfig)) { - final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler(sourceConfig, PostgresCdcTargetPosition.targetPosition(database), + final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler(sourceConfig, + PostgresCdcTargetPosition.targetPosition(database), PostgresCdcProperties.getDebeziumProperties(sourceConfig), catalog, false); - return handler.getIncrementalIterators(new PostgresCdcSavedInfoFetcher(stateManager.getCdcStateManager().getCdcState()), - new PostgresCdcStateHandler(stateManager), new PostgresCdcConnectorMetadataInjector(), emittedAt); + return handler.getIncrementalIterators( + new PostgresCdcSavedInfoFetcher(stateManager.getCdcStateManager().getCdcState()), + new PostgresCdcStateHandler(stateManager), new PostgresCdcConnectorMetadataInjector(), + emittedAt); } else { return super.getIncrementalIterators(database, catalog, tableNameToTable, stateManager, emittedAt); @@ -228,13 +240,47 @@ private static AirbyteStream removeIncrementalWithoutPk(final AirbyteStream stre } @Override - public Set getPrivilegesTableForCurrentUser(final JdbcDatabase database, final String schema) throws SQLException { + public Set getPrivilegesTableForCurrentUser(final JdbcDatabase database, + final String schema) + throws SQLException { return database.query(connection -> { final PreparedStatement ps = connection.prepareStatement( - "SELECT DISTINCT table_catalog, table_schema, table_name, privilege_type\n" - + "FROM information_schema.table_privileges\n" - + "WHERE grantee = ? AND privilege_type = 'SELECT'"); - ps.setString(1, database.getDatabaseConfig().get("username").asText()); + """ + SELECT DISTINCT table_catalog, + table_schema, + table_name, + privilege_type + FROM information_schema.table_privileges + WHERE grantee = ? + AND privilege_type = 'SELECT' + UNION ALL + SELECT r.rolname AS table_catalog, + n.nspname AS table_schema, + c.relname AS table_name, + -- the initial query is supposed to get a SELECT type. Since we use a UNION query + -- to get Views that we can read (i.e. select) - then lets fill this columns with SELECT + -- value to keep the backward-compatibility + COALESCE ('SELECT') AS privilege_type + FROM pg_class c + JOIN pg_namespace n + ON n.oid = relnamespace + JOIN pg_roles r + ON r.oid = relowner, + Unnest(COALESCE(relacl::text[], Format('{%s=arwdDxt/%s}', rolname, rolname)::text[])) acl, + Regexp_split_to_array(acl, '=|/') s + WHERE r.rolname = ? 
+ AND nspname = 'public' + -- 'm' means Materialized View + AND c.relkind = 'm' + AND ( + -- all grants + c.relacl IS NULL + -- read grant + OR s[2] = 'r'); + """); + final String username = database.getDatabaseConfig().get("username").asText(); + ps.setString(1, username); + ps.setString(2, username); return ps; }, sourceOperations::rowToJson) .collect(toSet()) diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java index c5a3709631f8e..9b8b0a634324e 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java @@ -31,6 +31,7 @@ public class PostgresSourceAcceptanceTest extends SourceAcceptanceTest { private static final String STREAM_NAME = "public.id_and_name"; private static final String STREAM_NAME2 = "public.starships"; + private static final String STREAM_NAME_MATERIALIZED_VIEW = "public.testview"; private PostgreSQLContainer container; private JsonNode config; @@ -67,6 +68,7 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); + ctx.fetch("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); return null; }); @@ -113,6 +115,15 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { STREAM_NAME2, Field.of("id", JsonSchemaPrimitive.NUMBER), Field.of("name", JsonSchemaPrimitive.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))), + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME_MATERIALIZED_VIEW, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))))); } diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index 3695a3316c1d7..84350062df93e 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -257,6 +257,7 @@ According to Postgres [documentation](https://www.postgresql.org/docs/14/datatyp | Version | Date | Pull Request | Subject | |:--------|:-----------|:-------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------| +| 0.4.1 | 2022-01-05 | [9116](https://github.com/airbytehq/airbyte/pull/9116) | Added materialized views processing | | 0.4.0 | 2021-12-13 | [8726](https://github.com/airbytehq/airbyte/pull/8726) | Support all Postgres types | | 0.3.17 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) 
| Fixed incorrect handling "\n" in ssh key | | 0.3.16 | 2021-11-28 | [7995](https://github.com/airbytehq/airbyte/pull/7995) | Fixed money type with amount > 1000 | From 37c4a752fa71fbb048d79005c78414056c3d9d46 Mon Sep 17 00:00:00 2001 From: Baz Date: Wed, 5 Jan 2022 21:59:26 +0200 Subject: [PATCH 044/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20Mixpanel:=20bum?= =?UTF-8?q?ped=20version,=20hot-fix.=20(#9320)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/connectors/source-mixpanel/Dockerfile | 2 +- docs/integrations/sources/mixpanel.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 79ff39c818bbb..0fb24d6aab430 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -412,7 +412,7 @@ - name: Mixpanel sourceDefinitionId: 12928b32-bf0a-4f1e-964f-07e12e37153a dockerRepository: airbyte/source-mixpanel - dockerImageTag: 0.1.8.1 + dockerImageTag: 0.1.9 documentationUrl: https://docs.airbyte.io/integrations/sources/mixpanel icon: mixpanel.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 06ea8ac2abcbb..20589a4c7d2e9 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4202,7 +4202,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-mixpanel:0.1.8.1" +- dockerImage: "airbyte/source-mixpanel:0.1.9" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/mixpanel" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-mixpanel/Dockerfile b/airbyte-integrations/connectors/source-mixpanel/Dockerfile index 30301ba1aa407..e82df2210a78f 100644 --- a/airbyte-integrations/connectors/source-mixpanel/Dockerfile +++ b/airbyte-integrations/connectors/source-mixpanel/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.8.1 +LABEL io.airbyte.version=0.1.9 LABEL io.airbyte.name=airbyte/source-mixpanel diff --git a/docs/integrations/sources/mixpanel.md b/docs/integrations/sources/mixpanel.md index c9ddb12c27772..6d5cf7d4ad7ba 100644 --- a/docs/integrations/sources/mixpanel.md +++ b/docs/integrations/sources/mixpanel.md @@ -59,7 +59,7 @@ Select the correct region \(EU or US\) for your Mixpanel project. 
See detail [he | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| `0.1.8.1` | 2021-12-07 | [8429](https://github.com/airbytehq/airbyte/pull/8578) | Updated titles and descriptions | +| `0.1.9` | 2021-12-07 | [8429](https://github.com/airbytehq/airbyte/pull/8578) | Updated titles and descriptions | | `0.1.7` | 2021-12-01 | [8381](https://github.com/airbytehq/airbyte/pull/8381) | Increased performance for `discovery` stage during connector setup | | `0.1.6` | 2021-11-25 | [8256](https://github.com/airbytehq/airbyte/issues/8256) | Deleted `date_window_size` and fix schemas date type issue | | `0.1.5` | 2021-11-10 | [7451](https://github.com/airbytehq/airbyte/issues/7451) | Support `start_date` older than 1 year | From 34c9366f2b15288948824730c2c515d48d182601 Mon Sep 17 00:00:00 2001 From: Augustin Date: Wed, 5 Jan 2022 21:19:40 +0100 Subject: [PATCH 045/215] =?UTF-8?q?=F0=9F=90=99=20=20Bootstrap=20octavia-c?= =?UTF-8?q?li=20project=20(#9070)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/gradle.yml | 70 +++++++++++++++++++++++ .gitignore | 1 + octavia-cli/.coveragerc | 3 + octavia-cli/.dockerignore | 3 + octavia-cli/.gitignore | 2 + octavia-cli/.python-version | 1 + octavia-cli/CHANGELOG.md | 0 octavia-cli/Dockerfile | 17 ++++++ octavia-cli/LICENSE | 21 +++++++ octavia-cli/README.md | 55 ++++++++++++++++++ octavia-cli/build.gradle | 9 +++ octavia-cli/octavia_cli/__init__.py | 0 octavia-cli/octavia_cli/entrypoint.py | 42 ++++++++++++++ octavia-cli/pytest.ini | 5 ++ octavia-cli/setup.py | 53 +++++++++++++++++ octavia-cli/unit_tests/test_entrypoint.py | 25 ++++++++ settings.gradle | 22 ++++--- 17 files changed, 320 insertions(+), 9 deletions(-) create mode 100644 octavia-cli/.coveragerc create mode 100644 octavia-cli/.dockerignore create mode 100644 octavia-cli/.gitignore create mode 100644 octavia-cli/.python-version create mode 100644 octavia-cli/CHANGELOG.md create mode 100644 octavia-cli/Dockerfile create mode 100644 octavia-cli/LICENSE create mode 100644 octavia-cli/README.md create mode 100644 octavia-cli/build.gradle create mode 100644 octavia-cli/octavia_cli/__init__.py create mode 100644 octavia-cli/octavia_cli/entrypoint.py create mode 100644 octavia-cli/pytest.ini create mode 100644 octavia-cli/setup.py create mode 100644 octavia-cli/unit_tests/test_entrypoint.py diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 836b89f758a10..adc1ad47b14d9 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -275,6 +275,76 @@ jobs: SLACK_TITLE: "Build failure" SLACK_FOOTER: "" + - name: Slack Notification - Success + if: success() && github.ref == 'refs/heads/master' + uses: rtCamp/action-slack-notify@master + env: + SLACK_WEBHOOK: ${{ secrets.BUILD_SLACK_WEBHOOK }} + SLACK_USERNAME: Buildbot + SLACK_TITLE: "Build Success" + SLACK_FOOTER: "" + octavia-cli-build: + runs-on: ubuntu-latest + name: "Octavia CLI: Build" + timeout-minutes: 90 + steps: + - name: Checkout Airbyte + uses: actions/checkout@v2 + + # this intentionally does not use restore-keys so we don't mess with gradle caching + - name: Gradle Caching + uses: actions/cache@v2 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + **/.venv + key: ${{ secrets.CACHE_VERSION }}-${{ runner.os }}-${{ hashFiles('**/*.gradle*') }}-${{ hashFiles('**/package-lock.json') }} + + - uses: actions/setup-java@v1 + with: + java-version: "17" + + # octavia-cli install and testing requires 
Python. + # We use 3.8 in this project because 3.7 is not supported on Apple M1. + - uses: actions/setup-python@v2 + with: + python-version: "3.8" + + - name: Set up CI Gradle Properties + run: | + mkdir -p ~/.gradle/ + cat > ~/.gradle/gradle.properties <> ~/.zshrc +source ~/.zshrc +octavia +```` + +# Current development status +Octavia is currently under development. +You can find a detailed and updated execution plan [here](https://docs.google.com/spreadsheets/d/1weB9nf0Zx3IR_QvpkxtjBAzyfGb7B0PWpsVt6iMB5Us/edit#gid=0). +We welcome community contributions! + +Summary of achievements: + +| Date | Milestone | +|------------|-------------------------------------| +| 2021-12-22 | Bootstrapping the project's code base | + +# Developing locally +1. Install Python 3.10.0. We suggest doing it through `pyenv` +2. Create a virtualenv: `python -m venv .venv` +3. Activate the virtualenv: `source .venv/bin/activate` +4. Install dev dependencies: `pip install -e .\[dev\]` +5. Install `pre-commit` hooks: `pre-commit install` +6. Run the test suite: `pytest --cov=octavia_cli unit_tests` +7. Iterate; please check the [Contributing](#contributing) for instructions on contributing. + +# Contributing +1. Please sign up to [Airbyte's Slack workspace](https://slack.airbyte.io/) and join the `#octavia-cli`. We'll sync up community efforts in this channel. +2. Read the [execution plan](https://docs.google.com/spreadsheets/d/1weB9nf0Zx3IR_QvpkxtjBAzyfGb7B0PWpsVt6iMB5Us/edit#gid=0) and find a task you'd like to work on. +3. Open a PR, make sure to test your code thoroughly. \ No newline at end of file diff --git a/octavia-cli/build.gradle b/octavia-cli/build.gradle new file mode 100644 index 0000000000000..5ebd064d689b0 --- /dev/null +++ b/octavia-cli/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' +} + +airbytePython { + moduleDirectory 'octavia_cli' +} + diff --git a/octavia-cli/octavia_cli/__init__.py b/octavia-cli/octavia_cli/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/octavia-cli/octavia_cli/entrypoint.py b/octavia-cli/octavia_cli/entrypoint.py new file mode 100644 index 0000000000000..3d82bc32f5ea3 --- /dev/null +++ b/octavia-cli/octavia_cli/entrypoint.py @@ -0,0 +1,42 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import click + + +@click.group() +@click.option("--airbyte-url", envvar="AIRBYTE_URL", default="http://localhost:8000", help="The URL of your Airbyte instance.") +def octavia(airbyte_url): + # TODO: check if the airbyte_url is reachable + click.secho(f"🐙 - Octavia is targetting your Airbyte instance running at {airbyte_url}") + + +@octavia.command(help="Scaffolds a local project directories.") +def init(): + raise click.ClickException("The init command is not yet implemented.") + + +@octavia.command(name="list", help="List existing resources on the Airbyte instance.") +def _list(): + raise click.ClickException("The init command is not yet implemented.") + + +@octavia.command(name="import", help="Import an existing resources from the Airbyte instance.") +def _import(): + raise click.ClickException("The init command is not yet implemented.") + + +@octavia.command(help="Generate a YAML configuration file to manage a resource.") +def create(): + raise click.ClickException("The init command is not yet implemented.") + + +@octavia.command(help="Create or update resources according to YAML configurations.") +def apply(): + raise click.ClickException("The init command is not yet implemented.") + + +@octavia.command(help="Delete resources") +def delete(): + raise click.ClickException("The init command is not yet implemented.") diff --git a/octavia-cli/pytest.ini b/octavia-cli/pytest.ini new file mode 100644 index 0000000000000..f5276a231cadd --- /dev/null +++ b/octavia-cli/pytest.ini @@ -0,0 +1,5 @@ +[pytest] +log_cli = 1 +log_cli_level = INFO +log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s) +log_cli_date_format=%Y-%m-%d %H:%M:%S diff --git a/octavia-cli/setup.py b/octavia-cli/setup.py new file mode 100644 index 0000000000000..93534a6e851a6 --- /dev/null +++ b/octavia-cli/setup.py @@ -0,0 +1,53 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import pathlib + +from setuptools import find_packages, setup + +# The directory containing this file +HERE = pathlib.Path(__file__).parent + +# The text of the README file +README = (HERE / "README.md").read_text() + +setup( + name="octavia-cli", + version="0.1.0", + description="A command line interface to manage Airbyte configurations", + long_description=README, + author="Airbyte", + author_email="contact@airbyte.io", + license="MIT", + url="https://github.com/airbytehq/airbyte", + classifiers=[ + # This information is used when browsing on PyPi. 
+ # Dev Status + "Development Status :: 3 - Alpha", + # Project Audience + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering", + "Topic :: Software Development :: Libraries :: Python Modules", + "License :: OSI Approved :: MIT License", + # Python Version Support + "Programming Language :: Python :: 3.10", + ], + keywords="airbyte cli command-line-interface configuration", + project_urls={ + "Documentation": "https://docs.airbyte.io/", + "Source": "https://github.com/airbytehq/airbyte", + "Tracker": "https://github.com/airbytehq/airbyte/issues", + }, + packages=find_packages(exclude=("tests", "docs")), + install_requires=["click~=8.0.3"], + python_requires=">=3.8.12", + extras_require={ + "dev": ["MyPy~=0.812", "pytest~=6.2.5", "pytest-cov", "pytest-mock", "requests-mock", "pre-commit"], + "sphinx-docs": [ + "Sphinx~=4.2", + "sphinx-rtd-theme~=1.0", + ], + }, + entry_points={"console_scripts": ["octavia=octavia_cli.entrypoint:octavia"]}, +) diff --git a/octavia-cli/unit_tests/test_entrypoint.py b/octavia-cli/unit_tests/test_entrypoint.py new file mode 100644 index 0000000000000..c8effc674c65f --- /dev/null +++ b/octavia-cli/unit_tests/test_entrypoint.py @@ -0,0 +1,25 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import pytest +from click.testing import CliRunner +from octavia_cli import entrypoint + + +def test_octavia(): + runner = CliRunner() + result = runner.invoke(entrypoint.octavia) + assert result.exit_code == 0 + assert result.output.startswith("Usage: octavia [OPTIONS] COMMAND [ARGS]...") + + +@pytest.mark.parametrize( + "command", + [entrypoint.init, entrypoint.apply, entrypoint.create, entrypoint.delete, entrypoint._list, entrypoint._import], +) +def test_not_implemented_commands(command): + runner = CliRunner() + result = runner.invoke(command) + assert result.exit_code == 1 + assert result.output.endswith("not yet implemented.\n") diff --git a/settings.gradle b/settings.gradle index b50a3f1933dea..779ee999742f8 100644 --- a/settings.gradle +++ b/settings.gradle @@ -19,27 +19,30 @@ sourceControl { rootProject.name = 'airbyte' -// SUB_BUILD is an enum of , PLATFORM, CONNECTORS_BASE. Blank is equivalent to all. +// SUB_BUILD is an enum of , PLATFORM, CONNECTORS_BASE, OCTAVIA_CLI. Blank is equivalent to all. if (!System.getenv().containsKey("SUB_BUILD")) { println("Building all of Airbyte.") } else { def subBuild = System.getenv().get("SUB_BUILD") println("Building Airbyte Sub Build: " + subBuild) - if (subBuild != "PLATFORM" && subBuild != "CONNECTORS_BASE") { - throw new IllegalArgumentException(String.format("%s is invalid. Must be unset or PLATFORM or CONNECTORS_BASE", subBuild)) + if (subBuild != "PLATFORM" && subBuild != "CONNECTORS_BASE" && subBuild != "OCTAVIA_CLI") { + throw new IllegalArgumentException(String.format("%s is invalid. Must be unset or PLATFORM or CONNECTORS_BASE or OCTAVIA_CLI", subBuild)) } } // shared -include ':airbyte-api' include ':airbyte-commons' -include ':airbyte-commons-docker' + +// shared by CONNECTORS_BASE and PLATFORM sub builds +include ':airbyte-api' include ':airbyte-commons-cli' +include ':airbyte-commons-docker' include ':airbyte-config:models' // reused by acceptance tests in connector base. include ':airbyte-db:lib' // reused by acceptance tests in connector base. 
include ':airbyte-json-validation' -include ':airbyte-protocol:models' include ':airbyte-metrics' +include ':airbyte-oauth' +include ':airbyte-protocol:models' include ':airbyte-queue' include ':airbyte-test-utils' @@ -60,7 +63,6 @@ if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD" include ':airbyte-config:specs' include ':airbyte-container-orchestrator' include ':airbyte-webapp-e2e-tests' - include ':airbyte-oauth' include ':airbyte-scheduler:app' include ':airbyte-scheduler:client' include ':airbyte-server' @@ -86,8 +88,6 @@ if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD" include ':airbyte-integrations:connector-templates:generator' include ':airbyte-integrations:bases:debezium' - include ':airbyte-oauth' - // Needed by normalization integration tests include ':airbyte-integrations:connectors:destination-bigquery' include ':airbyte-integrations:connectors:destination-jdbc' @@ -106,6 +106,10 @@ if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD" include ':tools:code-generator' } +if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "OCTAVIA_CLI") { + include ':octavia-cli' +} + // connectors if (!System.getenv().containsKey("SUB_BUILD")) { // include all connector projects From b01eeb297d7085a1ef252c34ea48ad9fd6e4d09e Mon Sep 17 00:00:00 2001 From: Mohamed Magdy Date: Thu, 6 Jan 2022 01:17:27 +0100 Subject: [PATCH 046/215] Update `bootloader` image to `0.35.3-alpha` in HelmCharts (#9321) --- charts/airbyte/README.md | 2 +- charts/airbyte/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index fcf8a050d5315..58fe3b6b6a0ab 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -148,7 +148,7 @@ | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.34.0-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. 
Defaults to the chart's AppVersion | `0.35.3-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 07d59b1d2c73e..31d9bc0c1a5cf 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -446,7 +446,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.34.0-alpha + tag: 0.35.3-alpha ## @section Temporal parameters ## TODO: Move to consuming temporal from a dedicated helm chart From 4e91b989f6efeaa75306a86342cdf04d02ce4805 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Wed, 5 Jan 2022 21:46:38 -0800 Subject: [PATCH 047/215] fix source specs build error yet again (#9330) --- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 20589a4c7d2e9..b373fb83bbe9a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -5501,7 +5501,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-postgres:0.4.0" +- dockerImage: "airbyte/source-postgres:0.4.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" connectionSpecification: From 954929a4275b89ab1862f23c4509324a711342f5 Mon Sep 17 00:00:00 2001 From: "oleh.zorenko" <19872253+Zirochkaa@users.noreply.github.com> Date: Thu, 6 Jan 2022 10:56:40 +0200 Subject: [PATCH 048/215] Source Delighted: add incremental sync mode to streams in `integration_tests/configured_catalog.json` (#9333) * Add incremental sync mode to streams in `integration_tests/configured_catalog.json` * Bump connector's version --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-delighted/Dockerfile | 2 +- .../integration_tests/configured_catalog.json | 48 ++++++++++++------- docs/integrations/sources/delighted.md | 1 + 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 0fb24d6aab430..dadadf52fc82d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -147,7 +147,7 @@ - name: Delighted sourceDefinitionId: cc88c43f-6f53-4e8a-8c4d-b284baaf9635 dockerRepository: airbyte/source-delighted - dockerImageTag: 0.1.1 + dockerImageTag: 0.1.2 documentationUrl: https://docs.airbyte.io/integrations/sources/delighted icon: delighted.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index b373fb83bbe9a..44629eb404bec 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1280,7 +1280,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-delighted:0.1.1" +- dockerImage: "airbyte/source-delighted:0.1.2" spec: documentationUrl: "https://docsurl.com" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-delighted/Dockerfile b/airbyte-integrations/connectors/source-delighted/Dockerfile index 
1e1396f91cea8..41c613ad0ff75 100644 --- a/airbyte-integrations/connectors/source-delighted/Dockerfile +++ b/airbyte-integrations/connectors/source-delighted/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/source-delighted diff --git a/airbyte-integrations/connectors/source-delighted/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-delighted/integration_tests/configured_catalog.json index 85a9f5051bdeb..95d7427591401 100644 --- a/airbyte-integrations/connectors/source-delighted/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-delighted/integration_tests/configured_catalog.json @@ -2,43 +2,55 @@ "streams": [ { "stream": { - "name": "people", + "name": "bounces", "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_primary_key": [["id"]] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["bounced_at"], + "source_defined_primary_key": [["person_id"]] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["bounced_at"] }, { "stream": { - "name": "unsubscribes", + "name": "people", "json_schema": {}, - "supported_sync_modes": ["full_refresh"], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["created_at"], "source_defined_primary_key": [["id"]] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["created_at"] }, { "stream": { - "name": "bounces", + "name": "survey_responses", "json_schema": {}, - "supported_sync_modes": ["full_refresh"], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updated_at"], "source_defined_primary_key": [["id"]] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["updated_at"] }, { "stream": { - "name": "survey_responses", + "name": "unsubscribes", "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_primary_key": [["id"]] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["unsubscribed_at"], + "source_defined_primary_key": [["person_id"]] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["unsubscribed_at"] } ] } diff --git a/docs/integrations/sources/delighted.md b/docs/integrations/sources/delighted.md index 9402469ee0d7b..89a6db716ed5c 100644 --- a/docs/integrations/sources/delighted.md +++ b/docs/integrations/sources/delighted.md @@ -37,5 +37,6 @@ This connector supports `API PASSWORD` as the authentication method. 
| Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.2 | 2022-01-06 | [9333](https://github.com/airbytehq/airbyte/pull/9333) | Add incremental sync mode to streams in `integration_tests/configured_catalog.json` | | 0.1.1 | 2022-01-04 | [9275](https://github.com/airbytehq/airbyte/pull/9275) | Fix pagination handling for `survey_responses`, `bounces` and `unsubscribes` streams | | 0.1.0 | 2021-10-27 | [4551](https://github.com/airbytehq/airbyte/pull/4551) | Add Delighted source connector | From 5ffc880c3a6ca4bc687694aac6990fa3ea73eaa6 Mon Sep 17 00:00:00 2001 From: Artem Astapenko <3767150+Jamakase@users.noreply.github.com> Date: Thu, 6 Jan 2022 13:17:34 +0300 Subject: [PATCH 049/215] Some updated for e2e tests (#9091) --- airbyte-webapp-e2e-tests/cypress/global.d.ts | 11 +++++++++ .../cypress/support/commands/common.js | 6 +++++ .../support/commands/{index.js => index.ts} | 1 + .../cypress/support/commands/sidebar.ts | 3 +++ airbyte-webapp-e2e-tests/package-lock.json | 22 +++++++++++++++++- airbyte-webapp-e2e-tests/package.json | 1 + airbyte-webapp-e2e-tests/tsconfig.json | 23 +++++++++++++++++++ .../src/components/SideMenu/SideMenu.tsx | 4 ++-- .../SideMenu/components/MenuItem.tsx | 12 +++------- .../views/settings/CloudSettingsPage.tsx | 1 + 10 files changed, 72 insertions(+), 12 deletions(-) create mode 100644 airbyte-webapp-e2e-tests/cypress/global.d.ts rename airbyte-webapp-e2e-tests/cypress/support/commands/{index.js => index.ts} (80%) create mode 100644 airbyte-webapp-e2e-tests/cypress/support/commands/sidebar.ts create mode 100644 airbyte-webapp-e2e-tests/tsconfig.json diff --git a/airbyte-webapp-e2e-tests/cypress/global.d.ts b/airbyte-webapp-e2e-tests/cypress/global.d.ts new file mode 100644 index 0000000000000..57c2e680c1551 --- /dev/null +++ b/airbyte-webapp-e2e-tests/cypress/global.d.ts @@ -0,0 +1,11 @@ +declare global { + namespace Cypress { + interface Chainable { + clearApp(): Chainable; + + // sidebar + + openSettings(): Chainable; + } + } +} diff --git a/airbyte-webapp-e2e-tests/cypress/support/commands/common.js b/airbyte-webapp-e2e-tests/cypress/support/commands/common.js index a05dee4a53937..c305c6f4c4445 100644 --- a/airbyte-webapp-e2e-tests/cypress/support/commands/common.js +++ b/airbyte-webapp-e2e-tests/cypress/support/commands/common.js @@ -55,3 +55,9 @@ Cypress.Commands.add("deleteEntity", () => { cy.get("button[data-id='open-delete-modal']").click(); cy.get("button[data-id='delete']").click(); }) + +Cypress.Commands.add("clearApp", () => { + indexedDB.deleteDatabase("firebaseLocalStorageDb"); + cy.clearLocalStorage(); + cy.clearCookies(); +}); diff --git a/airbyte-webapp-e2e-tests/cypress/support/commands/index.js b/airbyte-webapp-e2e-tests/cypress/support/commands/index.ts similarity index 80% rename from airbyte-webapp-e2e-tests/cypress/support/commands/index.js rename to airbyte-webapp-e2e-tests/cypress/support/commands/index.ts index 22bd544cc480c..3ffc02becb89b 100644 --- a/airbyte-webapp-e2e-tests/cypress/support/commands/index.js +++ b/airbyte-webapp-e2e-tests/cypress/support/commands/index.ts @@ -1,4 +1,5 @@ import "./common"; +import "./sidebar"; import "./source"; import "./destination"; import "./connection"; diff --git a/airbyte-webapp-e2e-tests/cypress/support/commands/sidebar.ts b/airbyte-webapp-e2e-tests/cypress/support/commands/sidebar.ts new file mode 100644 index 0000000000000..39ddd9a8cc547 --- /dev/null +++ b/airbyte-webapp-e2e-tests/cypress/support/commands/sidebar.ts @@ -0,0 +1,3 @@ 
+Cypress.Commands.add("openSettings", () => { + cy.get("nav a[href*='settings']").click(); +}); diff --git a/airbyte-webapp-e2e-tests/package-lock.json b/airbyte-webapp-e2e-tests/package-lock.json index 9829809eaf757..a945bc04aac91 100644 --- a/airbyte-webapp-e2e-tests/package-lock.json +++ b/airbyte-webapp-e2e-tests/package-lock.json @@ -9,7 +9,8 @@ "version": "0.0.0", "devDependencies": { "cypress": "^9.2.0", - "eslint-plugin-cypress": "^2.12.1" + "eslint-plugin-cypress": "^2.12.1", + "typescript": "^4.5.4" } }, "node_modules/@cypress/request": { @@ -2344,6 +2345,19 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/typescript": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.5.4.tgz", + "integrity": "sha512-VgYs2A2QIRuGphtzFV7aQJduJ2gyfTljngLzjpfW9FoYZF6xuw1W0vW9ghCKLfcWrCFxK81CSGRAvS1pn4fIUg==", + "dev": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=4.2.0" + } + }, "node_modules/universalify": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.0.tgz", @@ -4281,6 +4295,12 @@ "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", "dev": true }, + "typescript": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.5.4.tgz", + "integrity": "sha512-VgYs2A2QIRuGphtzFV7aQJduJ2gyfTljngLzjpfW9FoYZF6xuw1W0vW9ghCKLfcWrCFxK81CSGRAvS1pn4fIUg==", + "dev": true + }, "universalify": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.0.tgz", diff --git a/airbyte-webapp-e2e-tests/package.json b/airbyte-webapp-e2e-tests/package.json index 432b0287607c7..25eff6858bffa 100644 --- a/airbyte-webapp-e2e-tests/package.json +++ b/airbyte-webapp-e2e-tests/package.json @@ -20,6 +20,7 @@ }, "devDependencies": { "cypress": "^9.2.0", + "typescript": "^4.5.4", "eslint-plugin-cypress": "^2.12.1" } } diff --git a/airbyte-webapp-e2e-tests/tsconfig.json b/airbyte-webapp-e2e-tests/tsconfig.json new file mode 100644 index 0000000000000..49720291df092 --- /dev/null +++ b/airbyte-webapp-e2e-tests/tsconfig.json @@ -0,0 +1,23 @@ +{ + "include": ["./**/*.ts"], + "exclude": [], + "compilerOptions": { + "baseUrl": "src", + "target": "es5", + "skipLibCheck": true, + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "forceConsistentCasingInFileNames": true, + "module": "esnext", + "moduleResolution": "node", + "resolveJsonModule": true, + "jsx": "react-jsx", + "noFallthroughCasesInSwitch": true, + "types": ["cypress"], + "lib": ["es2015", "dom"], + "isolatedModules": false, + "allowJs": true, + "noEmit": true + } +} diff --git a/airbyte-webapp/src/components/SideMenu/SideMenu.tsx b/airbyte-webapp/src/components/SideMenu/SideMenu.tsx index a5e84f9592e3b..fef8a8f939e74 100644 --- a/airbyte-webapp/src/components/SideMenu/SideMenu.tsx +++ b/airbyte-webapp/src/components/SideMenu/SideMenu.tsx @@ -8,7 +8,7 @@ export type SideMenuItem = { name: string | React.ReactNode; indicatorCount?: number; component: React.ComponentType; - testId?: string; + id?: string; }; export type CategoryItem = { @@ -49,7 +49,7 @@ const SideMenu: React.FC = ({ data, onSelect, activeItem }) => { )} {categoryItem.routes.map((route) => ( void; }; @@ -40,15 +40,9 @@ const Counter = styled.div` margin-left: 5px; `; -const MenuItem: React.FC = ({ - count, - isActive, - name, - testId, - onClick, -}) => { +const MenuItem: React.FC = ({ 
count, isActive, name, id, onClick }) => { return ( - + {name} {count ? {count} : null} diff --git a/airbyte-webapp/src/packages/cloud/views/settings/CloudSettingsPage.tsx b/airbyte-webapp/src/packages/cloud/views/settings/CloudSettingsPage.tsx index aa4665e5e1d9e..57ef1e568cfe7 100644 --- a/airbyte-webapp/src/packages/cloud/views/settings/CloudSettingsPage.tsx +++ b/airbyte-webapp/src/packages/cloud/views/settings/CloudSettingsPage.tsx @@ -72,6 +72,7 @@ export const CloudSettingsPage: React.FC = () => { path: CloudSettingsRoutes.AccessManagement, name: , component: UsersSettingsView, + id: "workspaceSettings.accessManagementSettings", }, { path: CloudSettingsRoutes.Notifications, From 7727b8635959b6581dd273759cfe62b578ba94c9 Mon Sep 17 00:00:00 2001 From: Christophe Duong Date: Thu, 6 Jan 2022 18:03:06 +0100 Subject: [PATCH 050/215] Include tcp port in Clickhouse destination configuration for normalization (#9340) * Include tcp port in Clickhouse destination configuration for normalization * revert custom dbt on clickhouse * Bumpversion of destination connector * Format code --- .../seed/destination_definitions.yaml | 2 +- .../resources/seed/destination_specs.yaml | 20 ++- .../transform_config/transform.py | 2 + ...ltBigQueryDenormalizedRecordFormatter.java | 2 +- .../destination-clickhouse/Dockerfile | 2 +- .../src/main/resources/spec.json | 18 ++- .../source_amazon_seller_partner/streams.py | 2 +- .../source-github/unit_tests/test_stream.py | 1 + .../source-jira/source_jira/source.py | 2 +- .../source_recurly/schemas/invoices.json | 115 +++--------------- .../NormalizationRunnerFactory.java | 5 +- 11 files changed, 58 insertions(+), 113 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index c03cc2c715d3f..100521ffbc212 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -37,7 +37,7 @@ - name: Clickhouse destinationDefinitionId: ce0d828e-1dc4-496c-b122-2da42e637e48 dockerRepository: airbyte/destination-clickhouse - dockerImageTag: 0.1.1 + dockerImageTag: 0.1.2 documentationUrl: https://docs.airbyte.io/integrations/destinations/clickhouse - name: DynamoDB destinationDefinitionId: 8ccd8909-4e99-4141-b48d-4984b70b2d89 diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 03dc563b6f39d..266fefa1716d8 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -646,7 +646,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-clickhouse:0.1.1" +- dockerImage: "airbyte/destination-clickhouse:0.1.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse" connectionSpecification: @@ -675,28 +675,38 @@ examples: - "8123" order: 1 + tcp-port: + title: "Native Port" + description: "Native port (not the JDBC) of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 9000 + examples: + - "9000" + order: 2 database: title: "DB Name" description: "Name of the database." type: "string" - order: 2 + order: 3 username: title: "User" description: "Username to use to access the database." 
type: "string" - order: 3 + order: 4 password: title: "Password" description: "Password associated with the username." type: "string" airbyte_secret: true - order: 4 + order: 5 ssl: title: "SSL Connection" description: "Encrypt data using SSL." type: "boolean" default: false - order: 5 + order: 6 tunnel_method: type: "object" title: "SSH Tunnel Method" diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py index d45cbc0623fb0..4ed99e1c29b51 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py @@ -277,6 +277,8 @@ def transform_clickhouse(config: Dict[str, Any]): "user": config["username"], "password": config["password"], } + if "tcp-port" in config: + dbt_config["port"] = config["tcp-port"] return dbt_config @staticmethod diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/DefaultBigQueryDenormalizedRecordFormatter.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/DefaultBigQueryDenormalizedRecordFormatter.java index a08751b25164f..63e5478c17cfd 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/DefaultBigQueryDenormalizedRecordFormatter.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/formatter/DefaultBigQueryDenormalizedRecordFormatter.java @@ -191,7 +191,7 @@ private JsonNode getObjectNode(final FieldList fields, final JsonNode root) { @Override public Schema getBigQuerySchema(final JsonNode jsonSchema) { - final List fieldList = getSchemaFields(namingResolver, jsonSchema); + final List fieldList = getSchemaFields(namingResolver, jsonSchema); if (fieldList.stream().noneMatch(f -> f.getName().equals(JavaBaseConstants.COLUMN_NAME_AB_ID))) { fieldList.add(Field.of(JavaBaseConstants.COLUMN_NAME_AB_ID, StandardSQLTypeName.STRING)); } diff --git a/airbyte-integrations/connectors/destination-clickhouse/Dockerfile b/airbyte-integrations/connectors/destination-clickhouse/Dockerfile index 32402f87805c7..98e08d55920dc 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/Dockerfile +++ b/airbyte-integrations/connectors/destination-clickhouse/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-clickhouse COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/destination-clickhouse diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json index 6037b573394d5..1edbbb8465d94 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json @@ -27,31 +27,41 @@ "examples": ["8123"], "order": 1 }, + "tcp-port": { + "title": "Native Port", + "description": "Native port (not the JDBC) of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 9000, + "examples": ["9000"], + "order": 2 + }, 
"database": { "title": "DB Name", "description": "Name of the database.", "type": "string", - "order": 2 + "order": 3 }, "username": { "title": "User", "description": "Username to use to access the database.", "type": "string", - "order": 3 + "order": 4 }, "password": { "title": "Password", "description": "Password associated with the username.", "type": "string", "airbyte_secret": true, - "order": 4 + "order": 5 }, "ssl": { "title": "SSL Connection", "description": "Encrypt data using SSL.", "type": "boolean", "default": false, - "order": 5 + "order": 6 } } } diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index bcf2569a67795..2d8c880cf3e3d 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -644,4 +644,4 @@ def request_params( return params def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]: - yield from response.json().get(self.data_field, {}).get("shippingLabels", []) \ No newline at end of file + yield from response.json().get(self.data_field, {}).get("shippingLabels", []) diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 6f1cbf1d65487..1f06233fd45a2 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -1,6 +1,7 @@ # # Copyright (c) 2021 Airbyte, Inc., all rights reserved. # + from http import HTTPStatus from unittest.mock import patch diff --git a/airbyte-integrations/connectors/source-jira/source_jira/source.py b/airbyte-integrations/connectors/source-jira/source_jira/source.py index 3689d253b8014..538a9ff94ed35 100644 --- a/airbyte-integrations/connectors/source-jira/source_jira/source.py +++ b/airbyte-integrations/connectors/source-jira/source_jira/source.py @@ -107,7 +107,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: **incremental_args, additional_fields=config.get("additional_fields", []), expand_changelog=config.get("expand_issue_changelog", False), - render_fields=render_fields + render_fields=render_fields, ) issue_fields_stream = IssueFields(**args) experimental_streams = [] diff --git a/airbyte-integrations/connectors/source-recurly/source_recurly/schemas/invoices.json b/airbyte-integrations/connectors/source-recurly/source_recurly/schemas/invoices.json index a7bacc0d38b0d..fecf63de82eb4 100644 --- a/airbyte-integrations/connectors/source-recurly/source_recurly/schemas/invoices.json +++ b/airbyte-integrations/connectors/source-recurly/source_recurly/schemas/invoices.json @@ -16,11 +16,7 @@ "title": "Invoice type", "description": "Invoices are either charge, credit, or legacy invoices.", "type": ["null", "string"], - "enum": [ - "charge", - "credit", - "legacy" - ] + "enum": ["charge", "credit", "legacy"] }, "origin": { "title": "Origin", @@ -143,10 +139,7 @@ "description": "An automatic invoice means a corresponding transaction is run using the account's billing information at the same time the invoice is created. Manual invoices are created without a corresponding transaction. 
The merchant must enter a manual payment transaction or have the customer pay the invoice with an automatic method, like credit card, PayPal, Amazon, or ACH bank payment.", "default": "automatic", "type": ["null", "string"], - "enum": [ - "automatic", - "manual" - ] + "enum": ["automatic", "manual"] }, "po_number": { "type": ["null", "string"], @@ -500,10 +493,7 @@ "title": "Line item type", "description": "Charges are positive line items that debit the account. Credits are negative line items that credit the account.", "type": ["null", "string"], - "enum": [ - "charge", - "credit" - ] + "enum": ["charge", "credit"] }, "item_code": { "type": ["null", "string"], @@ -532,10 +522,7 @@ "title": "Current state of the line item", "description": "Pending line items are charges or credits on an account that have not been applied to an invoice yet. Invoiced line items will always have an `invoice_id` value.", "type": ["null", "string"], - "enum": [ - "invoiced", - "pending" - ] + "enum": ["invoiced", "pending"] }, "legacy_category": { "title": "Legacy category", @@ -1064,11 +1051,7 @@ "type": { "title": "Invoice type", "type": ["null", "string"], - "enum": [ - "charge", - "credit", - "legacy" - ] + "enum": ["charge", "credit", "legacy"] }, "state": { "title": "Invoice state", @@ -1106,11 +1089,7 @@ "type": { "title": "Invoice type", "type": ["null", "string"], - "enum": [ - "charge", - "credit", - "legacy" - ] + "enum": ["charge", "credit", "legacy"] }, "state": { "title": "Invoice state", @@ -1142,13 +1121,7 @@ "title": "Transaction type", "description": "- `authorization` \u2013 verifies billing information and places a hold on money in the customer's account.\n- `capture` \u2013 captures funds held by an authorization and completes a purchase.\n- `purchase` \u2013 combines the authorization and capture in one transaction.\n- `refund` \u2013 returns all or a portion of the money collected in a previous transaction to the customer.\n- `verify` \u2013 a $0 or $1 transaction used to verify billing information which is immediately voided.\n", "type": ["null", "string"], - "enum": [ - "authorization", - "capture", - "purchase", - "refund", - "verify" - ] + "enum": ["authorization", "capture", "purchase", "refund", "verify"] }, "origin": { "title": "Origin of transaction", @@ -1263,10 +1236,7 @@ "collection_method": { "description": "The method by which the payment was collected.", "type": ["null", "string"], - "enum": [ - "automatic", - "manual" - ] + "enum": ["automatic", "manual"] }, "payment_method": { "properties": { @@ -1362,10 +1332,7 @@ "account_type": { "description": "The bank account type. 
Only present for ACH payment methods.", "type": ["null", "string"], - "enum": [ - "checking", - "savings" - ] + "enum": ["checking", "savings"] }, "routing_number": { "type": ["null", "string"], @@ -1457,16 +1424,7 @@ "title": "CVV check", "description": "When processed, result from checking the CVV/CVC value on the transaction.", "type": ["null", "string"], - "enum": [ - "D", - "I", - "M", - "N", - "P", - "S", - "U", - "X" - ] + "enum": ["D", "I", "M", "N", "P", "S", "U", "X"] }, "avs_check": { "title": "AVS check", @@ -1548,12 +1506,7 @@ "title": "Action", "description": "The action for which the credit was created.", "type": ["null", "string"], - "enum": [ - "payment", - "reduction", - "refund", - "write_off" - ] + "enum": ["payment", "reduction", "refund", "write_off"] }, "account": { "type": "object", @@ -1626,11 +1579,7 @@ "type": { "title": "Invoice type", "type": ["null", "string"], - "enum": [ - "charge", - "credit", - "legacy" - ] + "enum": ["charge", "credit", "legacy"] }, "state": { "title": "Invoice state", @@ -1668,11 +1617,7 @@ "type": { "title": "Invoice type", "type": ["null", "string"], - "enum": [ - "charge", - "credit", - "legacy" - ] + "enum": ["charge", "credit", "legacy"] }, "state": { "title": "Invoice state", @@ -1803,11 +1748,7 @@ "type": { "title": "Invoice type", "type": ["null", "string"], - "enum": [ - "charge", - "credit", - "legacy" - ] + "enum": ["charge", "credit", "legacy"] }, "state": { "title": "Invoice state", @@ -1845,11 +1786,7 @@ "type": { "title": "Invoice type", "type": ["null", "string"], - "enum": [ - "charge", - "credit", - "legacy" - ] + "enum": ["charge", "credit", "legacy"] }, "state": { "title": "Invoice state", @@ -2002,10 +1939,7 @@ "collection_method": { "description": "The method by which the payment was collected.", "type": ["null", "string"], - "enum": [ - "automatic", - "manual" - ] + "enum": ["automatic", "manual"] }, "payment_method": { "type": "object", @@ -2102,10 +2036,7 @@ "account_type": { "description": "The bank account type. Only present for ACH payment methods.", "type": ["null", "string"], - "enum": [ - "checking", - "savings" - ] + "enum": ["checking", "savings"] }, "routing_number": { "type": ["null", "string"], @@ -2116,7 +2047,6 @@ "description": "The bank name of this routing number." 
} } - }, "ip_address_v4": { "type": ["null", "string"], @@ -2197,16 +2127,7 @@ "title": "CVV check", "description": "When processed, result from checking the CVV/CVC value on the transaction.", "type": ["null", "string"], - "enum": [ - "D", - "I", - "M", - "N", - "P", - "S", - "U", - "X" - ] + "enum": ["D", "I", "M", "N", "P", "S", "U", "X"] }, "avs_check": { "title": "AVS check", diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index adcee9eb3b7a4..12368661ec5ca 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -18,9 +18,12 @@ public class NormalizationRunnerFactory { static final Map> NORMALIZATION_MAPPING = ImmutableMap.>builder() + // map destination connectors (alphabetically) to their expected normalization settings .put("airbyte/destination-bigquery", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DefaultNormalizationRunner.DestinationType.BIGQUERY)) .put("airbyte/destination-bigquery-denormalized", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DefaultNormalizationRunner.DestinationType.BIGQUERY)) + .put("airbyte/destination-clickhouse", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) + .put("airbyte/destination-clickhouse-strict-encrypt", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) .put("airbyte/destination-mssql", ImmutablePair.of("airbyte/normalization-mssql", DestinationType.MSSQL)) .put("airbyte/destination-mssql-strict-encrypt", ImmutablePair.of("airbyte/normalization-mssql", DestinationType.MSSQL)) .put("airbyte/destination-mysql", ImmutablePair.of("airbyte/normalization-mysql", DestinationType.MYSQL)) @@ -31,8 +34,6 @@ public class NormalizationRunnerFactory { .put("airbyte/destination-postgres-strict-encrypt", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.POSTGRES)) .put("airbyte/destination-redshift", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.REDSHIFT)) .put("airbyte/destination-snowflake", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.SNOWFLAKE)) - .put("airbyte/destination-clickhouse", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) - .put("airbyte/destination-clickhouse-strict-encrypt", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) .build(); public static NormalizationRunner create(final WorkerConfigs workerConfigs, final String connectorImageName, final ProcessFactory processFactory) { From e0bac4aaebf3c4d287a778ffddbaf4623843c114 Mon Sep 17 00:00:00 2001 From: Christophe Duong Date: Thu, 6 Jan 2022 18:49:31 +0100 Subject: [PATCH 051/215] =?UTF-8?q?=F0=9F=90=9B=20Fix=20normalization=20SC?= =?UTF-8?q?D=20partition=20by=20float=20columns=20errors=20with=20BigQuery?= =?UTF-8?q?=20(#9281)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ..._columns_resulting_into_long_names_scd.sql | 17 +- ..._columns_resulting_into_long_names_scd.sql | 19 ++- .../dedup_exchange_rate_scd.sql | 37 +++-- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_cdc_excluded_scd.sql | 46 +++--- .../dedup_exchange_rate_scd.sql | 40 ++--- .../renamed_dedup_cdc_excluded_scd.sql | 26 +-- 
.../renamed_dedup_cdc_excluded.sql | 1 + .../renamed_dedup_cdc_excluded_ab1.sql | 1 + .../renamed_dedup_cdc_excluded_ab2.sql | 1 + .../dedup_cdc_excluded_scd.sql | 48 +++--- .../dedup_exchange_rate_scd.sql | 46 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 28 ++-- .../renamed_dedup_cdc_excluded.sql | 1 + .../renamed_dedup_cdc_excluded_scd.sql | 4 +- .../renamed_dedup_cdc_excluded.sql | 4 +- ...eam_with_co__lting_into_long_names_scd.sql | 19 +-- ...eam_with_co__lting_into_long_names_scd.sql | 21 +-- .../dedup_exchange_rate_scd.sql | 39 ++--- .../dedup_exchange_rate_scd.sql | 45 +++--- ..._stream_with_co_1g_into_long_names_scd.sql | 17 +- ..._stream_with_co_1g_into_long_names_scd.sql | 19 ++- ..._stream_with_co_1g_into_long_names_scd.sql | 17 +- .../dedup_exchange_rate_scd.sql | 37 +++-- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 37 +++-- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 49 +++--- .../dedup_exchange_rate_scd.sql | 43 ++--- ...ream_with_c__lting_into_long_names_scd.sql | 17 +- .../some_stream_that_was_empty_scd.sql | 13 +- ...ream_with_c__lting_into_long_names_scd.sql | 19 ++- .../some_stream_that_was_empty_scd.sql | 15 +- .../dedup_cdc_excluded_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 37 +++-- .../test_normalization/pos_dedup_cdcx_scd.sql | 49 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 23 +-- .../renamed_dedup_cdc_excluded.sql | 1 + .../renamed_dedup_cdc_excluded_stg.sql | 6 + .../renamed_dedup_cdc_excluded_ab1.sql | 1 + .../renamed_dedup_cdc_excluded_ab2.sql | 1 + .../dedup_cdc_excluded_scd.sql | 45 +++--- .../dedup_exchange_rate_scd.sql | 43 ++--- .../test_normalization/pos_dedup_cdcx_scd.sql | 51 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 25 +-- .../renamed_dedup_cdc_excluded.sql | 1 + .../renamed_dedup_cdc_excluded_stg.sql | 1 + .../dedup_exchange_rate_scd.sql | 43 ++--- .../renamed_dedup_cdc_excluded_scd.sql | 45 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 4 +- .../renamed_dedup_cdc_excluded.sql | 4 +- .../renamed_dedup_cdc_excluded_stg.sql | 4 +- .../renamed_dedup_cdc_excluded_scd.sql | 4 +- .../renamed_dedup_cdc_excluded.sql | 4 +- .../renamed_dedup_cdc_excluded_stg.sql | 4 +- ..._columns_resulting_into_long_names_scd.sql | 17 +- ..._columns_resulting_into_long_names_scd.sql | 19 ++- .../dedup_exchange_rate_scd.sql | 37 +++-- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 43 ++--- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 17 +- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 19 ++- .../DEDUP_EXCHANGE_RATE_SCD.sql | 37 +++-- .../DEDUP_EXCHANGE_RATE_SCD.sql | 43 ++--- .../data_input/catalog.json | 5 +- .../transform_catalog/stream_processor.py | 150 +++++++++++------- 67 files changed, 980 insertions(+), 787 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index c98dcfdb9a7ed..59cf6d3a78044 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -26,9 +26,9 @@ scd_data as ( ), '')) as string ))) as _airbyte_unique_key, - id, - date, - `partition`, + id, + date, + `partition`, date as _airbyte_start_at, lag(date) over ( partition by id @@ -54,7 +54,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as @@ -72,9 +75,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - date, - `partition`, + id, + date, + `partition`, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 188d65104df21..a1d766dd11035 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -57,11 +57,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - date, - {{ adapter.quote('partition') }}, + id, + date, + {{ adapter.quote('partition') }}, date as _airbyte_start_at, lag(date) over ( partition by id @@ -87,7 +87,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -101,9 +104,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - date, - {{ adapter.quote('partition') }}, + id, + date, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 28f810d9152da..d7fd59df15b5d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -30,14 +30,14 @@ scd_data as ( ), '')) as string ))) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, date as _airbyte_start_at, lag(date) over ( partition by id, currency, cast(NZD as @@ -67,7 +67,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as @@ -85,14 +88,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index df5b57acadcc1..c2edf0afe7961 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -59,18 +59,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', + 'id', + 'currency', + 'NZD', ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, date as _airbyte_start_at, lag(date) 
over ( partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) @@ -96,7 +96,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -110,14 +113,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index bd4c0b6271b45..cc6694836bb6a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -59,18 +59,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', + 'id', + 'currency', + 'NZD', ]) }} as _airbyte_unique_key, - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, date as _airbyte_start_at, lag(date) over ( partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }}) @@ -96,7 +96,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -110,14 +113,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 99e574c63fda6..0c7c151236fd2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -26,9 +26,10 @@ input_data_with_active_row_num as ( row_number() over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_active_row_num from input_data ), @@ -40,21 +41,21 @@ scd_data as ( toString(id) ))) as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - anyOrNull(_airbyte_emitted_at) over ( + anyOrNull(_ab_cdc_lsn) over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_cdc_excluded_hashid @@ -65,7 +66,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( @@ -91,11 +95,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 2486691308c65..c1e8e6cb63fec 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -46,14 +46,14 @@ scd_data as ( toString(NZD) ))) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, date as _airbyte_start_at, case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, anyOrNull(date) over ( @@ -62,8 +62,7 @@ scd_data as ( date is null asc, date desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_exchange_rate_hashid @@ -74,7 +73,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( @@ -94,14 +96,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 4fa7b03259e21..eedc913fd45a5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -26,8 +26,8 @@ input_data_with_active_row_num as ( row_number() over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) as _airbyte_active_row_num from input_data @@ -40,17 +40,17 @@ scd_data as ( toString(id) ))) as _airbyte_unique_key, - id, - _airbyte_emitted_at as _airbyte_start_at, + id, + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, - anyOrNull(_airbyte_emitted_at) over ( + anyOrNull(_ab_cdc_updated_at) over ( partition by id order by - 
_airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_renamed_dedup_cdc_excluded_hashid @@ -61,7 +61,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( @@ -81,7 +84,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, + id, + _ab_cdc_updated_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index b16b5361120f0..9966d52012aec 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -17,6 +17,7 @@ select _airbyte_unique_key, id, + _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index a09668e69387e..88a3674f694b3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -7,6 +7,7 @@ -- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index 2fd528509bc5a..b192f4915e98f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -7,6 +7,7 @@ -- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} select accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index eedb5184f0a89..a20276296c922 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -56,9 +56,10 @@ input_data_with_active_row_num as ( row_number() over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_active_row_num from input_data ), @@ -66,23 +67,23 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - anyOrNull(_airbyte_emitted_at) over ( + anyOrNull(_ab_cdc_lsn) over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_cdc_excluded_hashid @@ -93,7 +94,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by 
_airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -107,11 +111,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 13744503505c2..1d94573fc99e1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -68,18 +68,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', + 'id', + 'currency', + 'NZD', ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, date as _airbyte_start_at, case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, anyOrNull(date) over ( @@ -88,8 +88,7 @@ scd_data as ( date is null asc, date desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_exchange_rate_hashid @@ -100,7 +99,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -114,14 +116,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 525bee19a04f1..9e3c81ac18178 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -56,8 +56,8 @@ input_data_with_active_row_num as ( row_number() over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) as _airbyte_active_row_num from input_data @@ -66,19 +66,19 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - _airbyte_emitted_at as _airbyte_start_at, + id, + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, - anyOrNull(_airbyte_emitted_at) over ( + anyOrNull(_ab_cdc_updated_at) over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_renamed_dedup_cdc_excluded_hashid @@ -89,7 +89,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -103,7 +106,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, + id, + _ab_cdc_updated_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index d9f20813f833e..1b9cead2c4958 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -8,6 +8,7 @@ select _airbyte_unique_key, id, + _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 0fc967c7e00a1..f96e982626f6a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -1,5 +1,5 @@ - insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" from renamed_dedup_cdc_excluded_scd__dbt_tmp \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index 3ee365f07d589..4ff849492d8ab 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -1,5 +1,5 @@ - insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", 
"_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" from renamed_dedup_cdc_excluded__dbt_tmp \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql index ca7cb37338611..113bf3c06d961 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -39,21 +39,19 @@ scd_data as ( concat(concat(coalesce(cast(id as VARCHAR(max)), ''''),''''), '''') as VARCHAR(max)), '''')), 2) as _airbyte_unique_key, - id, - "date", - "partition", + id, + "date", + "partition", "date" as _airbyte_start_at, lag("date") over ( partition by id order by - "date" desc, "date" desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by id order by - "date" desc, "date" desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, @@ -67,7 +65,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, convert(varchar(32), HashBytes(''md5'', coalesce(cast( @@ -85,9 +86,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - "date", - "partition", + id, + "date", + "partition", _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql index f312cf3afbfd4..b04acfdabc525 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -55,23 +55,21 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by id order by - {{ adapter.quote('date') }} desc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by id order by - {{ adapter.quote('date') }} desc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, @@ -85,7 +83,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -99,9 +100,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a572299fff2d4..ea79968965206 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -41,20 +41,19 @@ scd_data as ( VARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as VARCHAR(max)), ''''),''''), '''') as VARCHAR(max)), '''')), 2) as _airbyte_unique_key, - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, "date" as _airbyte_start_at, lag("date") over ( partition by id, currency, cast(nzd as VARCHAR(max)) order by - "date" desc, "date" desc, _airbyte_emitted_at desc ) as _airbyte_end_at, @@ -62,7 +61,6 @@ scd_data as ( partition by id, currency, cast(nzd as VARCHAR(max)) order 
by - "date" desc, "date" desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, @@ -76,7 +74,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, convert(varchar(32), HashBytes(''md5'', coalesce(cast( @@ -94,14 +95,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 3bd5d5d499ed7..fcc681aa95ba8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -57,30 +57,28 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) order by - {{ adapter.quote('date') }} desc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) order by - {{ adapter.quote('date') }} desc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, @@ -94,7 +92,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -108,14 +109,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - {{ adapter.quote('date') }}, - 
timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 1e688d24d74f1..e5f3e4859deba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -17,9 +17,9 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(concat(coalesce(cast(id as char), '')) as char)) as _airbyte_unique_key, - id, - `date`, - `partition`, + id, + `date`, + `partition`, `date` as _airbyte_start_at, lag(`date`) over ( partition by id @@ -45,7 +45,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, @@ -55,9 +58,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - `date`, - `partition`, + id, + `date`, + `partition`, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 78b7de1f601f7..d098146930d0c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -55,11 +55,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record 
identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by id @@ -85,7 +85,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -99,9 +102,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 1e688d24d74f1..e5f3e4859deba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -17,9 +17,9 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(concat(coalesce(cast(id as char), '')) as char)) as _airbyte_unique_key, - id, - `date`, - `partition`, + id, + `date`, + `partition`, `date` as _airbyte_start_at, lag(`date`) over ( partition by id @@ -45,7 +45,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, @@ -55,9 +58,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - `date`, - `partition`, + id, + `date`, + `partition`, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 64ded010aa63b..59d722cb4f381 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -17,14 +17,14 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(nzd as char), '')) as char)) as _airbyte_unique_key, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, `date` as _airbyte_start_at, lag(`date`) over ( partition by id, currency, cast(nzd as char) @@ -50,7 +50,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, @@ -60,14 +63,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index abfaa2002b55f..309ac4c903fe5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -57,18 +57,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ 
adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -94,7 +94,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -108,14 +111,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 64ded010aa63b..59d722cb4f381 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -17,14 +17,14 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(nzd as char), '')) as char)) as _airbyte_unique_key, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, `date` as _airbyte_start_at, lag(`date`) over ( partition by id, currency, cast(nzd as char) @@ -50,7 +50,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, @@ -60,14 +63,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - 
currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cbca1a34898ff..cfd186b006ae3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -27,27 +27,25 @@ scd_data as ( nzd ) as "_AIRBYTE_UNIQUE_KEY", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, "DATE" as "_AIRBYTE_START_AT", lag("DATE") over ( partition by id, currency, cast(nzd as varchar2(4000)) order by - "DATE" asc nulls last, - "DATE" desc, + "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc ) as "_AIRBYTE_END_AT", case when row_number() over ( partition by id, currency, cast(nzd as varchar2(4000)) order by - "DATE" asc nulls last, - "DATE" desc, + "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc ) = 1 then 1 else 0 end as "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", @@ -60,7 +58,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by "_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_START_AT", "_AIRBYTE_EMITTED_AT" + partition by + "_AIRBYTE_UNIQUE_KEY", + "_AIRBYTE_START_AT", + "_AIRBYTE_EMITTED_AT" order by "_AIRBYTE_ACTIVE_ROW" desc, "_AIRBYTE_AB_ID" ) as "_AIRBYTE_ROW_NUM", ora_hash( @@ -80,14 +81,14 @@ dedup_data as ( select "_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 09ada8d511715..8e8364a7b5072 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -57,31 +57,29 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }}, - id, - currency, - {{ quote('DATE') }}, - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + {{ quote('DATE') }}, + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, {{ quote('DATE') }} as {{ quote('_AIRBYTE_START_AT') }}, lag({{ quote('DATE') }}) over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) order by - {{ quote('DATE') }} asc nulls last, - {{ quote('DATE') }} desc, + {{ quote('DATE') }} desc nulls last, {{ quote('_AIRBYTE_EMITTED_AT') }} desc ) as {{ quote('_AIRBYTE_END_AT') }}, case when row_number() over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) order by - {{ quote('DATE') }} asc nulls last, - {{ quote('DATE') }} desc, + {{ quote('DATE') }} desc nulls last, {{ quote('_AIRBYTE_EMITTED_AT') }} desc ) = 1 then 1 else 0 end as {{ quote('_AIRBYTE_ACTIVE_ROW') }}, {{ quote('_AIRBYTE_AB_ID') }}, @@ -94,7 +92,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by {{ quote('_AIRBYTE_UNIQUE_KEY') }}, {{ quote('_AIRBYTE_START_AT') }}, {{ quote('_AIRBYTE_EMITTED_AT') }} + partition by + {{ quote('_AIRBYTE_UNIQUE_KEY') }}, + {{ quote('_AIRBYTE_START_AT') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }} order by {{ quote('_AIRBYTE_ACTIVE_ROW') }} desc, {{ quote('_AIRBYTE_AB_ID') }} ) as {{ quote('_AIRBYTE_ROW_NUM') }}, {{ dbt_utils.surrogate_key([ @@ -108,14 +109,14 @@ dedup_data as ( select {{ quote('_AIRBYTE_UNIQUE_KEY') }}, {{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}, - id, - currency, - {{ quote('DATE') }}, - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + {{ quote('DATE') }}, + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, {{ quote('_AIRBYTE_START_AT') }}, {{ quote('_AIRBYTE_END_AT') }}, {{ quote('_AIRBYTE_ACTIVE_ROW') }}, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cbca1a34898ff..cfd186b006ae3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -27,27 +27,25 @@ scd_data as ( nzd ) as "_AIRBYTE_UNIQUE_KEY", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - 
hkd_special___characters_1, - nzd, - usd, + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, "DATE" as "_AIRBYTE_START_AT", lag("DATE") over ( partition by id, currency, cast(nzd as varchar2(4000)) order by - "DATE" asc nulls last, - "DATE" desc, + "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc ) as "_AIRBYTE_END_AT", case when row_number() over ( partition by id, currency, cast(nzd as varchar2(4000)) order by - "DATE" asc nulls last, - "DATE" desc, + "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc ) = 1 then 1 else 0 end as "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", @@ -60,7 +58,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by "_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_START_AT", "_AIRBYTE_EMITTED_AT" + partition by + "_AIRBYTE_UNIQUE_KEY", + "_AIRBYTE_START_AT", + "_AIRBYTE_EMITTED_AT" order by "_AIRBYTE_ACTIVE_ROW" desc, "_AIRBYTE_AB_ID" ) as "_AIRBYTE_ROW_NUM", ora_hash( @@ -80,14 +81,14 @@ dedup_data as ( select "_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 5ceb844df0ae3..b5d7f740ba6eb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -21,9 +21,9 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - "date", - "partition", + "id", + "date", + "partition", "date" as _airbyte_start_at, lag("date") over ( partition by "id" @@ -49,7 +49,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as @@ -67,9 +70,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - "date", - "partition", + "id", + "date", + "partition", _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index c1b4813412748..53ef64cb928a8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -21,8 +21,8 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - "date", + "id", + "date", "date" as _airbyte_start_at, lag("date") over ( partition by "id" @@ -48,7 +48,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as @@ -66,8 +69,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - "date", + "id", + "date", _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 3f1ab268c7d40..8772de10b5e74 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -56,11 +56,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by {{ adapter.quote('id') }} @@ -86,7 +86,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -100,9 +103,9 @@ dedup_data as ( 
select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index d6592cc28f8fa..1eba7ba7bd0ba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -56,10 +56,10 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by {{ adapter.quote('id') }} @@ -85,7 +85,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -99,8 +102,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 450c207deea6c..c9c2e087d956b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -21,25 +21,27 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + 
"id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + lag(_ab_cdc_lsn) over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -51,7 +53,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as varchar ), cast(_ab_cdc_updated_at as varchar @@ -77,11 +82,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a44a3ee1ffd4f..3db3150ff2766 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -25,14 +25,14 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, "date" as _airbyte_start_at, lag("date") over ( partition by "id", currency, cast(nzd as @@ -62,7 +62,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as @@ -80,14 +83,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & 
characters", + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index a1466c6f433d8..438b303238b5e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -21,26 +21,30 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -52,7 +56,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as varchar ), cast(_ab_cdc_updated_at as varchar @@ -82,12 +89,12 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 993d6b15efad0..414ed447cc0b5 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -21,20 +21,21 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + "id", + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, @@ -47,7 +48,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as @@ -65,7 +69,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", + "id", + _ab_cdc_updated_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index d2a4347421f07..36303d71ef60e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -9,6 +9,7 @@ select _airbyte_unique_key, "id", + _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index a31227240b16b..8fb3cb3a5c344 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -10,6 +10,7 @@ with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( -- depends_on: "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded select jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -25,6 +26,9 @@ select cast("id" as bigint ) as "id", + cast(_ab_cdc_updated_at as + float +) as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -37,6 +41,8 @@ where 1 = 1 select md5(cast(coalesce(cast("id" as varchar +), '') || '-' || coalesce(cast(_ab_cdc_updated_at as + varchar ), '') as varchar )) as _airbyte_renamed_dedup_cdc_excluded_hashid, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index 332bcdac0d8ab..e75261bd70a4e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -8,6 +8,7 @@ -- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index 54dbe8bb35287..f7a91a73a73cb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -8,6 +8,7 @@ -- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} select cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index d63cd07a8a4dd..98325193a5f00 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -56,27 +56,29 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + lag(_ab_cdc_lsn) over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -88,7 +90,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -102,11 +107,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 3a2d131cb654c..87453dc261145 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', + adapter.quote('id'), + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + {{ adapter.quote('id') }}, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by {{ adapter.quote('id') }}, currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + {{ adapter.quote('id') }}, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 61210186eb00c..36ce51399a3f3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -56,28 +56,32 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 
adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -89,7 +93,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_log_pos as {{ dbt_utils.type_string() }}) + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_log_pos as {{ dbt_utils.type_string() }}) order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -103,12 +110,12 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 89a414852dbc8..0c50939426f79 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -56,22 +56,23 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) 
table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + {{ adapter.quote('id') }}, + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, @@ -84,7 +85,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -98,7 +102,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, + {{ adapter.quote('id') }}, + _ab_cdc_updated_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index 9b58ab7c73f2f..ca5093eb3e17e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -9,6 +9,7 @@ select _airbyte_unique_key, {{ adapter.quote('id') }}, + _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index 8b713b1e15b3d..be9bbfcd86758 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -9,6 +9,7 @@ select {{ dbt_utils.surrogate_key([ adapter.quote('id'), + '_ab_cdc_updated_at', ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, tmp.* from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 4fa8039ff1a85..5db2e106d7ddf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', + adapter.quote('id'), + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index f88aa6e23f3b5..3803571720588 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -56,27 +56,29 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -88,7 +90,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -102,11 +107,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 5536e95b30750..3ea9e1c724fb6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded_scd__dbt_tmp" ); - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" from "renamed_dedup_cdc_excluded_scd__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index a5cc40567b2cb..3fec1976ed9d0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded__dbt_tmp" ); - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") ( - select "_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + select 
"_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" from "renamed_dedup_cdc_excluded__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index 502e7141b3e86..5f39006c6490b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded_stg__dbt_tmp" ); - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" from "renamed_dedup_cdc_excluded_stg__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 7693af7ef2e63..dfe10c6da794d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded_scd__dbt_tmp" ); - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at") + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", 
"_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at" + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" from "renamed_dedup_cdc_excluded_scd__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index c8edd1056dd5d..c1d1c310179d3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded__dbt_tmp" ); - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at") + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") ( - select "_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at" + select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" from "renamed_dedup_cdc_excluded__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index 62a33963d7a20..55db812277ae0 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded_stg__dbt_tmp" ); - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at") + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at" + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" from "renamed_dedup_cdc_excluded_stg__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index f56ebb0d393ad..0f50d29444876 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -21,9 +21,9 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(coalesce(cast(id as varchar), '') as varchar)) as _airbyte_unique_key, - id, - date, - "partition", + id, + date, + "partition", date as _airbyte_start_at, lag(date) over ( partition by id @@ -49,7 +49,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as varchar), '') || '-' || coalesce(cast(_airbyte_start_at as varchar), '') || '-' || coalesce(cast(_airbyte_emitted_at as varchar), '') as varchar)) as _airbyte_unique_key_scd, @@ -59,9 +62,9 @@ 
dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - date, - "partition", + id, + date, + "partition", _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index f63d93b3787bc..1e30bf57e4595 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -56,11 +56,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - date, - {{ adapter.quote('partition') }}, + id, + date, + {{ adapter.quote('partition') }}, date as _airbyte_start_at, lag(date) over ( partition by id @@ -86,7 +86,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -100,9 +103,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - date, - {{ adapter.quote('partition') }}, + id, + date, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 6550ed24e0699..e2fb4b8024b06 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -21,14 +21,14 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') as varchar)) as _airbyte_unique_key, - id, - currency, 
- date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, date as _airbyte_start_at, lag(date) over ( partition by id, currency, cast(nzd as varchar) @@ -54,7 +54,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as varchar), '') || '-' || coalesce(cast(_airbyte_start_at as varchar), '') || '-' || coalesce(cast(_airbyte_emitted_at as varchar), '') as varchar)) as _airbyte_unique_key_scd, @@ -64,14 +67,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 28eaf5da09dd3..81b85e492cd51 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, date as _airbyte_start_at, lag(date) over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, 
+ date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 24c88aab4c0d3..363a39ec25fe7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, date as _airbyte_start_at, lag(date) over ( partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 7bc1f2d4776b6..73631957ce269 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -20,9 +20,9 @@ scd_data as ( ), '') as varchar )) as _AIRBYTE_UNIQUE_KEY, - ID, - DATE, - PARTITION, + ID, + DATE, + PARTITION, DATE as _AIRBYTE_START_AT, lag(DATE) over ( partition by ID @@ -48,7 +48,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _AIRBYTE_UNIQUE_KEY, _AIRBYTE_START_AT, _AIRBYTE_EMITTED_AT + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID ) as _AIRBYTE_ROW_NUM, md5(cast(coalesce(cast(_AIRBYTE_UNIQUE_KEY as @@ -66,9 +69,9 @@ dedup_data as ( select _AIRBYTE_UNIQUE_KEY, _AIRBYTE_UNIQUE_KEY_SCD, - ID, - DATE, - PARTITION, + ID, + DATE, + PARTITION, _AIRBYTE_START_AT, _AIRBYTE_END_AT, _AIRBYTE_ACTIVE_ROW, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 0333595a99b97..167cdb066cb4c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -56,11 +56,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'ID', + 'ID', ]) }} as _AIRBYTE_UNIQUE_KEY, - ID, - DATE, - PARTITION, + ID, + DATE, + PARTITION, DATE as _AIRBYTE_START_AT, lag(DATE) over ( partition by ID @@ -86,7 +86,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _AIRBYTE_UNIQUE_KEY, _AIRBYTE_START_AT, _AIRBYTE_EMITTED_AT + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID ) as _AIRBYTE_ROW_NUM, {{ dbt_utils.surrogate_key([ @@ -100,9 +103,9 @@ dedup_data as ( select _AIRBYTE_UNIQUE_KEY, _AIRBYTE_UNIQUE_KEY_SCD, - ID, - DATE, - PARTITION, + ID, + DATE, + PARTITION, _AIRBYTE_START_AT, _AIRBYTE_END_AT, _AIRBYTE_ACTIVE_ROW, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 804ca297a46fd..20ae8e46add52 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -24,14 +24,14 @@ scd_data as ( ), '') as varchar )) as _AIRBYTE_UNIQUE_KEY, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, DATE as _AIRBYTE_START_AT, lag(DATE) over ( partition by ID, CURRENCY, cast(NZD as @@ -61,7 +61,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _AIRBYTE_UNIQUE_KEY, _AIRBYTE_START_AT, _AIRBYTE_EMITTED_AT + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID ) as _AIRBYTE_ROW_NUM, md5(cast(coalesce(cast(_AIRBYTE_UNIQUE_KEY as @@ -79,14 +82,14 @@ dedup_data as ( select _AIRBYTE_UNIQUE_KEY, _AIRBYTE_UNIQUE_KEY_SCD, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, _AIRBYTE_START_AT, _AIRBYTE_END_AT, _AIRBYTE_ACTIVE_ROW, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index ca5c91ab9c9be..2b62f6776a223 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'ID', - 'CURRENCY', - 'NZD', + 'ID', + 'CURRENCY', + 'NZD', ]) }} as _AIRBYTE_UNIQUE_KEY, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - HKD_SPECIAL___CHARACTERS, - NZD, - USD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + HKD_SPECIAL___CHARACTERS, + NZD, + USD, DATE as _AIRBYTE_START_AT, lag(DATE) over ( partition by ID, CURRENCY, cast(NZD as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update 
queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _AIRBYTE_UNIQUE_KEY, _AIRBYTE_START_AT, _AIRBYTE_EMITTED_AT + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID ) as _AIRBYTE_ROW_NUM, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _AIRBYTE_UNIQUE_KEY, _AIRBYTE_UNIQUE_KEY_SCD, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - HKD_SPECIAL___CHARACTERS, - NZD, - USD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + HKD_SPECIAL___CHARACTERS, + NZD, + USD, _AIRBYTE_START_AT, _AIRBYTE_END_AT, _AIRBYTE_ACTIVE_ROW, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json index c8efc48f5b3e3..9b44f5e68d18a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json @@ -96,6 +96,9 @@ "properties": { "id": { "type": "integer" + }, + "_ab_cdc_updated_at": { + "type": ["null", "number"] } } }, @@ -136,7 +139,7 @@ "default_cursor_field": [] }, "sync_mode": "incremental", - "cursor_field": [], + "cursor_field": ["_ab_cdc_lsn"], "destination_sync_mode": "append_dedup", "primary_key": [["id"]] }, diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 1a8993ddf8cc3..8cae3703bc9fc 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -376,12 +376,12 @@ def generate_json_parsing_model(self, from_table: str, column_names: Dict[str, T -- depends_on: {{ from_table }} {{ unnesting_before_query }} select - {%- if parent_hash_id %} +{%- if parent_hash_id %} {{ parent_hash_id }}, - {%- endif %} - {%- for field in fields %} +{%- endif %} +{%- for field in fields %} {{ field }}, - {%- endfor %} +{%- endfor %} {{ col_ab_id }}, {{ col_emitted_at }}, {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }} @@ -454,12 +454,12 @@ def generate_column_typing_model(self, from_table: str, column_names: Dict[str, -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: {{ from_table }} select - {%- if parent_hash_id %} +{%- if parent_hash_id %} {{ parent_hash_id }}, - {%- endif %} - {%- for field in fields %} +{%- endif %} +{%- for field in fields %} {{ field }}, - {%- endfor %} +{%- endfor %} {{ col_ab_id }}, {{ col_emitted_at }}, {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }} @@ -573,9 +573,9 @@ def generate_snowflake_timestamp_statement(column_name: str) -> str: template = Template( """ case - {% for format_item in formats %} +{% for format_item in formats %} when {{column_name}} regexp '{{format_item['regex']}}' then to_timestamp_tz({{column_name}}, '{{format_item['format']}}') - {% endfor %} +{% endfor %} when {{column_name}} = '' then NULL else to_timestamp_tz({{column_name}}) end as {{column_name}} @@ 
-591,12 +591,12 @@ def generate_id_hashing_model(self, from_table: str, column_names: Dict[str, Tup -- depends_on: {{ from_table }} select {{ '{{' }} dbt_utils.surrogate_key([ - {%- if parent_hash_id %} +{%- if parent_hash_id %} {{ parent_hash_id }}, - {%- endif %} - {%- for field in fields %} +{%- endif %} +{%- for field in fields %} {{ field }}, - {%- endfor %} +{%- endfor %} ]) {{ '}}' }} as {{ hash_id }}, tmp.* from {{ from_table }} tmp @@ -649,11 +649,12 @@ def safe_cast_to_string(definition: Dict, column_name: str, destination_type: De return col def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> str: - order_null = "is null asc" + cursor_field = self.get_cursor_field(column_names) + order_null = f"is null asc,\n {cursor_field} desc" if self.destination_type.value == DestinationType.ORACLE.value: - order_null = "asc nulls last" + order_null = "desc nulls last" if self.destination_type.value == DestinationType.MSSQL.value: - # SQL Server treats NULL values as the lowest values, then sorted in ascending order, NULLs come first. + # SQL Server treats NULL values as the lowest values, thus NULLs come last when desc. order_null = "desc" lag_begin = "lag" @@ -663,7 +664,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup # ClickHouse doesn't support lag() yet, this is a workaround solution # Ref: https://clickhouse.com/docs/en/sql-reference/window-functions/ lag_begin = "anyOrNull" - lag_end = "ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" + lag_end = " ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" input_data_table = "input_data_with_active_row_num" enable_left_join_null = "" @@ -687,7 +688,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup quoted_col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at", in_jinja=True) quoted_col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at", in_jinja=True) cdc_active_row_pattern = f" and {col_cdc_deleted_at} is null" - cdc_updated_order_pattern = f", {col_cdc_updated_at} desc" + cdc_updated_order_pattern = f"\n {col_cdc_updated_at} desc," cdc_cols = ( f", {cast_begin}{col_cdc_deleted_at}{cast_as}" + "{{ dbt_utils.type_string() }}" @@ -701,15 +702,32 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup if "_ab_cdc_log_pos" in column_names.keys(): col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos") quoted_col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos", in_jinja=True) - cdc_updated_order_pattern += f", {col_cdc_log_pos} desc" + cdc_updated_order_pattern += f"\n {col_cdc_log_pos} desc," cdc_cols += f", {cast_begin}{col_cdc_log_pos}{cast_as}" + "{{ dbt_utils.type_string() }}" + f"{cast_end}" quoted_cdc_cols += f", {quoted_col_cdc_log_pos}" + if ( + self.destination_type == DestinationType.BIGQUERY + and self.get_cursor_field_property_name(column_names) != self.airbyte_emitted_at + and is_number(self.properties[self.get_cursor_field_property_name(column_names)]["type"]) + ): + # partition by float columns is not allowed in BigQuery, cast it to string + airbyte_start_at_string = ( + cast_begin + + self.name_transformer.normalize_column_name("_airbyte_start_at") + + cast_as + + "{{ dbt_utils.type_string() }}" + + cast_end + ) + else: + airbyte_start_at_string = self.name_transformer.normalize_column_name("_airbyte_start_at") + jinja_variables = { "active_row": 
self.name_transformer.normalize_column_name("_airbyte_active_row"), "airbyte_end_at": self.name_transformer.normalize_column_name("_airbyte_end_at"), "airbyte_row_num": self.name_transformer.normalize_column_name("_airbyte_row_num"), "airbyte_start_at": self.name_transformer.normalize_column_name("_airbyte_start_at"), + "airbyte_start_at_string": airbyte_start_at_string, "airbyte_unique_key_scd": self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), "cdc_active_row": cdc_active_row_pattern, "cdc_cols": cdc_cols, @@ -717,7 +735,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup "col_ab_id": self.get_ab_id(), "col_emitted_at": self.get_emitted_at(), "col_normalized_at": self.get_normalized_at(), - "cursor_field": self.get_cursor_field(column_names), + "cursor_field": cursor_field, "enable_left_join_null": enable_left_join_null, "fields": self.list_fields(column_names), "from_table": from_table, @@ -745,9 +763,8 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup row_number() over ( partition by {{ primary_key_partition | join(", ") }} order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc ) as _airbyte_active_row_num from input_data ),""" @@ -759,11 +776,9 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ lag_begin }}({{ cursor_field }}) over ( partition by {{ primary_key_partition | join(", ") }} order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} - {{ lag_end }} - ) as {{ airbyte_end_at }}""" + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc + {{ lag_end }}) as {{ airbyte_end_at }}""" ).render(jinja_variables) jinja_variables["scd_columns_sql"] = scd_columns_sql else: @@ -772,16 +787,14 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup lag({{ cursor_field }}) over ( partition by {{ primary_key_partition | join(", ") }} order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc ) as {{ airbyte_end_at }}, case when row_number() over ( partition by {{ primary_key_partition | join(", ") }} order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc ) = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}""" ).render(jinja_variables) jinja_variables["scd_columns_sql"] = scd_columns_sql @@ -803,9 +816,9 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup -- build a subset of {{ unique_key }} from rows that are new select distinct {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} +{%- for primary_key in primary_keys %} {{ primary_key }}, - {%- endfor %} +{%- endfor %} ]) {{ '}}' }} as {{ unique_key }} from new_data ), @@ -840,17 +853,17 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - {%- if 
parent_hash_id %} - {{ parent_hash_id }}, - {%- endif %} +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} - {{ primary_key }}, - {%- endfor %} +{%- for primary_key in primary_keys %} + {{ primary_key }}, +{%- endfor %} ]) {{ '}}' }} as {{ unique_key }}, - {%- for field in fields %} - {{ field }}, - {%- endfor %} +{%- for field in fields %} + {{ field }}, +{%- endfor %} {{ cursor_field }} as {{ airbyte_start_at }}, {{ scd_columns_sql }}, {{ col_ab_id }}, @@ -863,7 +876,10 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by {{ unique_key }}, {{ airbyte_start_at }}, {{ col_emitted_at }}{{ cdc_cols }} + partition by + {{ unique_key }}, + {{ airbyte_start_at_string }}, + {{ col_emitted_at }}{{ cdc_cols }} order by {{ active_row }} desc, {{ col_ab_id }} ) as {{ airbyte_row_num }}, {{ '{{' }} dbt_utils.surrogate_key([ @@ -875,14 +891,14 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup from scd_data ) select - {%- if parent_hash_id %} - {{ parent_hash_id }}, - {%- endif %} +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} {{ unique_key }}, {{ airbyte_unique_key_scd }}, - {%- for field in fields %} - {{ field }}, - {%- endfor %} +{%- for field in fields %} + {{ field }}, +{%- endfor %} {{ airbyte_start_at }}, {{ airbyte_end_at }}, {{ active_row }}, @@ -895,9 +911,22 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup ).render(jinja_variables) return sql + def get_cursor_field_property_name(self, column_names: Dict[str, Tuple[str, str]]) -> str: + if not self.cursor_field: + if "_ab_cdc_updated_at" in column_names.keys(): + return "_ab_cdc_updated_at" + elif "_ab_cdc_log_pos" in column_names.keys(): + return "_ab_cdc_log_pos" + else: + return self.airbyte_emitted_at + elif len(self.cursor_field) == 1: + return self.cursor_field[0] + else: + raise ValueError(f"Unsupported nested cursor field {'.'.join(self.cursor_field)} for stream {self.stream_name}") + def get_cursor_field(self, column_names: Dict[str, Tuple[str, str]], in_jinja: bool = False) -> str: if not self.cursor_field: - cursor = self.name_transformer.normalize_column_name(self.airbyte_emitted_at, in_jinja) + cursor = self.name_transformer.normalize_column_name(self.get_cursor_field_property_name(column_names), in_jinja) elif len(self.cursor_field) == 1: if not is_airbyte_column(self.cursor_field[0]): cursor = column_names[self.cursor_field[0]][0] @@ -906,7 +935,6 @@ def get_cursor_field(self, column_names: Dict[str, Tuple[str, str]], in_jinja: b cursor = self.cursor_field[0] else: raise ValueError(f"Unsupported nested cursor field {'.'.join(self.cursor_field)} for stream {self.stream_name}") - return cursor def list_primary_keys(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]: @@ -952,15 +980,15 @@ def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[st -- Final base SQL model -- depends_on: {{ from_table }} select - {%- if parent_hash_id %} +{%- if parent_hash_id %} {{ parent_hash_id }}, - {%- endif %} - {%- if unique_key %} +{%- endif %} +{%- if unique_key %} {{ unique_key }}, - {%- endif %} - {%- for field in fields %} +{%- endif %} +{%- for field in fields %} {{ field }}, - {%- endfor %} +{%- endfor %} {{ col_ab_id }}, {{ col_emitted_at 
}}, {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}, From c5d4a973631ccae7918b9d7881f875a265f30619 Mon Sep 17 00:00:00 2001 From: Christophe Duong Date: Thu, 6 Jan 2022 18:59:09 +0100 Subject: [PATCH 052/215] =?UTF-8?q?=F0=9F=90=9B=20Fix=20normalization=20is?= =?UTF-8?q?sue=20with=20quoted=20&=20case=20sensitive=20columns=20(#9317)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bases/base-normalization/.gitignore | 1 + .../multiple_column_names_conflicts_stg.sql | 83 +++++++++++++++++ .../models/generated/sources.yml | 1 + .../test_normalization/dedup_cdc_excluded.sql | 34 ------- .../multiple_column_names_conflicts_stg.sql | 86 +++++++++++++++++ .../dedup_cdc_excluded_ab1.sql | 21 ----- .../dedup_cdc_excluded_ab2.sql | 21 ----- .../test_normalization/exchange_rate_ab1.sql | 24 ----- .../test_normalization/exchange_rate_ab2.sql | 24 ----- .../test_normalization/exchange_rate_ab3.sql | 24 ----- .../test_normalization/pos_dedup_cdcx_ab1.sql | 22 ----- .../test_normalization/pos_dedup_cdcx_ab2.sql | 22 ----- .../test_normalization/dedup_cdc_excluded.sql | 24 ----- .../models/generated/sources.yml | 1 + .../dedup_cdc_excluded_scd.sql | 5 - .../test_normalization/dedup_cdc_excluded.sql | 5 - .../multiple_column_names_conflicts_stg.sql | 76 +++++++++++++++ .../models/generated/sources.yml | 1 + .../multiple_column_names_conflicts_stg.sql | 72 ++++++++++++++ .../models/generated/sources.yml | 1 + .../multiple_column_names_conflicts_stg.sql | 87 +++++++++++++++++ .../models/generated/sources.yml | 1 + .../multiple_column_names_conflicts_scd.sql | 93 +++++++++++++++++++ .../multiple_column_names_conflicts.sql | 28 ++++++ .../multiple_column_names_conflicts_stg.sql | 85 +++++++++++++++++ .../multiple_column_names_conflicts_ab1.sql | 24 +++++ .../multiple_column_names_conflicts_ab2.sql | 24 +++++ .../multiple_column_names_conflicts_scd.sql} | 80 ++++++++-------- .../multiple_column_names_conflicts.sql | 27 ++++++ .../multiple_column_names_conflicts_stg.sql | 24 +++++ .../models/generated/sources.yml | 1 + .../multiple_column_names_conflicts_scd.sql | 14 +++ .../multiple_column_names_conflicts.sql | 14 +++ .../multiple_column_names_conflicts_stg.sql | 14 +++ .../multiple_column_names_conflicts_stg.sql | 62 +++++++++++++ .../models/generated/sources.yml | 1 + .../MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql | 81 ++++++++++++++++ .../models/generated/sources.yml | 1 + .../data_input/catalog.json | 38 ++++++++ .../data_input/messages.txt | 2 + .../destination_name_transformer.py | 38 ++++++++ .../transform_catalog/stream_processor.py | 8 +- .../src/main/resources/spec.json | 2 +- 43 files changed, 1030 insertions(+), 267 deletions(-) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/{clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql => postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql} (53%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql diff --git a/airbyte-integrations/bases/base-normalization/.gitignore b/airbyte-integrations/bases/base-normalization/.gitignore index 707446495c018..5e426c453be52 100644 --- a/airbyte-integrations/bases/base-normalization/.gitignore +++ b/airbyte-integrations/bases/base-normalization/.gitignore @@ -20,6 +20,7 @@ integration_tests/normalization_test_output/**/*.yml # Simple Streams !integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql !integration_tests/normalization_test_output/**/exchange_rate.sql +!integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql # Nested Streams # Parent table !integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..f5079fc4f3003 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,83 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`multiple_column_names_conflicts_stg` + OPTIONS() + as +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts +select + json_extract_scalar(_airbyte_data, "$['id']") as id, + json_extract_scalar(_airbyte_data, "$['User Id']") as User_Id, + json_extract_scalar(_airbyte_data, "$['user_id']") as user_id_1, + json_extract_scalar(_airbyte_data, "$['User id']") as User_id_2, + json_extract_scalar(_airbyte_data, "$['user id']") as user_id_3, + json_extract_scalar(_airbyte_data, "$['User@Id']") as User_Id_4, + json_extract_scalar(_airbyte_data, "$['UserId']") as UserId, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + int64 +) as id, + cast(User_Id as + string +) as User_Id, + cast(user_id_1 as + float64 +) as user_id_1, + cast(User_id_2 as + float64 +) as User_id_2, + cast(user_id_3 as + float64 +) as user_id_3, + cast(User_Id_4 as + string +) as User_Id_4, + cast(UserId as + float64 +) as UserId, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(User_Id as + string +), ''), '-', coalesce(cast(user_id_1 as + string +), ''), '-', coalesce(cast(User_id_2 as + string +), ''), '-', coalesce(cast(user_id_3 as + string +), ''), '-', coalesce(cast(User_Id_4 as + string +), ''), '-', coalesce(cast(UserId as + string +), '')) as + string +))) as _airbyte_multiple_column_names_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml index 45c338b893cab..0e116b2bbec5d 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml @@ -9,5 +9,6 @@ sources: - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts - name: _airbyte_raw_pos_dedup_cdcx - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql deleted file mode 100644 index 8aea31930d35c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ /dev/null @@ -1,34 +0,0 @@ - - - - - create table test_normalization.dedup_cdc_excluded - - - - engine = MergeTree() - - order by (tuple()) - - as ( - --- Final base SQL model --- depends_on: test_normalization.dedup_cdc_excluded_scd -select - _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from test_normalization.dedup_cdc_excluded_scd --- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded -where 1 = 1 -and _airbyte_active_row = 1 - - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..b22cc4439922a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,86 @@ + + + create view _airbyte_test_normalization.multiple_column_names_conflicts_stg__dbt_tmp + + as ( + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_multiple_column_names_conflicts +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'User Id') as "User Id", + JSONExtractRaw(_airbyte_data, 'user_id') as user_id, + JSONExtractRaw(_airbyte_data, 'User id') as "User id", + JSONExtractRaw(_airbyte_data, 'user id') as "user id", + JSONExtractRaw(_airbyte_data, 'User@Id') as "User@Id", + JSONExtractRaw(_airbyte_data, 'UserId') as UserId, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_multiple_column_names_conflicts as 
table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from "User Id"), 'String'), 'null') as "User Id", + accurateCastOrNull(user_id, ' + Float64 +') as user_id, + accurateCastOrNull("User id", ' + Float64 +') as "User id", + accurateCastOrNull("user id", ' + Float64 +') as "user id", + nullif(accurateCastOrNull(trim(BOTH '"' from "User@Id"), 'String'), 'null') as "User@Id", + accurateCastOrNull(UserId, ' + Float64 +') as UserId, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString("User Id") || '~' || + + + toString(user_id) || '~' || + + + toString("User id") || '~' || + + + toString("user id") || '~' || + + + toString("User@Id") || '~' || + + + toString(UserId) + + ))) as _airbyte_multiple_co__ames_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql deleted file mode 100644 index 5b9ee4b6b6820..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as name, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias --- dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql deleted file mode 100644 index 6f7e747a0699a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_cdc_excluded_ab1') }} -select - accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), '{{ dbt_utils.type_string() }}'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, '{{ dbt_utils.type_float() }}') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_cdc_excluded_ab1') }} --- dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql deleted file mode 100644 index f9b9da32d25d1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, - {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as 
table_alias --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql deleted file mode 100644 index 49cb5ea4c759b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('exchange_rate_ab1') }} -select - accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, - parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }})) as date, - parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col, - accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, - nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, - accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, - nullif(accurateCastOrNull(trim(BOTH '"' from {{ quote('column`_\'with""_quotes') }}), '{{ dbt_utils.type_string() }}'), 'null') as {{ quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('exchange_rate_ab1') }} --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql deleted file mode 100644 index c45103fae85c5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'date', - 'timestamp_col', - quote('HKD@spéçiäl & characters'), - 'HKD_special___characters', - 'NZD', - 'USD', - quote('column`_\'with""_quotes'), - ]) }} as _airbyte_exchange_rate_hashid, - tmp.* -from {{ ref('exchange_rate_ab2') }} tmp --- exchange_rate -where 1 = 1 - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql deleted file mode 100644 index 909b7bd2366b6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as name, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_log_pos'], ['_ab_cdc_log_pos']) }} as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} as table_alias --- pos_dedup_cdcx -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql deleted file mode 100644 index 0b9192b2620a4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('pos_dedup_cdcx_ab1') }} -select - accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), '{{ dbt_utils.type_string() }}'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, '{{ dbt_utils.type_float() }}') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_deleted_at, - accurateCastOrNull(_ab_cdc_log_pos, '{{ dbt_utils.type_float() }}') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('pos_dedup_cdcx_ab1') }} --- pos_dedup_cdcx -where 1 = 1 -{{ 
incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql deleted file mode 100644 index 6a6248e7cb6a8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_cdc_excluded_scd') }} -select - _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from {{ ref('dedup_cdc_excluded_scd') }} --- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml index 45c338b893cab..0e116b2bbec5d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml @@ -9,5 +9,6 @@ sources: - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts - name: _airbyte_raw_pos_dedup_cdcx - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql deleted file mode 100644 index 029806e67c97d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,5 +0,0 @@ - - insert into test_normalization.dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", 
"_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" - from dedup_cdc_excluded_scd__dbt_tmp - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql deleted file mode 100644 index bd7ed508ea036..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ /dev/null @@ -1,5 +0,0 @@ - - insert into test_normalization.dedup_cdc_excluded ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" - from dedup_cdc_excluded__dbt_tmp - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..6f5a0cc6eb2d8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,76 @@ +USE [test_normalization]; + execute('create view _airbyte_test_normalization."multiple_column_names_conflicts_stg__dbt_tmp" as + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "test_normalization".test_normalization._airbyte_raw_multiple_column_names_conflicts +select + json_value(_airbyte_data, ''$."id"'') as id, + json_value(_airbyte_data, ''$."User Id"'') as "User Id", + json_value(_airbyte_data, ''$."user_id"'') as user_id, + json_value(_airbyte_data, ''$."User id"'') as "User id_1", + json_value(_airbyte_data, ''$."user id"'') as "user id_2", + json_value(_airbyte_data, ''$."User@Id"'') as "User@Id", + json_value(_airbyte_data, ''$."UserId"'') as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from "test_normalization".test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + bigint +) as id, + cast("User Id" as + 
VARCHAR(max)) as "User Id", + cast(user_id as + float +) as user_id, + cast("User id_1" as + float +) as "User id_1", + cast("user id_2" as + float +) as "user id_2", + cast("User@Id" as + VARCHAR(max)) as "User@Id", + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(id as + VARCHAR(max)), ''''), ''-'', coalesce(cast("User Id" as + VARCHAR(max)), ''''), ''-'', coalesce(cast(user_id as + VARCHAR(max)), ''''), ''-'', coalesce(cast("User id_1" as + VARCHAR(max)), ''''), ''-'', coalesce(cast("user id_2" as + VARCHAR(max)), ''''), ''-'', coalesce(cast("User@Id" as + VARCHAR(max)), ''''), ''-'', coalesce(cast(userid as + VARCHAR(max)), ''''),''''), '''') as + VARCHAR(max)), '''')), 2) as _airbyte_multiple_col__ames_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + '); + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml index 45c338b893cab..0e116b2bbec5d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml @@ -9,5 +9,6 @@ sources: - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts - name: _airbyte_raw_pos_dedup_cdcx - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..ba1ab02165406 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,72 @@ + + create view _airbyte_test_normalization.`multiple_column_names_conflicts_stg__dbt_tmp` as ( + +with __dbt__CTE__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_multiple_column_names_conflicts +select + json_value(_airbyte_data, + '$."id"') as id, + json_value(_airbyte_data, + '$."User Id"') as `User Id`, + json_value(_airbyte_data, + '$."user_id"') as user_id, + json_value(_airbyte_data, + '$."User id"') as `User id_1`, + json_value(_airbyte_data, + 
'$."user id"') as `user id_2`, + json_value(_airbyte_data, + '$."User@Id"') as `User@Id`, + json_value(_airbyte_data, + '$."UserId"') as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__CTE__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__CTE__multiple_column_names_conflicts_ab1 +select + cast(id as + signed +) as id, + cast(`User Id` as char) as `User Id`, + cast(user_id as + float +) as user_id, + cast(`User id_1` as + float +) as `User id_1`, + cast(`user id_2` as + float +) as `user id_2`, + cast(`User@Id` as char) as `User@Id`, + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__CTE__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__CTE__multiple_column_names_conflicts_ab2 +select + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(`User Id` as char), ''), '-', coalesce(cast(user_id as char), ''), '-', coalesce(cast(`User id_1` as char), ''), '-', coalesce(cast(`user id_2` as char), ''), '-', coalesce(cast(`User@Id` as char), ''), '-', coalesce(cast(userid as char), '')) as char)) as _airbyte_multiple_col__ames_conflicts_hashid, + tmp.* +from __dbt__CTE__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml index 45c338b893cab..0e116b2bbec5d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml @@ -9,5 +9,6 @@ sources: - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts - name: _airbyte_raw_pos_dedup_cdcx - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..5c34c11584562 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,87 @@ + + create view test_normalization.multiple_column_names_conflicts_stg__dbt_tmp as + +with dbt__cte__multiple_column_names_conflicts_ab1__ as ( + +-- SQL model to parse JSON blob stored 
in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.airbyte_raw_multiple_column_names_conflicts +select + json_value("_AIRBYTE_DATA", '$."id"') as id, + json_value("_AIRBYTE_DATA", '$."User Id"') as user_id, + json_value("_AIRBYTE_DATA", '$."user_id"') as user_id_1, + json_value("_AIRBYTE_DATA", '$."User id"') as user_id_2, + json_value("_AIRBYTE_DATA", '$."user id"') as user_id_3, + json_value("_AIRBYTE_DATA", '$."User@Id"') as user_id_4, + json_value("_AIRBYTE_DATA", '$."UserId"') as userid, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from test_normalization.airbyte_raw_multiple_column_names_conflicts +-- multiple_column_names_conflicts +where 1 = 1 + +), dbt__cte__multiple_column_names_conflicts_ab2__ as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: dbt__cte__multiple_column_names_conflicts_ab1__ +select + cast(id as + numeric +) as id, + cast(user_id as varchar2(4000)) as user_id, + cast(user_id_1 as + float +) as user_id_1, + cast(user_id_2 as + float +) as user_id_2, + cast(user_id_3 as + float +) as user_id_3, + cast(user_id_4 as varchar2(4000)) as user_id_4, + cast(userid as + float +) as userid, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from dbt__cte__multiple_column_names_conflicts_ab1__ +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: dbt__cte__multiple_column_names_conflicts_ab2__ +select + ora_hash( + + id || '~' || + + + user_id || '~' || + + + user_id_1 || '~' || + + + user_id_2 || '~' || + + + user_id_3 || '~' || + + + user_id_4 || '~' || + + + userid + + ) as "_AIRBYTE_MULTIPLE_COLUMN_NAMES_CONFLICTS_HASHID", + tmp.* +from dbt__cte__multiple_column_names_conflicts_ab2__ tmp +-- multiple_column_names_conflicts +where 1 = 1 + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml index c6b4b6023ba5f..3faad76c57b34 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml @@ -9,5 +9,6 @@ sources: - name: airbyte_raw_dedup_cdc_excluded - name: airbyte_raw_dedup_exchange_rate - name: airbyte_raw_exchange_rate + - name: airbyte_raw_multiple_column_names_conflicts - name: airbyte_raw_pos_dedup_cdcx - name: airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql new file mode 100644 index 0000000000000..e94644c18a173 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -0,0 +1,93 @@ + + + + create table "postgres".test_normalization."multiple_column_names_conflicts_scd" + as ( + +-- depends_on: ref('multiple_column_names_conflicts_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" + -- multiple_column_names_conflicts from "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as + varchar +), '') as + varchar +)) as _airbyte_unique_key, + "id", + "User Id", + user_id, + "User id", + "user id", + "User@Id", + userid, + _airbyte_emitted_at as _airbyte_start_at, + lag(_airbyte_emitted_at) over ( + partition by "id" + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_multiple_co__ames_conflicts_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as + varchar +), '') || '-' || coalesce(cast(_airbyte_start_at as + varchar +), '') || '-' || coalesce(cast(_airbyte_emitted_at as + varchar +), '') as + varchar +)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + "User Id", + user_id, + "User id", + "user id", + "User@Id", + userid, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_multiple_co__ames_conflicts_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql new file mode 100644 index 0000000000000..eba2d8af4fcee --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql @@ -0,0 +1,28 @@ + + + + create table "postgres".test_normalization."multiple_column_names_conflicts" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."multiple_column_names_conflicts_scd" +select + _airbyte_unique_key, + "id", + "User Id", + user_id, + "User id", + "user id", + "User@Id", + userid, + 
_airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_multiple_co__ames_conflicts_hashid +from "postgres".test_normalization."multiple_column_names_conflicts_scd" +-- multiple_column_names_conflicts from "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..7a2c133f995f7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,85 @@ + + + + create table "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" + as ( + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'User Id') as "User Id", + jsonb_extract_path_text(_airbyte_data, 'user_id') as user_id, + jsonb_extract_path_text(_airbyte_data, 'User id') as "User id", + jsonb_extract_path_text(_airbyte_data, 'user id') as "user id", + jsonb_extract_path_text(_airbyte_data, 'User@Id') as "User@Id", + jsonb_extract_path_text(_airbyte_data, 'UserId') as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast("id" as + bigint +) as "id", + cast("User Id" as + varchar +) as "User Id", + cast(user_id as + float +) as user_id, + cast("User id" as + float +) as "User id", + cast("user id" as + float +) as "user id", + cast("User@Id" as + varchar +) as "User@Id", + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + md5(cast(coalesce(cast("id" as + varchar +), '') || '-' || coalesce(cast("User Id" as + varchar +), '') || '-' || coalesce(cast(user_id as + varchar +), '') || '-' || coalesce(cast("User id" as + varchar +), '') || '-' || coalesce(cast("user id" as + varchar +), '') || '-' || coalesce(cast("User@Id" as + varchar +), '') || '-' || coalesce(cast(userid as + varchar +), '') as + varchar +)) as _airbyte_multiple_co__ames_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp 
+-- multiple_column_names_conflicts +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql new file mode 100644 index 0000000000000..7268a550c1560 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql @@ -0,0 +1,24 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['User Id'], ['User Id']) }} as {{ adapter.quote('User Id') }}, + {{ json_extract_scalar('_airbyte_data', ['user_id'], ['user_id']) }} as user_id, + {{ json_extract_scalar('_airbyte_data', ['User id'], ['User id']) }} as {{ adapter.quote('User id') }}, + {{ json_extract_scalar('_airbyte_data', ['user id'], ['user id']) }} as {{ adapter.quote('user id') }}, + {{ json_extract_scalar('_airbyte_data', ['User@Id'], ['User@Id']) }} as {{ adapter.quote('User@Id') }}, + {{ json_extract_scalar('_airbyte_data', ['UserId'], ['UserId']) }} as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} as table_alias +-- multiple_column_names_conflicts +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql new file mode 100644 index 0000000000000..afed155ffbd8d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql @@ -0,0 +1,24 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('multiple_column_names_conflicts_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ adapter.quote('User Id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('User Id') }}, + cast(user_id as {{ 
dbt_utils.type_float() }}) as user_id, + cast({{ adapter.quote('User id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('User id') }}, + cast({{ adapter.quote('user id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('user id') }}, + cast({{ adapter.quote('User@Id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('User@Id') }}, + cast(userid as {{ dbt_utils.type_float() }}) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('multiple_column_names_conflicts_ab1') }} +-- multiple_column_names_conflicts +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql similarity index 53% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 9e3c81ac18178..3fcf2e971cbc6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -1,18 +1,19 @@ {{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg'], + post_hook = ['delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)'], tags = [ "top-level" ] ) }} --- depends_on: ref('renamed_dedup_cdc_excluded_stg') +-- depends_on: ref('multiple_column_names_conflicts_stg') with {% if is_incremental() %} new_data as ( -- retrieve incremental "new" data select * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + from {{ ref('multiple_column_names_conflicts_stg') }} + -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} where 1 = 1 {{ incremental_clause('_airbyte_emitted_at') }} ), @@ -20,7 +21,7 @@ new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ - 'id', + adapter.quote('id'), ]) }} as _airbyte_unique_key from new_data ), @@ -31,58 +32,58 @@ empty_new_data as ( previous_active_scd_data as ( -- 
retrieve "incomplete old" data that needs to be updated with an end date because of new changes select - {{ star_intersect(ref('renamed_dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + {{ star_intersect(ref('multiple_column_names_conflicts_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} from {{ this }} as this_data -- make a join with new_data using primary key to filter active data that need to be updated only join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - --left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data + select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from new_data union all - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data + select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from previous_active_scd_data ), {% else %} input_data as ( select * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + from {{ ref('multiple_column_names_conflicts_stg') }} + -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} ), {% endif %} -input_data_with_active_row_num as ( - select *, - row_number() over ( - partition by id - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_active_row_num - from input_data -), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + adapter.quote('id'), ]) }} as _airbyte_unique_key, - id, - _ab_cdc_updated_at, - _ab_cdc_updated_at as _airbyte_start_at, - case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, - anyOrNull(_ab_cdc_updated_at) over ( - partition by id + {{ adapter.quote('id') }}, + {{ adapter.quote('User Id') }}, + user_id, + {{ adapter.quote('User id') }}, + {{ adapter.quote('user id') }}, + {{ adapter.quote('User@Id') }}, + userid, + _airbyte_emitted_at as _airbyte_start_at, + lag(_airbyte_emitted_at) over ( + partition by {{ adapter.quote('id') }} order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, - _airbyte_renamed_dedup_cdc_excluded_hashid - from input_data_with_active_row_num + _airbyte_multiple_co__ames_conflicts_hashid + from input_data ), dedup_data as ( select @@ -106,14 +107,19 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - _ab_cdc_updated_at, + {{ adapter.quote('id') }}, + {{ adapter.quote('User Id') }}, 
+ user_id, + {{ adapter.quote('User id') }}, + {{ adapter.quote('user id') }}, + {{ adapter.quote('User@Id') }}, + userid, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid + _airbyte_multiple_co__ames_conflicts_hashid from dedup_data where _airbyte_row_num = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql new file mode 100644 index 0000000000000..9aa1f765c0c8f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql @@ -0,0 +1,27 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('multiple_column_names_conflicts_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('User Id') }}, + user_id, + {{ adapter.quote('User id') }}, + {{ adapter.quote('user id') }}, + {{ adapter.quote('User@Id') }}, + userid, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_multiple_co__ames_conflicts_hashid +from {{ ref('multiple_column_names_conflicts_scd') }} +-- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..85ac753575979 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,24 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('multiple_column_names_conflicts_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('User Id'), + 'user_id', + adapter.quote('User id'), + adapter.quote('user id'), + adapter.quote('User@Id'), + 'userid', + ]) }} as _airbyte_multiple_co__ames_conflicts_hashid, + tmp.* +from {{ ref('multiple_column_names_conflicts_ab2') }} tmp +-- multiple_column_names_conflicts +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + 
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml index 45c338b893cab..0e116b2bbec5d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml @@ -9,5 +9,6 @@ sources: - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts - name: _airbyte_raw_pos_dedup_cdcx - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql new file mode 100644 index 0000000000000..77ba6202fe818 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -0,0 +1,14 @@ + + delete + from "postgres".test_normalization."multiple_column_names_conflicts_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "multiple_column_names_conflicts_scd__dbt_tmp" + ); + + insert into "postgres".test_normalization."multiple_column_names_conflicts_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid" + from "multiple_column_names_conflicts_scd__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql new file mode 100644 index 0000000000000..55bd30540c228 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql @@ -0,0 +1,14 @@ + + delete + from "postgres".test_normalization."multiple_column_names_conflicts" + where (_airbyte_unique_key) in ( + 
select (_airbyte_unique_key) + from "multiple_column_names_conflicts__dbt_tmp" + ); + + insert into "postgres".test_normalization."multiple_column_names_conflicts" ("_airbyte_unique_key", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid") + ( + select "_airbyte_unique_key", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid" + from "multiple_column_names_conflicts__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..9acf3e0a0ee38 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,14 @@ + + delete + from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "multiple_column_names_conflicts_stg__dbt_tmp" + ); + + insert into "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" ("_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "multiple_column_names_conflicts_stg__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..0777ba0c53932 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,62 @@ + + + create view "integrationtests"._airbyte_test_normalization."multiple_column_names_conflicts_stg__dbt_tmp" as ( + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization._airbyte_raw_multiple_column_names_conflicts +select + case when json_extract_path_text(_airbyte_data, 'id', true) != '' then json_extract_path_text(_airbyte_data, 'id', true) end as id, + case when json_extract_path_text(_airbyte_data, 'User Id', true) != '' then 
json_extract_path_text(_airbyte_data, 'User Id', true) end as "user id", + case when json_extract_path_text(_airbyte_data, 'user_id', true) != '' then json_extract_path_text(_airbyte_data, 'user_id', true) end as user_id, + case when json_extract_path_text(_airbyte_data, 'User id', true) != '' then json_extract_path_text(_airbyte_data, 'User id', true) end as "user id_1", + case when json_extract_path_text(_airbyte_data, 'user id', true) != '' then json_extract_path_text(_airbyte_data, 'user id', true) end as "user id_2", + case when json_extract_path_text(_airbyte_data, 'User@Id', true) != '' then json_extract_path_text(_airbyte_data, 'User@Id', true) end as "user@id", + case when json_extract_path_text(_airbyte_data, 'UserId', true) != '' then json_extract_path_text(_airbyte_data, 'UserId', true) end as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + bigint +) as id, + cast("user id" as varchar) as "user id", + cast(user_id as + float +) as user_id, + cast("user id_1" as + float +) as "user id_1", + cast("user id_2" as + float +) as "user id_2", + cast("user@id" as varchar) as "user@id", + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast("user id" as varchar), '') || '-' || coalesce(cast(user_id as varchar), '') || '-' || coalesce(cast("user id_1" as varchar), '') || '-' || coalesce(cast("user id_2" as varchar), '') || '-' || coalesce(cast("user@id" as varchar), '') || '-' || coalesce(cast(userid as varchar), '') as varchar)) as _airbyte_multiple_column_names_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml index 45c338b893cab..0e116b2bbec5d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml @@ -9,5 +9,6 @@ sources: - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts - name: _airbyte_raw_pos_dedup_cdcx - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql new file mode 100644 index 0000000000000..c9a26f11445b2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql @@ -0,0 +1,81 @@ + + create or replace view "AIRBYTE_DATABASE"._AIRBYTE_TEST_NORMALIZATION."MULTIPLE_COLUMN_NAMES_CONFLICTS_STG" as ( + +with __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "AIRBYTE_DATABASE".TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS +select + to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, + to_varchar(get_path(parse_json(_airbyte_data), '"User Id"')) as "User Id", + to_varchar(get_path(parse_json(_airbyte_data), '"user_id"')) as USER_ID, + to_varchar(get_path(parse_json(_airbyte_data), '"User id"')) as "User id", + to_varchar(get_path(parse_json(_airbyte_data), '"user id"')) as "user id", + to_varchar(get_path(parse_json(_airbyte_data), '"User@Id"')) as "User@Id", + to_varchar(get_path(parse_json(_airbyte_data), '"UserId"')) as USERID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "AIRBYTE_DATABASE".TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS as table_alias +-- MULTIPLE_COLUMN_NAMES_CONFLICTS +where 1 = 1 + +), __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 +select + cast(ID as + bigint +) as ID, + cast("User Id" as + varchar +) as "User Id", + cast(USER_ID as + float +) as USER_ID, + cast("User id" as + float +) as "User id", + cast("user id" as + float +) as "user id", + cast("User@Id" as + varchar +) as "User@Id", + cast(USERID as + float +) as USERID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 +-- MULTIPLE_COLUMN_NAMES_CONFLICTS +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 +select + md5(cast(coalesce(cast(ID as + varchar +), '') || '-' || coalesce(cast("User Id" as + varchar +), '') || '-' || coalesce(cast(USER_ID as + varchar +), '') || '-' || coalesce(cast("User id" as + varchar +), '') || '-' || coalesce(cast("user id" as + varchar +), '') || '-' || coalesce(cast("User@Id" as + varchar +), '') || '-' || coalesce(cast(USERID as + varchar +), '') as + varchar +)) as _AIRBYTE_MULTIPLE_COLUMN_NAMES_CONFLICTS_HASHID, + tmp.* +from __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 tmp +-- MULTIPLE_COLUMN_NAMES_CONFLICTS +where 1 = 1 + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml index c09293ca67262..bec4269ba6bf8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml @@ -9,5 +9,6 @@ sources: - name: _AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - name: _AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - name: _AIRBYTE_RAW_EXCHANGE_RATE + - name: _AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS - name: _AIRBYTE_RAW_POS_DEDUP_CDCX - name: _AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json index 9b44f5e68d18a..dc25bf3713604 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json @@ -177,6 +177,44 @@ "cursor_field": [], "destination_sync_mode": "append_dedup", "primary_key": [["id"]] + }, + { + "stream": { + "name": "multiple_column_names_conflicts", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "User Id": { + "type": ["string", "null"] + }, + "user_id": { + "type": ["null", "number"] + }, + "User id": { + "type": ["null", "number"] + }, + "user id": { + "type": ["null", "number"] + }, + "User@Id": { + "type": ["null", "string"] + }, + "UserId": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "cursor_field": [], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] } ] } diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt index fa3af2c3f2548..e84ca1f63c79a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt @@ -41,3 +41,5 @@ {"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33279,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} {"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lotus","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33280,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} {"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lily","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33281,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"multiple_column_names_conflicts","data":{"id":1,"User Id":"chris","user_id":42,"User id":300,"user id": 
102,"UserId":101},"emitted_at":1623959926}} diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py index 352fa8b9f93d1..ab7b3894d6134 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py @@ -223,6 +223,44 @@ def __normalize_identifier_case(self, input_name: str, is_quoted: bool = False) raise KeyError(f"Unknown destination type {self.destination_type}") return result + def normalize_column_identifier_case_for_lookup(self, input_name: str, is_quoted: bool = False) -> str: + """ + This function adds an additional normalization regarding the column name casing to determine if multiple columns + are in collisions. On certain destinations/settings, case sensitivity matters, in others it does not. + We separate this from standard identifier normalization "__normalize_identifier_case", + so the generated SQL queries are keeping the original casing from the catalog. + But we still need to determine if casing matters or not, thus by using this function. + """ + result = input_name + if self.destination_type.value == DestinationType.BIGQUERY.value: + # Columns are considered identical regardless of casing + result = input_name.lower() + elif self.destination_type.value == DestinationType.REDSHIFT.value: + # Columns are considered identical regardless of casing (even quoted ones) + result = input_name.lower() + elif self.destination_type.value == DestinationType.POSTGRES.value: + if not is_quoted and not self.needs_quotes(input_name): + result = input_name.lower() + elif self.destination_type.value == DestinationType.SNOWFLAKE.value: + if not is_quoted and not self.needs_quotes(input_name): + result = input_name.upper() + elif self.destination_type.value == DestinationType.MYSQL.value: + # Columns are considered identical regardless of casing (even quoted ones) + result = input_name.lower() + elif self.destination_type.value == DestinationType.MSSQL.value: + # Columns are considered identical regardless of casing (even quoted ones) + result = input_name.lower() + elif self.destination_type.value == DestinationType.ORACLE.value: + if not is_quoted and not self.needs_quotes(input_name): + result = input_name.lower() + else: + result = input_name.upper() + elif self.destination_type.value == DestinationType.CLICKHOUSE.value: + pass + else: + raise KeyError(f"Unknown destination type {self.destination_type}") + return result + # Static Functions diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 8cae3703bc9fc..d5ba201b9746b 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -314,15 +314,17 @@ def extract_column_names(self) -> Dict[str, Tuple[str, str]]: field_names = set() for field in fields: field_name = self.name_transformer.normalize_column_name(field, in_jinja=False) + field_name_lookup = self.name_transformer.normalize_column_identifier_case_for_lookup(field_name) jinja_name = 
self.name_transformer.normalize_column_name(field, in_jinja=True) - if field_name in field_names: + if field_name_lookup in field_names: # TODO handle column name duplicates or collisions deterministically in this stream for i in range(1, 1000): field_name = self.name_transformer.normalize_column_name(f"{field}_{i}", in_jinja=False) + field_name_lookup = self.name_transformer.normalize_column_identifier_case_for_lookup(field_name) jinja_name = self.name_transformer.normalize_column_name(f"{field}_{i}", in_jinja=True) - if field_name not in field_names: + if field_name_lookup not in field_names: break - field_names.add(field_name) + field_names.add(field_name_lookup) result[field] = (field_name, jinja_name) return result diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json index 1edbbb8465d94..e0345d4780fe5 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json @@ -2,7 +2,7 @@ "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse", "supportsIncremental": true, "supportsNormalization": true, - "supportsDBT": false, + "supportsDBT": true, "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", From b6926d44d47073466fc360b6a21aaa88abfc4052 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 6 Jan 2022 10:10:25 -0800 Subject: [PATCH 053/215] =?UTF-8?q?=F0=9F=9A=A8=20Snowflake=20produces=20p?= =?UTF-8?q?ermanent=20tables=20=F0=9F=9A=A8=20(#9063)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../seed/destination_definitions.yaml | 2 +- .../resources/seed/destination_specs.yaml | 2 +- .../bases/base-normalization/.dockerignore | 1 + .../bases/base-normalization/.gitignore | 11 ++++ .../bases/base-normalization/build.gradle | 5 ++ .../dbt_project.yml | 64 +++++++++++++++++++ .../docker-compose.build.yaml | 7 ++ .../base-normalization/docker-compose.yaml | 2 + .../integration_tests/dbt_integration_test.py | 4 ++ .../test_nested_streams/dbt_project.yml | 25 ++++---- ...PLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql | 2 +- ...NS_RESULTING_INTO_LONG_NAMES_PARTITION.sql | 2 +- ...SULTING_INTO_LONG_NAMES_PARTITION_DATA.sql | 2 +- ...LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql | 2 +- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 2 +- .../test_simple_streams/dbt_project.yml | 25 ++++---- .../DEDUP_EXCHANGE_RATE.sql | 2 +- .../DEDUP_EXCHANGE_RATE_SCD.sql | 2 +- .../TEST_NORMALIZATION/EXCHANGE_RATE.sql | 2 +- .../TEST_NORMALIZATION/EXCHANGE_RATE.sql | 2 +- .../integration_tests/test_ephemeral.py | 2 + .../integration_tests/test_normalization.py | 3 + .../base-normalization/snowflake.Dockerfile | 33 ++++++++++ .../DestinationAcceptanceTest.java | 15 +++-- .../destination-snowflake/Dockerfile | 2 +- ...wflakeInsertDestinationAcceptanceTest.java | 19 ++++-- .../NormalizationRunnerFactory.java | 11 +++- .../sync/DbtTransformationActivityImpl.java | 9 +-- .../sync/NormalizationActivityImpl.java | 5 +- .../NormalizationRunnerFactoryTest.java | 6 +- build.gradle | 1 + docs/integrations/destinations/snowflake.md | 5 +- .../basic-normalization.md | 7 +- 33 files changed, 224 insertions(+), 60 deletions(-) create mode 100644 
airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml mode change 100755 => 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml mode change 100755 => 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml create mode 100644 airbyte-integrations/bases/base-normalization/snowflake.Dockerfile diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 100521ffbc212..694449982dea6 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -179,7 +179,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: airbyte/destination-snowflake - dockerImageTag: 0.3.24 + dockerImageTag: 0.4.0 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg - name: MariaDB ColumnStore diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 266fefa1716d8..a99171e40b825 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3767,7 +3767,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.3.24" +- dockerImage: "airbyte/destination-snowflake:0.4.0" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: diff --git a/airbyte-integrations/bases/base-normalization/.dockerignore b/airbyte-integrations/bases/base-normalization/.dockerignore index e6fbfb3101a91..a44e07cac6f81 100644 --- a/airbyte-integrations/bases/base-normalization/.dockerignore +++ b/airbyte-integrations/bases/base-normalization/.dockerignore @@ -9,3 +9,4 @@ !dbt-project-template-mysql !dbt-project-template-oracle !dbt-project-template-clickhouse +!dbt-project-template-snowflake diff --git a/airbyte-integrations/bases/base-normalization/.gitignore b/airbyte-integrations/bases/base-normalization/.gitignore index 5e426c453be52..7994f50ee6bea 100644 --- a/airbyte-integrations/bases/base-normalization/.gitignore +++ b/airbyte-integrations/bases/base-normalization/.gitignore @@ -19,13 +19,18 @@ integration_tests/normalization_test_output/**/*.yml # We keep a minimal/restricted subset of sql files for all destinations to avoid noise in diff # Simple Streams !integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql +!integration_tests/normalization_test_output/**/DEDUP_EXCHANGE_RATE*.sql !integration_tests/normalization_test_output/**/exchange_rate.sql +!integration_tests/normalization_test_output/**/EXCHANGE_RATE.sql !integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql # Nested Streams # Parent table !integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql !integration_tests/normalization_test_output/**/nested_stream_with*_names_scd.sql !integration_tests/normalization_test_output/**/nested_stream_with*_names.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_AB*.sql 
+!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_SCD.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES.sql # Nested table !integration_tests/normalization_test_output/**/nested_stream_with_*_partition_ab1.sql !integration_tests/normalization_test_output/**/nested_stream_with_*_data_ab1.sql @@ -33,6 +38,12 @@ integration_tests/normalization_test_output/**/*.yml !integration_tests/normalization_test_output/**/nested_stream_with*_data_scd.sql !integration_tests/normalization_test_output/**/nested_stream_with*_partition.sql !integration_tests/normalization_test_output/**/nested_stream_with*_data.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_PARTITION_AB1.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_DATA_AB1.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION_SCD.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA_SCD.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA.sql # but we keep all sql files for Postgres !integration_tests/normalization_test_output/postgres/**/*.sql diff --git a/airbyte-integrations/bases/base-normalization/build.gradle b/airbyte-integrations/bases/base-normalization/build.gradle index eff597c11a3e1..105dee09c2363 100644 --- a/airbyte-integrations/bases/base-normalization/build.gradle +++ b/airbyte-integrations/bases/base-normalization/build.gradle @@ -73,11 +73,16 @@ task airbyteDockerClickhouse(type: Exec, dependsOn: checkSshScriptCopy) { configure buildAirbyteDocker('clickhouse') dependsOn assemble } +task airbyteDockerSnowflake(type: Exec, dependsOn: checkSshScriptCopy) { + configure buildAirbyteDocker('snowflake') + dependsOn assemble +} airbyteDocker.dependsOn(airbyteDockerMSSql) airbyteDocker.dependsOn(airbyteDockerMySql) airbyteDocker.dependsOn(airbyteDockerOracle) airbyteDocker.dependsOn(airbyteDockerClickhouse) +airbyteDocker.dependsOn(airbyteDockerSnowflake) task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs) { module = "pytest" diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml new file mode 100644 index 0000000000000..c22ddc2282c25 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml @@ -0,0 +1,64 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! 
+source-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + +transient: false + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml index 16948b4a22211..628136ffc33a8 100644 --- a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml +++ b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml @@ -36,3 +36,10 @@ services: context: . labels: io.airbyte.git-revision: ${GIT_REVISION} + normalization-snowflake: + image: airbyte/normalization-snowflake:${VERSION} + build: + dockerfile: snowflake.Dockerfile + context: . 
+ labels: + io.airbyte.git-revision: ${GIT_REVISION} diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.yaml index 8dd94275765be..23edde345b51b 100644 --- a/airbyte-integrations/bases/base-normalization/docker-compose.yaml +++ b/airbyte-integrations/bases/base-normalization/docker-compose.yaml @@ -12,3 +12,5 @@ services: image: airbyte/normalization-oracle:${VERSION} normalization-clickhouse: image: airbyte/normalization-clickhouse:${VERSION} + normalization-snowflake: + image: airbyte/normalization-snowflake:${VERSION} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py index bce4152dc7b3f..bd67df357b284 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -374,6 +374,8 @@ def get_normalization_image(destination_type: DestinationType) -> str: return "airbyte/normalization-oracle:dev" elif DestinationType.CLICKHOUSE.value == destination_type.value: return "airbyte/normalization-clickhouse:dev" + elif DestinationType.SNOWFLAKE.value == destination_type.value: + return "airbyte/normalization-snowflake:dev" else: return "airbyte/normalization:dev" @@ -445,6 +447,8 @@ def run_check_dbt_command(normalization_image: str, command: str, cwd: str, forc "Configuration paths exist in your dbt_project.yml", # When no cte / view are generated "Error loading config file: .dockercfg: $HOME is not defined", # ignore warning "depends on a node named 'disabled_test' which was not found", # Tests throwing warning because it is disabled + "The requested image's platform (linux/amd64) does not match the detected host platform " + + "(linux/arm64/v8) and no specific platform was requested", # temporary patch until we publish images for arm64 ]: if except_clause in str_line: is_exception = True diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml old mode 100755 new mode 100644 index 9ad8158759001..c22ddc2282c25 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml @@ -4,13 +4,13 @@ # Name your package! Package names should contain only lowercase characters # and underscores. A good package name should reflect your organization's # name or the intended use of these models -name: 'airbyte_utils' -version: '1.0' +name: "airbyte_utils" +version: "1.0" config-version: 2 # This setting configures which "profile" dbt uses for this project. Profiles contain # database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: 'normalize' +profile: "normalize" # These configurations specify where dbt should look for different types of files. 
# The `source-paths` config, for example, states that source models can be found @@ -22,18 +22,18 @@ test-paths: ["tests"] data-paths: ["data"] macro-paths: ["macros"] -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" quoting: database: true -# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) -# all schemas should be unquoted + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted schema: false identifier: true @@ -41,6 +41,7 @@ quoting: # Using these configurations, you can enable or disable models, change how they # are materialized, and more! models: + +transient: false airbyte_utils: +materialized: table generated: @@ -60,4 +61,4 @@ models: dispatch: - macro_namespace: dbt_utils - search_order: ['airbyte_utils', 'dbt_utils'] + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql index dbb7eed19d4e4..070ff60ba2f03 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql @@ -1,6 +1,6 @@ - create or replace transient table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES" as (select * from( -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql index fe425246856a5..d70e72e47308c 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql @@ -1,6 +1,6 @@ - create or replace transient table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" as (select * from( with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1 as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql index 4ea5abb0f0f21..b550314ec9c29 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql @@ -1,6 +1,6 @@ - create or replace transient table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA" as (select * from( with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1 as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql index a5e6ac9fdc401..9bd584e0f1d23 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql @@ -1,6 +1,6 @@ - create or replace transient table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA" as (select * from( with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1 as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 73631957ce269..3bd7c7e79ee22 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -1,6 +1,6 @@ - create or replace transient table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as (select * from( -- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml old mode 100755 new mode 100644 index 9ad8158759001..c22ddc2282c25 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml @@ -4,13 +4,13 @@ # Name your package! Package names should contain only lowercase characters # and underscores. A good package name should reflect your organization's # name or the intended use of these models -name: 'airbyte_utils' -version: '1.0' +name: "airbyte_utils" +version: "1.0" config-version: 2 # This setting configures which "profile" dbt uses for this project. Profiles contain # database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: 'normalize' +profile: "normalize" # These configurations specify where dbt should look for different types of files. 
# The `source-paths` config, for example, states that source models can be found @@ -22,18 +22,18 @@ test-paths: ["tests"] data-paths: ["data"] macro-paths: ["macros"] -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" quoting: database: true -# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) -# all schemas should be unquoted + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted schema: false identifier: true @@ -41,6 +41,7 @@ quoting: # Using these configurations, you can enable or disable models, change how they # are materialized, and more! models: + +transient: false airbyte_utils: +materialized: table generated: @@ -60,4 +61,4 @@ models: dispatch: - macro_namespace: dbt_utils - search_order: ['airbyte_utils', 'dbt_utils'] + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql index 253d50b73d9c3..e54fe5cedf121 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql @@ -1,6 +1,6 @@ - create or replace transient table "AIRBYTE_DATABASE".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE" as (select * from( -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 20ae8e46add52..b65fbb50b75e4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -1,6 +1,6 @@ - create or replace transient table 
"AIRBYTE_DATABASE".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as (select * from( -- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql index 83d0d7cc8fd82..8280ecbf2960e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql @@ -1,6 +1,6 @@ - create or replace transient table "AIRBYTE_DATABASE".TEST_NORMALIZATION."EXCHANGE_RATE" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."EXCHANGE_RATE" as (select * from( with __dbt__cte__EXCHANGE_RATE_AB1 as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql index 83d0d7cc8fd82..8280ecbf2960e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql @@ -1,6 +1,6 @@ - create or replace transient table "AIRBYTE_DATABASE".TEST_NORMALIZATION."EXCHANGE_RATE" as + create or replace table "AIRBYTE_DATABASE".TEST_NORMALIZATION."EXCHANGE_RATE" as (select * from( with __dbt__cte__EXCHANGE_RATE_AB1 as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py index eaae4a18eda09..287789e378104 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py @@ -134,6 +134,8 @@ def setup_test_dir(integration_type: str) -> str: copy_tree("../dbt-project-template-mysql", test_root_dir) elif integration_type == DestinationType.ORACLE.value: copy_tree("../dbt-project-template-oracle", test_root_dir) + elif integration_type == DestinationType.SNOWFLAKE.value: + copy_tree("../dbt-project-template-snowflake", test_root_dir) return test_root_dir diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py index b411ae74d278f..0af945ede6305 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py @@ -192,6 +192,9 
@@ def setup_test_dir(destination_type: DestinationType, test_resource_name: str) - elif destination_type.value == DestinationType.CLICKHOUSE.value: copy_tree("../dbt-project-template-clickhouse", test_root_dir) dbt_project_yaml = "../dbt-project-template-clickhouse/dbt_project.yml" + elif destination_type.value == DestinationType.SNOWFLAKE.value: + copy_tree("../dbt-project-template-snowflake", test_root_dir) + dbt_project_yaml = "../dbt-project-template-snowflake/dbt_project.yml" dbt_test_utils.copy_replace(dbt_project_yaml, os.path.join(test_root_dir, "dbt_project.yml")) return test_root_dir diff --git a/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile b/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile new file mode 100644 index 0000000000000..aa7349270dd38 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile @@ -0,0 +1,33 @@ +FROM fishtownanalytics/dbt:0.21.1 +COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte + +# Install SSH Tunneling dependencies +RUN apt-get update && apt-get install -y jq sshpass + +WORKDIR /airbyte +COPY entrypoint.sh . +COPY build/sshtunneling.sh . + +WORKDIR /airbyte/normalization_code +COPY normalization ./normalization +COPY setup.py . +COPY dbt-project-template/ ./dbt-template/ +COPY dbt-project-template-snowflake/* ./dbt-template/ + +# Install python dependencies +WORKDIR /airbyte/base_python_structs +RUN pip install . + +WORKDIR /airbyte/normalization_code +RUN pip install . + +WORKDIR /airbyte/normalization_code/dbt-template/ +# Download external dbt dependencies +RUN dbt deps + +WORKDIR /airbyte +ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" +ENTRYPOINT ["/airbyte/entrypoint.sh"] + +LABEL io.airbyte.version=0.1.62 +LABEL io.airbyte.name=airbyte/normalization-snowflake diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java index 69794dc352d5a..7d84d1b4ac197 100644 --- a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java @@ -79,6 +79,8 @@ public abstract class DestinationAcceptanceTest { + private static final String NORMALIZATION_VERSION = "dev"; + private static final String JOB_ID = "0"; private static final int JOB_ATTEMPT = 0; @@ -462,7 +464,7 @@ public void specNormalizationValueShouldBeCorrect() throws Exception { if (normalizationFromSpec) { boolean normalizationRunnerFactorySupportsDestinationImage; try { - NormalizationRunnerFactory.create(workerConfigs, getImageName(), processFactory); + NormalizationRunnerFactory.create(workerConfigs, getImageName(), processFactory, NORMALIZATION_VERSION); normalizationRunnerFactorySupportsDestinationImage = true; } catch (final IllegalStateException e) { normalizationRunnerFactorySupportsDestinationImage = false; @@ -735,13 +737,14 @@ public void testCustomDbtTransformations() throws Exception { final DbtTransformationRunner runner = new DbtTransformationRunner(workerConfigs, processFactory, NormalizationRunnerFactory.create( workerConfigs, getImageName(), - processFactory)); + processFactory, + NORMALIZATION_VERSION)); 
runner.start(); final Path transformationRoot = Files.createDirectories(jobRoot.resolve("transform")); final OperatorDbt dbtConfig = new OperatorDbt() .withGitRepoUrl("https://github.com/fishtown-analytics/jaffle_shop.git") .withGitRepoBranch("main") - .withDockerImage(NormalizationRunnerFactory.getNormalizationInfoForConnector(getImageName()).getLeft()); + .withDockerImage(NormalizationRunnerFactory.getNormalizationInfoForConnector(getImageName()).getLeft() + ":" + NORMALIZATION_VERSION); // // jaffle_shop is a fictional ecommerce store maintained by fishtownanalytics/dbt. // @@ -802,7 +805,8 @@ void testCustomDbtTransformationsFailure() throws Exception { final DbtTransformationRunner runner = new DbtTransformationRunner(workerConfigs, processFactory, NormalizationRunnerFactory.create( workerConfigs, getImageName(), - processFactory)); + processFactory, + NORMALIZATION_VERSION)); runner.start(); final Path transformationRoot = Files.createDirectories(jobRoot.resolve("transform")); final OperatorDbt dbtConfig = new OperatorDbt() @@ -993,7 +997,8 @@ private List runSync( final NormalizationRunner runner = NormalizationRunnerFactory.create( workerConfigs, getImageName(), - processFactory); + processFactory, + NORMALIZATION_VERSION); runner.start(); final Path normalizationRoot = Files.createDirectories(jobRoot.resolve("normalize")); if (!runner.normalize(JOB_ID, JOB_ATTEMPT, normalizationRoot, destinationConfig.getDestinationConnectionConfiguration(), diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index 05507749bb026..18e82989befd7 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -18,5 +18,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.3.24 +LABEL io.airbyte.version=0.4.0 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java index c8d08cc9fea01..6cf51a5102ffa 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java @@ -4,6 +4,9 @@ package io.airbyte.integrations.destination.snowflake; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.base.Preconditions; @@ -22,6 +25,7 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.io.IOException; import java.nio.file.Path; +import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.Collections; @@ -117,10 +121,17 @@ protected List resolveIdentifier(final String identifier) { return result; } - private List retrieveRecordsFromTable(final 
String tableName, final String schema) throws SQLException, InterruptedException { + private List retrieveRecordsFromTable(final String tableName, final String schema) throws SQLException { return SnowflakeDatabase.getDatabase(getConfig()).bufferedResultSetQuery( - connection -> connection.createStatement() - .executeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)), + connection -> { + final ResultSet tableInfo = connection.createStatement() + .executeQuery(String.format("SHOW TABLES LIKE '%s' IN SCHEMA %s;", tableName, schema)); + assertTrue(tableInfo.next()); + // check that we're creating permanent tables. DBT defaults to transient tables, which have `TRANSIENT` as the value for the `kind` column. + assertEquals("TABLE", tableInfo.getString("kind")); + return connection.createStatement() + .executeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)); + }, JdbcUtils.getDefaultSourceOperations()::rowToJson); } @@ -163,7 +174,7 @@ public void testSyncWithBillionRecords(final String messagesFilename, final Stri runSyncAndVerifyStateOutput(config, largeNumberRecords, configuredCatalog, false); } - private T parseConfig(final String path, Class clazz) throws IOException { + private T parseConfig(final String path, final Class clazz) throws IOException { return Jsons.deserialize(MoreResources.readResource(path), clazz); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index 12368661ec5ca..2fee47175bd0e 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -33,16 +33,21 @@ public class NormalizationRunnerFactory { .put("airbyte/destination-postgres", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.POSTGRES)) .put("airbyte/destination-postgres-strict-encrypt", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.POSTGRES)) .put("airbyte/destination-redshift", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.REDSHIFT)) - .put("airbyte/destination-snowflake", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.SNOWFLAKE)) + .put("airbyte/destination-snowflake", ImmutablePair.of("airbyte/normalization-snowflake", DestinationType.SNOWFLAKE)) + .put("airbyte/destination-clickhouse", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) + .put("airbyte/destination-clickhouse-strict-encrypt", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) .build(); - public static NormalizationRunner create(final WorkerConfigs workerConfigs, final String connectorImageName, final ProcessFactory processFactory) { + public static NormalizationRunner create(final WorkerConfigs workerConfigs, + final String connectorImageName, + final ProcessFactory processFactory, + final String normalizationVersion) { final var valuePair = getNormalizationInfoForConnector(connectorImageName); return new DefaultNormalizationRunner( workerConfigs, valuePair.getRight(), processFactory, - String.format("%s:%s", valuePair.getLeft(), NORMALIZATION_VERSION)); + String.format("%s:%s", valuePair.getLeft(), normalizationVersion)); } public static ImmutablePair 
getNormalizationInfoForConnector(final String connectorImageName) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java index 761e5e608eb2c..44716c1c566f9 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java @@ -83,7 +83,7 @@ public Void run(final JobRunConfig jobRunConfig, return fullInput; }; - CheckedSupplier, Exception> workerFactory; + final CheckedSupplier, Exception> workerFactory; if (containerOrchestratorEnabled) { workerFactory = getContainerLauncherWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig); @@ -111,9 +111,10 @@ private CheckedSupplier, Exception> getLegacyWork new DbtTransformationRunner( workerConfigs, jobProcessFactory, NormalizationRunnerFactory.create( - workerConfigs, - destinationLauncherConfig.getDockerImage(), - jobProcessFactory))); + workerConfigs, + destinationLauncherConfig.getDockerImage(), + jobProcessFactory, + NormalizationRunnerFactory.NORMALIZATION_VERSION))); } private CheckedSupplier, Exception> getContainerLauncherWorkerFactory( diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java index 3f421bdeaae3e..ee01e9075e676 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java @@ -80,7 +80,7 @@ public Void normalize(final JobRunConfig jobRunConfig, return fullInput; }; - CheckedSupplier, Exception> workerFactory; + final CheckedSupplier, Exception> workerFactory; if (containerOrchestratorEnabled) { workerFactory = getContainerLauncherWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig); @@ -108,7 +108,8 @@ private CheckedSupplier, Exception> getLegacyWo NormalizationRunnerFactory.create( workerConfigs, destinationLauncherConfig.getDockerImage(), - jobProcessFactory), + jobProcessFactory, + NormalizationRunnerFactory.NORMALIZATION_VERSION), workerEnvironment); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/normalization/NormalizationRunnerFactoryTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/normalization/NormalizationRunnerFactoryTest.java index e03b2e5cd6bd2..ca7914e177fe6 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/normalization/NormalizationRunnerFactoryTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/normalization/NormalizationRunnerFactoryTest.java @@ -19,6 +19,7 @@ class NormalizationRunnerFactoryTest { + public static final String NORMALIZATION_VERSION = "dev"; private ProcessFactory processFactory; @BeforeEach @@ -32,10 +33,11 @@ void testMappings() { assertEquals(entry.getValue().getValue(), ((DefaultNormalizationRunner) NormalizationRunnerFactory.create( new WorkerConfigs(new EnvConfigs()), - String.format("%s:0.1.0", entry.getKey()), processFactory)).getDestinationType()); + String.format("%s:0.1.0", entry.getKey()), processFactory, NORMALIZATION_VERSION)).getDestinationType()); } assertThrows(IllegalStateException.class, - () -> NormalizationRunnerFactory.create(new WorkerConfigs(new EnvConfigs()), "airbyte/destination-csv:0.1.0", 
processFactory)); + () -> NormalizationRunnerFactory.create(new WorkerConfigs(new EnvConfigs()), "airbyte/destination-csv:0.1.0", processFactory, + NORMALIZATION_VERSION)); } } diff --git a/build.gradle b/build.gradle index 91393c72c57f0..ad8535bbc29f6 100644 --- a/build.gradle +++ b/build.gradle @@ -83,6 +83,7 @@ def createSpotlessTarget = { pattern -> 'dbt-project-template-mysql', 'dbt-project-template-oracle', 'dbt-project-template-clickhouse', + 'dbt-project-template-snowflake', 'dbt_test_config', 'normalization_test_output', 'tools', diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index e93d8b63c5b9b..d2435ccc8e5e9 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -14,6 +14,8 @@ Each stream will be output into its own table in Snowflake. Each table will cont * `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. The column type in Snowflake is `TIMESTAMP WITH TIME ZONE`. * `_airbyte_data`: a json blob representing with the event data. The column type in Snowflake is `VARIANT`. +Note that Airbyte will create **permanent** tables. If you prefer to create transient tables (see [Snowflake docs](https://docs.snowflake.com/en/user-guide/tables-temp-transient.html) for a comparison), you will want to create a dedicated transient database for Airbyte (`CREATE TRANSIENT DATABASE airbyte_database`). + #### Features | Feature | Supported?\(Yes/No\) | Notes | @@ -196,7 +198,8 @@ Finally, you need to add read/write permissions to your bucket with that email. | Version | Date | Pull Request | Subject | |:--------| :-------- | :----- | :------ | -| 0.3.24 | 2021-12-23 | [#8869](https://github.com/airbytehq/airbyte/pull/8869) | Changed staging approach to Byte-Buffered | | +| 0.4.0 | 2021-12-27 | [#9063](https://github.com/airbytehq/airbyte/pull/9063) | Updated normalization to produce permanent tables | +| 0.3.24 | 2021-12-23 | [#8869](https://github.com/airbytehq/airbyte/pull/8869) | Changed staging approach to Byte-Buffered | | 0.3.23 | 2021-12-22 | [#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration in UI for S3 loading method | | 0.3.22 | 2021-12-21 | [#9006](https://github.com/airbytehq/airbyte/pull/9006) | Updated jdbc schema naming to follow Snowflake Naming Conventions | | 0.3.21 | 2021-12-15 | [#8781](https://github.com/airbytehq/airbyte/pull/8781) | Updated check method to verify permissions to create/drop stage for internal staging; compatibility fix for Java 17 | diff --git a/docs/understanding-airbyte/basic-normalization.md b/docs/understanding-airbyte/basic-normalization.md index a40dab26f56b1..9122b06ea525a 100644 --- a/docs/understanding-airbyte/basic-normalization.md +++ b/docs/understanding-airbyte/basic-normalization.md @@ -50,7 +50,7 @@ CREATE TABLE "cars" ( ## Normalization metadata columns -You'll notice that some metadata are added to keep track of important information about each record. +You'll notice that some metadata are added to keep track of important information about each record. - Some are introduced at the destination connector level: These are propagated by the normalization process from the raw table to the final table - `_airbyte_ab_id`: uuid value assigned by connectors to each row of the data written in the destination. - `_airbyte_emitted_at`: time at which the record was emitted and recorded by destination connector. 
@@ -329,7 +329,7 @@ Normalization produces tables that are partitioned, clustered, sorted or indexed In general, normalization needs to do lookup on the last emitted_at column to know if a record is freshly produced and need to be incrementally processed or not. But in certain models, such as SCD tables for example, we also need to retrieve older data to update their type 2 SCD end_date and active_row flags, thus a different partitioning scheme is used to optimize that use case. - + On Postgres destination, an additional table suffixed with `_stg` for every stream replicated in [incremental deduped history](connections/incremental-deduped-history.md) needs to be persisted (in a different staging schema) for incremental transformations to work because of a [limitation](https://github.com/dbt-labs/docs.getdbt.com/issues/335#issuecomment-694199569). ## Extending Basic Normalization @@ -350,7 +350,8 @@ Therefore, in order to "upgrade" to the desired normalization version, you need | Airbyte Version | Normalization Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | :--- | -| 0.32.11-alpha | 0.1.61 | 2021-12-02 | [\#8394](https://github.com/airbytehq/airbyte/pull/8394) | Fix incremental queries not updating empty tables | +| 0.32.11-alpha | 0.1.62 | 2021-12-23 | [\#9063](https://github.com/airbytehq/airbyte/pull/9063) | Add Snowflake-specific normalization | +| | 0.1.61 | 2021-12-02 | [\#8394](https://github.com/airbytehq/airbyte/pull/8394) | Fix incremental queries not updating empty tables | | | 0.1.61 | 2021-12-01 | [\#8378](https://github.com/airbytehq/airbyte/pull/8378) | Fix un-nesting queries and add proper ref hints | | 0.32.5-alpha | 0.1.60 | 2021-11-22 | [\#8088](https://github.com/airbytehq/airbyte/pull/8088) | Speed-up incremental queries for SCD table on Snowflake | | 0.30.32-alpha | 0.1.59 | 2021-11-08 | [\#7669](https://github.com/airbytehq/airbyte/pull/7169) | Fix nested incremental dbt | From c7021e6f302c237d47c60fee76a1f63d743b8447 Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Thu, 6 Jan 2022 20:49:55 +0200 Subject: [PATCH 054/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20S3:=20work-arou?= =?UTF-8?q?nd=20for=20format.delimiter=20change=20'\\t'=20->=20'\t'=20(#91?= =?UTF-8?q?63)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * work-around for format.delimiter '\\t' -> '\t' Signed-off-by: Sergey Chvalyuk --- .../69589781-7828-43c5-9f63-8925b1c1ccc2.json | 2 +- .../main/resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 5 +++-- .../connectors/source-s3/Dockerfile | 2 +- .../source-s3/integration_tests/spec.json | 2 +- .../connectors/source-s3/source_s3/source.py | 8 +++++++- .../source_files_abstract/formats/csv_spec.py | 2 +- .../source-s3/unit_tests/test_source.py | 16 ++++++++++++++++ docs/integrations/sources/s3.md | 1 + 9 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 airbyte-integrations/connectors/source-s3/unit_tests/test_source.py diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/69589781-7828-43c5-9f63-8925b1c1ccc2.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/69589781-7828-43c5-9f63-8925b1c1ccc2.json index eee0048204e04..13c14ceb20d21 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/69589781-7828-43c5-9f63-8925b1c1ccc2.json +++ 
b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/69589781-7828-43c5-9f63-8925b1c1ccc2.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "69589781-7828-43c5-9f63-8925b1c1ccc2", "name": "S3", "dockerRepository": "airbyte/source-s3", - "dockerImageTag": "0.1.7", + "dockerImageTag": "0.1.9", "documentationUrl": "https://docs.airbyte.io/integrations/sources/s3", "icon": "s3.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index dadadf52fc82d..391a3d4d4a413 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -593,7 +593,7 @@ - name: S3 sourceDefinitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 dockerRepository: airbyte/source-s3 - dockerImageTag: 0.1.8 + dockerImageTag: 0.1.9 documentationUrl: https://docs.airbyte.io/integrations/sources/s3 icon: s3.svg sourceType: file diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 44629eb404bec..1557ceac0235e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6047,7 +6047,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-s3:0.1.8" +- dockerImage: "airbyte/source-s3:0.1.9" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/s3" changelogUrl: "https://docs.airbyte.io/integrations/sources/s3" @@ -6103,7 +6103,8 @@ delimiter: title: "Delimiter" description: "The character delimiting individual cells in the CSV\ - \ data. This may only be a 1-character string." + \ data. This may only be a 1-character string. For tab-delimited\ + \ data enter '\\t'." default: "," minLength: 1 type: "string" diff --git a/airbyte-integrations/connectors/source-s3/Dockerfile b/airbyte-integrations/connectors/source-s3/Dockerfile index 5d0fb5b89aac3..c61cfe03a17cc 100644 --- a/airbyte-integrations/connectors/source-s3/Dockerfile +++ b/airbyte-integrations/connectors/source-s3/Dockerfile @@ -17,5 +17,5 @@ COPY source_s3 ./source_s3 ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.8 +LABEL io.airbyte.version=0.1.9 LABEL io.airbyte.name=airbyte/source-s3 diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json index 9dfac7d239095..07195b750e766 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json @@ -46,7 +46,7 @@ }, "delimiter": { "title": "Delimiter", - "description": "The character delimiting individual cells in the CSV data. This may only be a 1-character string.", + "description": "The character delimiting individual cells in the CSV data. This may only be a 1-character string. 
For tab-delimited data enter '\\t'.", "default": ",", "minLength": 1, "type": "string" diff --git a/airbyte-integrations/connectors/source-s3/source_s3/source.py b/airbyte-integrations/connectors/source-s3/source_s3/source.py index 382f5ee756f8d..71d0a19031adb 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/source.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/source.py @@ -3,7 +3,7 @@ # -from typing import Optional +from typing import Any, Mapping, Optional from pydantic import BaseModel, Field @@ -47,3 +47,9 @@ class SourceS3(SourceFilesAbstract): stream_class = IncrementalFileStreamS3 spec_class = SourceS3Spec documentation_url = "https://docs.airbyte.io/integrations/sources/s3" + + def read_config(self, config_path: str) -> Mapping[str, Any]: + config = super().read_config(config_path) + if config.get("format", {}).get("delimiter") == r"\t": + config["format"]["delimiter"] = "\t" + return config diff --git a/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/csv_spec.py b/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/csv_spec.py index bce9f15db010e..0fe3faa3e06a4 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/csv_spec.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/csv_spec.py @@ -21,7 +21,7 @@ class Config: delimiter: str = Field( default=",", min_length=1, - description="The character delimiting individual cells in the CSV data. This may only be a 1-character string.", + description="The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\\t'.", ) quote_char: str = Field( default='"', description="The character used optionally for quoting CSV values. To disallow quoting, make this field blank." diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/test_source.py b/airbyte-integrations/connectors/source-s3/unit_tests/test_source.py new file mode 100644 index 0000000000000..49c788c5334eb --- /dev/null +++ b/airbyte-integrations/connectors/source-s3/unit_tests/test_source.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import json + +from source_s3 import SourceS3 + + +def test_transform_backslash_t_to_tab(tmp_path): + config_file = tmp_path / "config.json" + with open(config_file, "w") as fp: + json.dump({"format": {"delimiter": "\\t"}}, fp) + source = SourceS3() + config = source.read_config(config_file) + assert config["format"]["delimiter"] == "\t" diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index 0eaf1e18b2646..1d245db3b2a8c 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -206,6 +206,7 @@ You can find details on [here](https://arrow.apache.org/docs/python/generated/py | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.9 | 2022-01-06 | [9163](https://github.com/airbytehq/airbyte/pull/9163) | Work-around for web-UI, `backslash - t` converts to `tab` for `format.delimiter` field. | | 0.1.7 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | | 0.1.6 | 2021-10-15 | [6615](https://github.com/airbytehq/airbyte/pull/6615) & [7058](https://github.com/airbytehq/airbyte/pull/7058) | Memory and performance optimisation. Advanced options for CSV parsing. 
| | 0.1.5 | 2021-09-24 | [6398](https://github.com/airbytehq/airbyte/pull/6398) | Support custom non Amazon S3 services | From ed46b2db787a9edbafb244baf83728eb486c9d74 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 6 Jan 2022 10:59:27 -0800 Subject: [PATCH 055/215] remove health query for migration test (#9338) * remove health query for migration test * fmt --- .../automaticMigrationAcceptance/MigrationAcceptanceTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java b/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java index fa1ba5a85e53f..13b67b4eabc22 100644 --- a/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java +++ b/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java @@ -22,7 +22,6 @@ import io.airbyte.api.client.model.ConnectionRead; import io.airbyte.api.client.model.ConnectionStatus; import io.airbyte.api.client.model.DestinationDefinitionRead; -import io.airbyte.api.client.model.HealthCheckRead; import io.airbyte.api.client.model.ImportRead; import io.airbyte.api.client.model.ImportRead.StatusEnum; import io.airbyte.api.client.model.SourceDefinitionRead; @@ -318,8 +317,7 @@ private static void populateDataForFirstRun() throws ApiException, URISyntaxExce private static void healthCheck(final ApiClient apiClient) { final HealthApi healthApi = new HealthApi(apiClient); try { - final HealthCheckRead healthCheck = healthApi.getHealthCheck(); - assertTrue(healthCheck.getAvailable()); + healthApi.getHealthCheck(); } catch (final ApiException e) { throw new RuntimeException("Health check failed, usually due to auto migration failure. 
Please check the logs for details."); } From 80695adb0220e756162c5167858fec148b31e65a Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 6 Jan 2022 11:23:47 -0800 Subject: [PATCH 056/215] Fix build (#9344) --- .../container_orchestrator/DbtJobOrchestrator.java | 3 ++- .../NormalizationJobOrchestrator.java | 3 ++- .../workers/normalization/NormalizationRunnerFactory.java | 2 -- .../temporal/sync/DbtTransformationActivityImpl.java | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java index c87fd499dc631..fc426adb5ed22 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java @@ -59,7 +59,8 @@ public void runJob() throws Exception { processFactory, NormalizationRunnerFactory.create( workerConfigs, destinationLauncherConfig.getDockerImage(), - processFactory)));; + processFactory, + NormalizationRunnerFactory.NORMALIZATION_VERSION))); log.info("Running dbt worker..."); final Path jobRoot = WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java index c8cb34206716d..6d8251e28947f 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java @@ -56,7 +56,8 @@ public void runJob() throws Exception { NormalizationRunnerFactory.create( workerConfigs, destinationLauncherConfig.getDockerImage(), - processFactory), + processFactory, + NormalizationRunnerFactory.NORMALIZATION_VERSION), configs.getWorkerEnvironment()); log.info("Running normalization worker..."); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index 2fee47175bd0e..0e921d438a0df 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -34,8 +34,6 @@ public class NormalizationRunnerFactory { .put("airbyte/destination-postgres-strict-encrypt", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.POSTGRES)) .put("airbyte/destination-redshift", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.REDSHIFT)) .put("airbyte/destination-snowflake", ImmutablePair.of("airbyte/normalization-snowflake", DestinationType.SNOWFLAKE)) - .put("airbyte/destination-clickhouse", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) - .put("airbyte/destination-clickhouse-strict-encrypt", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) .build(); public static NormalizationRunner create(final WorkerConfigs workerConfigs, diff --git 
a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java index 44716c1c566f9..404eaef204125 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java @@ -111,10 +111,10 @@ private CheckedSupplier, Exception> getLegacyWork new DbtTransformationRunner( workerConfigs, jobProcessFactory, NormalizationRunnerFactory.create( - workerConfigs, - destinationLauncherConfig.getDockerImage(), - jobProcessFactory, - NormalizationRunnerFactory.NORMALIZATION_VERSION))); + workerConfigs, + destinationLauncherConfig.getDockerImage(), + jobProcessFactory, + NormalizationRunnerFactory.NORMALIZATION_VERSION))); } private CheckedSupplier, Exception> getContainerLauncherWorkerFactory( From e80d6140d2bed2d42a8a65ac0cb173c1efac4cce Mon Sep 17 00:00:00 2001 From: Charles Date: Thu, 6 Jan 2022 14:34:49 -0800 Subject: [PATCH 057/215] =?UTF-8?q?=F0=9F=93=96=20Clarify=20staging=20setu?= =?UTF-8?q?p=20guide=20for=20bq=20&=20gcs=20destination=20(#9255)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * clarify confusing parts of setting up staging for bq destination * Added Storage Admin * update gcs destination docs too * fix indentation * Update required permission list Co-authored-by: Liren Tu --- docs/integrations/destinations/bigquery.md | 19 +++++++++++++------ docs/integrations/destinations/gcs.md | 19 +++++++++++++------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index eb1b08ce81fe9..f1eadc5d968d2 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -111,15 +111,22 @@ This is the recommended configuration for uploading data to BigQuery. It works b * **GCS Bucket Path** * **Block Size (MB) for GCS multipart upload** * **GCS Bucket Keep files after migration** - * See [this](https://cloud.google.com/storage/docs/creating-buckets) for instructions on how to create a GCS bucket. + * See [this](https://cloud.google.com/storage/docs/creating-buckets) for instructions on how to create a GCS bucket. The bucket cannot have a retention policy. Set Protection Tools to none or Object versioning. * **HMAC Key Access ID** - * See [this](https://cloud.google.com/storage/docs/authentication/hmackeys) on how to generate an access key. - * We recommend creating an Airbyte-specific user or service account. This user or account will require read and write permissions to objects in the bucket. + * See [this](https://cloud.google.com/storage/docs/authentication/managing-hmackeys) on how to generate an access key. For more information on hmac keys please reference the [GCP docs](https://cloud.google.com/storage/docs/authentication/hmackeys) + * We recommend creating an Airbyte-specific user or service account. 
This user or account will require the following permissions for the bucket: + ``` + storage.multipartUploads.abort + storage.multipartUploads.create + storage.objects.create + storage.objects.delete + storage.objects.get + storage.objects.list + ``` + You can set those by going to the permissions tab in the GCS bucket and adding the appropriate the email address of the service account or user and adding the aforementioned permissions. * **Secret Access Key** * Corresponding key to the above access ID. - * Make sure your GCS bucket is accessible from the machine running Airbyte. -* This depends on your networking setup. -* The easiest way to verify if Airbyte is able to connect to your GCS bucket is via the check connection tool in the UI. +* Make sure your GCS bucket is accessible from the machine running Airbyte. This depends on your networking setup. The easiest way to verify if Airbyte is able to connect to your GCS bucket is via the check connection tool in the UI. ### `Standard` uploads This uploads data directly from your source to BigQuery. While this is faster to setup initially, **we strongly recommend that you do not use this option for anything other than a quick demo**. It is more than 10x slower than the GCS uploading option and will fail for many datasets. Please be aware you may see some failures for big datasets and slow sources, e.g. if reading from source takes more than 10-12 hours. This is caused by the Google BigQuery SDK client limitations. For more details please check [https://github.com/airbytehq/airbyte/issues/3549](https://github.com/airbytehq/airbyte/issues/3549) diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index b030fafe64a5e..b0141d0b91566 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -207,16 +207,23 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A * Fill up GCS info * **GCS Bucket Name** - * See [this](https://cloud.google.com/storage/docs/creating-buckets) to create an S3 bucket. + * See [this](https://cloud.google.com/storage/docs/creating-buckets) for instructions on how to create a GCS bucket. The bucket cannot have a retention policy. Set Protection Tools to none or Object versioning. * **GCS Bucket Region** * **HMAC Key Access ID** - * See [this](https://cloud.google.com/storage/docs/authentication/hmackeys) on how to generate an access key. - * We recommend creating an Airbyte-specific user or service account. This user or account will require read and write permissions to objects in the bucket. + * See [this](https://cloud.google.com/storage/docs/authentication/managing-hmackeys) on how to generate an access key. For more information on hmac keys please reference the [GCP docs](https://cloud.google.com/storage/docs/authentication/hmackeys) + * We recommend creating an Airbyte-specific user or service account. This user or account will require the following permissions for the bucket: + ``` + storage.multipartUploads.abort + storage.multipartUploads.create + storage.objects.create + storage.objects.delete + storage.objects.get + storage.objects.list + ``` + You can set those by going to the permissions tab in the GCS bucket and adding the appropriate the email address of the service account or user and adding the aforementioned permissions. * **Secret Access Key** * Corresponding key to the above access ID. -* Make sure your GCS bucket is accessible from the machine running Airbyte. 
- * This depends on your networking setup. - * The easiest way to verify if Airbyte is able to connect to your GCS bucket is via the check connection tool in the UI. +* Make sure your GCS bucket is accessible from the machine running Airbyte. This depends on your networking setup. The easiest way to verify if Airbyte is able to connect to your GCS bucket is via the check connection tool in the UI. ## CHANGELOG From 678cfbe2cfaa8ced754512c0949abcc2b12b64f8 Mon Sep 17 00:00:00 2001 From: ajzo90 Date: Thu, 6 Jan 2022 23:46:51 +0100 Subject: [PATCH 058/215] Remove incorrect description for record in protocol.yaml (#9307) Co-authored-by: Christian --- .../src/main/resources/airbyte_protocol/airbyte_protocol.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml b/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml index e3146e26b6e83..e702b1c1e5cea 100644 --- a/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml +++ b/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml @@ -35,7 +35,7 @@ definitions: connectionStatus: "$ref": "#/definitions/AirbyteConnectionStatus" catalog: - description: "log message: any kind of logging you want the platform to know about." + description: "catalog message: the calalog" "$ref": "#/definitions/AirbyteCatalog" record: description: "record message: the record" From 5b6b48ca105637e6f675b7bd3da29f8f7faaac45 Mon Sep 17 00:00:00 2001 From: Chris Wu Date: Thu, 6 Jan 2022 14:50:15 -0800 Subject: [PATCH 059/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20GitHub:=20Use?= =?UTF-8?q?=20CDK=20caching=20and=20convert=20PR-related=20streams=20to=20?= =?UTF-8?q?incremental=20(#7250)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Source GitHub: Use CDK caching and convert PR-related streams to incremental * Remove extra change * Consolidate * Address comments * Fix integration test config * Fix merge * Update sample state * Bump release version * Bump version * Address feedback * Bump version * Fix formatting --- .../resources/seed/source_definitions.yaml | 2 +- .../connectors/source-github/Dockerfile | 2 +- .../source-github/acceptance-test-config.yml | 2 + .../integration_tests/abnormal_state.json | 10 +++ .../integration_tests/configured_catalog.json | 18 +++-- .../integration_tests/sample_state.json | 10 +++ .../schemas/pull_request_stats.json | 4 + .../source-github/source_github/source.py | 4 +- .../source-github/source_github/streams.py | 80 +++++++++---------- docs/integrations/sources/github.md | 1 + 10 files changed, 82 insertions(+), 51 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 391a3d4d4a413..806adcc1b5f98 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -217,7 +217,7 @@ - name: GitHub sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.2.9 + dockerImageTag: 0.2.10 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index d9274c32c19af..3a7e115d64896 100644 --- 
a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.9 +LABEL io.airbyte.version=0.2.10 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/acceptance-test-config.yml b/airbyte-integrations/connectors/source-github/acceptance-test-config.yml index bb0bdb411995a..caf55dcbb65f3 100644 --- a/airbyte-integrations/connectors/source-github/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-github/acceptance-test-config.yml @@ -25,9 +25,11 @@ tests: issue_milestones: ["airbytehq/integration-test", "updated_at"] issues: ["airbytehq/integration-test", "updated_at"] projects: ["airbytehq/integration-test", "updated_at"] + pull_request_stats: ["airbytehq/integration-test", "updated_at"] pull_requests: ["airbytehq/integration-test", "updated_at"] releases: ["airbytehq/integration-test", "created_at"] review_comments: ["airbytehq/integration-test", "updated_at"] + reviews: ["airbytehq/integration-test", "submitted_at"] stargazers: ["airbytehq/integration-test", "starred_at"] full_refresh: - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json index dfae03d57420a..48d22b3e7e831 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json @@ -39,6 +39,11 @@ "updated_at": "2121-06-28T17:24:51Z" } }, + "pull_request_stats": { + "airbytehq/integration-test": { + "updated_at": "2121-06-29T02:04:57Z" + } + }, "pull_requests": { "airbytehq/integration-test": { "updated_at": "2121-06-28T23:36:35Z" @@ -54,6 +59,11 @@ "updated_at": "2121-06-23T23:57:07Z" } }, + "reviews": { + "airbytehq/integration-test": { + "submitted_at": "2121-06-29T02:04:57Z" + } + }, "stargazers": { "airbytehq/integration-test": { "starred_at": "2121-06-29T02:04:57Z" diff --git a/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json index 61065af874119..dfe46cf083369 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json @@ -198,11 +198,14 @@ "stream": { "name": "pull_request_stats", "json_schema": {}, - "supported_sync_modes": ["full_refresh"], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updated_at"], "source_defined_primary_key": [["id"]] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["updated_at"] }, { "stream": { @@ -257,11 +260,14 @@ "stream": { "name": "reviews", "json_schema": {}, - "supported_sync_modes": ["full_refresh"], + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["submitted_at"], "source_defined_primary_key": [["id"]] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + 
"sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["submitted_at"] }, { "stream": { diff --git a/airbyte-integrations/connectors/source-github/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-github/integration_tests/sample_state.json index 068a99d1f7172..86698788537c7 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/sample_state.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/sample_state.json @@ -29,6 +29,11 @@ "created_at": "2021-06-23T23:57:07Z" } }, + "pull_request_stats": { + "airbytehq/integration-test": { + "updated_at": "2021-08-30T12:01:15Z" + } + }, "pull_requests": { "airbytehq/integration-test": { "updated_at": "2021-06-28T23:36:35Z" @@ -53,5 +58,10 @@ "airbytehq/integration-test": { "created_at": "2021-06-30T10:04:41Z" } + }, + "reviews": { + "airbytehq/integration-test": { + "submitted_at": "2021-08-30T12:01:15Z" + } } } diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/pull_request_stats.json b/airbyte-integrations/connectors/source-github/source_github/schemas/pull_request_stats.json index 3b221876cfae4..90ebf80f14a2a 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/pull_request_stats.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/pull_request_stats.json @@ -49,6 +49,10 @@ }, "changed_files": { "type": ["null", "integer"] + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" } } } diff --git a/airbyte-integrations/connectors/source-github/source_github/source.py b/airbyte-integrations/connectors/source-github/source_github/source.py index cf1b6f4d607dc..56970d252c1ed 100644 --- a/airbyte-integrations/connectors/source-github/source_github/source.py +++ b/airbyte-integrations/connectors/source-github/source_github/source.py @@ -179,12 +179,12 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: Organizations(**organization_args), Projects(**repository_args_with_start_date), PullRequestCommentReactions(**repository_args_with_start_date), - PullRequestStats(parent=pull_requests_stream, **repository_args), + PullRequestStats(parent=pull_requests_stream, **repository_args_with_start_date), PullRequests(**repository_args_with_start_date), Releases(**repository_args_with_start_date), Repositories(**organization_args), ReviewComments(**repository_args_with_start_date), - Reviews(parent=pull_requests_stream, **repository_args), + Reviews(parent=pull_requests_stream, **repository_args_with_start_date), Stargazers(**repository_args_with_start_date), Tags(**repository_args), Teams(**organization_args), diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 8da8e84b11c86..62c65a6956888 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -2,7 +2,6 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. # -import os import time from abc import ABC, abstractmethod from copy import deepcopy @@ -10,43 +9,16 @@ from urllib import parse import requests -import vcr from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream from requests.exceptions import HTTPError -from vcr.cassette import Cassette - - -def request_cache() -> Cassette: - """ - Builds VCR instance. 
- It deletes file everytime we create it, normally should be called only once. - We can't use NamedTemporaryFile here because yaml serializer doesn't work well with empty files. - """ - filename = "request_cache.yml" - try: - os.remove(filename) - except FileNotFoundError: - pass - - return vcr.use_cassette(str(filename), record_mode="new_episodes", serializer="yaml") class GithubStream(HttpStream, ABC): - cache = request_cache() url_base = "https://api.github.com/" - # To prevent dangerous behavior, the `vcr` library prohibits the use of nested caching. - # Here's an example of dangerous behavior: - # cache = Cassette.use('whatever') - # with cache: - # with cache: - # pass - # - # Therefore, we will only use `cache` for the top-level stream, so as not to cause possible difficulties. - top_level_stream = True - primary_key = "id" + use_cache = True # GitHub pagination could be from 1 to 100. page_size = 100 @@ -100,11 +72,7 @@ def backoff_time(self, response: requests.Response) -> Union[int, float]: def read_records(self, stream_slice: Mapping[str, any] = None, **kwargs) -> Iterable[Mapping[str, Any]]: try: - if self.top_level_stream: - with self.cache: - yield from super().read_records(stream_slice=stream_slice, **kwargs) - else: - yield from super().read_records(stream_slice=stream_slice, **kwargs) + yield from super().read_records(stream_slice=stream_slice, **kwargs) except HTTPError as e: error_msg = str(e) @@ -422,6 +390,7 @@ class PullRequests(SemiIncrementalGithubStream): """ page_size = 50 + first_read_override_key = "first_read_override" def __init__(self, **kwargs): super().__init__(**kwargs) @@ -431,7 +400,7 @@ def read_records(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iter """ Decide if this a first read or not by the presence of the state object """ - self._first_read = not bool(stream_state) + self._first_read = not bool(stream_state) or stream_state.get(self.first_read_override_key, False) yield from super().read_records(stream_state=stream_state, **kwargs) def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: @@ -459,7 +428,7 @@ def is_sorted_descending(self) -> bool: """ Depending if there any state we read stream in ascending or descending order. 
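# With the vcr cassette removed, response caching is delegated to the CDK via the
# `use_cache = True` class attribute set on GithubStream above: cached responses are
# reused instead of re-requesting the same API pages. A minimal sketch of the same
# pattern on a standalone stream; the endpoint and field names below are placeholders
# for illustration, not part of this connector:
from typing import Any, Iterable, Mapping, Optional

import requests
from airbyte_cdk.sources.streams.http import HttpStream


class CachedWidgetsStream(HttpStream):
    url_base = "https://api.example.com/"
    primary_key = "id"
    use_cache = True  # let the CDK cache responses for reuse across reads

    def path(self, **kwargs) -> str:
        return "widgets"

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        return None  # single page is enough for this sketch

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        yield from response.json()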
""" - return self._first_read + return not self._first_read class CommitComments(SemiIncrementalGithubStream): @@ -686,8 +655,8 @@ def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: # Pull request substreams -class PullRequestSubstream(HttpSubStream, GithubStream, ABC): - top_level_stream = False +class PullRequestSubstream(HttpSubStream, SemiIncrementalGithubStream, ABC): + use_cache = False def __init__(self, parent: PullRequests, **kwargs): super().__init__(parent=parent, **kwargs) @@ -695,14 +664,33 @@ def __init__(self, parent: PullRequests, **kwargs): def stream_slices( self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: - parent_stream_slices = super().stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=stream_state) - + """ + Override the parent PullRequests stream configuration to always fetch records in ascending order + """ + parent_state = deepcopy(stream_state) or {} + parent_state[PullRequests.first_read_override_key] = True + parent_stream_slices = super().stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=parent_state) for parent_stream_slice in parent_stream_slices: yield { "pull_request_number": parent_stream_slice["parent"]["number"], "repository": parent_stream_slice["parent"]["repository"], } + def read_records( + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + ) -> Iterable[Mapping[str, Any]]: + """ + We've already determined the list of pull requests to run the stream against. + Skip the start_point_map and cursor_field logic in SemiIncrementalGithubStream.read_records. + """ + yield from super(SemiIncrementalGithubStream, self).read_records( + sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state + ) + class PullRequestStats(PullRequestSubstream): """ @@ -731,11 +719,21 @@ class Reviews(PullRequestSubstream): API docs: https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request """ + cursor_field = "submitted_at" + def path( self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None ) -> str: return f"repos/{stream_slice['repository']}/pulls/{stream_slice['pull_request_number']}/reviews" + # Set the parent stream state's cursor field before fetching its records + def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: + parent_state = deepcopy(stream_state) or {} + for repository in self.repositories: + if repository in parent_state and self.cursor_field in parent_state[repository]: + parent_state[repository][self.parent.cursor_field] = parent_state[repository][self.cursor_field] + yield from super().stream_slices(stream_state=parent_state, **kwargs) + # Reactions streams @@ -743,7 +741,7 @@ def path( class ReactionStream(GithubStream, ABC): parent_key = "id" - top_level_stream = False + use_cache = False def __init__(self, **kwargs): self._stream_kwargs = deepcopy(kwargs) diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 57bf544788f0e..7e6b0972a08eb 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -92,6 +92,7 @@ Your token should have at least the `repo` scope. 
Depending on which streams you | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | | 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | | 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | | 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with Github | From 3281424365c8b77bb709174dcadbab864ad0152e Mon Sep 17 00:00:00 2001 From: Marcos Marx Date: Thu, 6 Jan 2022 19:50:31 -0300 Subject: [PATCH 060/215] Source Cloud: correct typo in spec.json (#9299) * bump version * bump source spec --- .../6ff047c0-f5d5-4ce5-8c81-204a830fa7e1.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 4 ++-- .../connectors/source-aws-cloudtrail/CHANGELOG.md | 7 ------- .../connectors/source-aws-cloudtrail/Dockerfile | 2 +- .../source-aws-cloudtrail/source_aws_cloudtrail/spec.json | 2 +- 6 files changed, 6 insertions(+), 13 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-aws-cloudtrail/CHANGELOG.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/6ff047c0-f5d5-4ce5-8c81-204a830fa7e1.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/6ff047c0-f5d5-4ce5-8c81-204a830fa7e1.json index 5b0233a9657f4..9c859f851b97e 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/6ff047c0-f5d5-4ce5-8c81-204a830fa7e1.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/6ff047c0-f5d5-4ce5-8c81-204a830fa7e1.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "6ff047c0-f5d5-4ce5-8c81-204a830fa7e1", "name": "AWS CloudTrail", "dockerRepository": "airbyte/source-aws-cloudtrail", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.4", "documentationUrl": "https://docs.airbyte.io/integrations/sources/aws-cloudtrail", "icon": "awscloudtrail.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 806adcc1b5f98..6248ea2f25633 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -8,7 +8,7 @@ - name: AWS CloudTrail sourceDefinitionId: 6ff047c0-f5d5-4ce5-8c81-204a830fa7e1 dockerRepository: airbyte/source-aws-cloudtrail - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/aws-cloudtrail icon: awscloudtrail.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 1557ceac0235e..718f72a3fec34 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -43,7 +43,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-aws-cloudtrail:0.1.3" +- dockerImage: "airbyte/source-aws-cloudtrail:0.1.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/aws-cloudtrail" connectionSpecification: @@ -78,7 +78,7 @@ start_date: type: 
"string" title: "Start Date" - description: "The date you would like to replicate data. Data in ClouTraid\ + description: "The date you would like to replicate data. Data in AWS CloudTrail\ \ is available for last 90 days only. Format: YYYY-MM-DD." examples: - "2021-01-01" diff --git a/airbyte-integrations/connectors/source-aws-cloudtrail/CHANGELOG.md b/airbyte-integrations/connectors/source-aws-cloudtrail/CHANGELOG.md deleted file mode 100644 index f64c33f7bfe4f..0000000000000 --- a/airbyte-integrations/connectors/source-aws-cloudtrail/CHANGELOG.md +++ /dev/null @@ -1,7 +0,0 @@ -# Changelog - -## 0.1.0 -Initial Release. - -Added Management Events incremental stream: https://docs.aws.amazon.com/awscloudtrail/latest/APIReference/API_LookupEvents.html - diff --git a/airbyte-integrations/connectors/source-aws-cloudtrail/Dockerfile b/airbyte-integrations/connectors/source-aws-cloudtrail/Dockerfile index 4bbab95f606ab..5cf2683ccefc2 100644 --- a/airbyte-integrations/connectors/source-aws-cloudtrail/Dockerfile +++ b/airbyte-integrations/connectors/source-aws-cloudtrail/Dockerfile @@ -13,5 +13,5 @@ RUN pip install . ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-aws-cloudtrail diff --git a/airbyte-integrations/connectors/source-aws-cloudtrail/source_aws_cloudtrail/spec.json b/airbyte-integrations/connectors/source-aws-cloudtrail/source_aws_cloudtrail/spec.json index 67ae215aab655..8489e44f1fe55 100644 --- a/airbyte-integrations/connectors/source-aws-cloudtrail/source_aws_cloudtrail/spec.json +++ b/airbyte-integrations/connectors/source-aws-cloudtrail/source_aws_cloudtrail/spec.json @@ -32,7 +32,7 @@ "start_date": { "type": "string", "title": "Start Date", - "description": "The date you would like to replicate data. Data in ClouTraid is available for last 90 days only. Format: YYYY-MM-DD.", + "description": "The date you would like to replicate data. Data in AWS CloudTrail is available for last 90 days only. Format: YYYY-MM-DD.", "examples": ["2021-01-01"], "default": "1970-01-01", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" From 4190dbcba243bfda6903b40e94f8cf046b46b0f7 Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Thu, 6 Jan 2022 15:10:51 -0800 Subject: [PATCH 061/215] =?UTF-8?q?=F0=9F=93=96=20Add=20service=20account?= =?UTF-8?q?=20doc=20to=20gcs=20connector=20readme=20(#9347)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update gcs readme about service accounts * Update doc --- .../connectors/destination-gcs/README.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/airbyte-integrations/connectors/destination-gcs/README.md b/airbyte-integrations/connectors/destination-gcs/README.md index 6ad38446997d7..fbd7eaec1ccd4 100644 --- a/airbyte-integrations/connectors/destination-gcs/README.md +++ b/airbyte-integrations/connectors/destination-gcs/README.md @@ -17,6 +17,35 @@ As a community contributor, you can follow these steps to run integration tests. - Access the `destination gcs creds` secrets on Last Pass, and put it in `sample_secrets/config.json`. - Rename the directory from `sample_secrets` to `secrets`. +### GCP Service Account for Testing +Two service accounts have been created in our GCP for testing this destination. Both of them have access to Cloud Storage through HMAC keys. The keys are persisted together with the connector integration test credentials in LastPass. 
+ +- Account: `gcs-destination-connector-test@dataline-integration-testing.iam.gserviceaccount.com` + - This account has the required permission to pass the integration test. Note that the uploader needs `storage.multipartUploads` permissions, which may not be intuitive. + - Role: `GCS Destination User` + - Permissions: + ``` + storage.multipartUploads.abort + storage.multipartUploads.create + storage.objects.create + storage.objects.delete + storage.objects.get + storage.objects.list + ``` + - LastPass entry: `destination gcs creds` + +- Account: `gcs-destination-failure-test@dataline-integration-testing.iam.gserviceaccount.com` + - This account does not have the `storage.multipartUploads` permissions, and will fail the integration test. The purpose of this account is to test that the `check` command can correctly detect the lack of these permissions and return an error message. + - Role: `GCS Destination User Without Multipart Permission` + - Permissions: + ``` + storage.objects.create + storage.objects.delete + storage.objects.get + storage.objects.list + ``` + - LastPass entry: `destination gcs creds (no multipart permission)` + ## Add New Output Format - Add a new enum in `S3Format`. - Modify `spec.json` to specify the configuration of this new format. From 511819b5aebbfabc54e640002ffea057f5a78463 Mon Sep 17 00:00:00 2001 From: Marcos Marx Date: Thu, 6 Jan 2022 23:39:41 -0300 Subject: [PATCH 062/215] Normalization fix Prefix Tables starting with number (#9301) * add normalization-clickhouse docker build step * bump normalization version * small changes gradle * fix settings gradle * fix eof file * correct clickhouse normalization * Refactor jinja template for scd (#9278) * merge chris code and regenerate sql files * correct scd post-hook generation for snowflake * fix scd table for snowflake prefix table with number * scd fix for all destinations * use quote * use normalize column for post-hook * change logic to apply quote * add logic to handle prefix for mssql and oracle * run tests * correct unit test * bump normalization version Co-authored-by: James Zhao Co-authored-by: Edward Gao Co-authored-by: Christophe Duong --- .../bases/base-normalization/Dockerfile | 2 +- ..._columns_resulting_into_long_names_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../models/generated/sources.yml | 1 + .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../models/generated/sources.yml | 1 + ...eam_with_co__lting_into_long_names_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../models/generated/sources.yml | 1 + ..._stream_with_co_1g_into_long_names_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../models/generated/sources.yml | 1 + .../dedup_exchange_rate_scd.sql | 2 +- .../models/generated/sources.yml | 1 + ...ream_with_c__lting_into_long_names_scd.sql | 2 +- .../some_stream_that_was_empty_scd.sql | 2 +- .../test_simple_streams/dbt_project.yml | 2 +- .../test_simple_streams/first_dbt_project.yml | 63 --------- .../1_prefix_startwith_number_scd.sql | 85 ++++++++++++ .../1_prefix_startwith_number.sql | 24 ++++ .../1_prefix_startwith_number_stg.sql | 61 +++++++++ .../1_prefix_startwith_number_ab1.sql | 20 +++ .../1_prefix_startwith_number_ab2.sql} | 12 +- .../1_prefix_startwith_number_scd.sql} | 44 +++---- .../dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../multiple_column_names_conflicts_scd.sql | 2 +- .../test_normalization/pos_dedup_cdcx_scd.sql | 2 +- 
.../renamed_dedup_cdc_excluded_scd.sql | 2 +- .../1_prefix_startwith_number.sql} | 15 +-- .../1_prefix_startwith_number_stg.sql} | 15 +-- .../models/generated/sources.yml | 1 + .../dedup_exchange_rate_ab1.sql | 25 ---- .../dedup_exchange_rate_ab2.sql | 25 ---- .../test_normalization/exchange_rate_ab1.sql | 25 ---- .../test_normalization/exchange_rate_ab2.sql | 25 ---- .../test_normalization/exchange_rate_ab3.sql | 25 ---- .../renamed_dedup_cdc_excluded_ab1.sql | 22 ---- .../renamed_dedup_cdc_excluded_scd.sql | 123 ------------------ .../renamed_dedup_cdc_excluded.sql | 25 ---- .../renamed_dedup_cdc_excluded_stg.sql | 22 ---- .../test_normalization/exchange_rate.sql | 26 ---- .../modified_models/generated/sources.yml | 11 -- .../1_prefix_startwith_number_scd.sql | 14 ++ .../1_prefix_startwith_number.sql | 14 ++ .../1_prefix_startwith_number_stg.sql | 14 ++ .../dedup_exchange_rate_scd.sql | 14 -- .../renamed_dedup_cdc_excluded_scd.sql | 14 -- .../dedup_exchange_rate.sql | 14 -- .../dedup_exchange_rate_stg.sql | 14 -- .../renamed_dedup_cdc_excluded.sql | 14 -- .../renamed_dedup_cdc_excluded_stg.sql | 14 -- .../test_normalization/exchange_rate.sql | 113 ---------------- ..._columns_resulting_into_long_names_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../models/generated/sources.yml | 1 + .../dedup_exchange_rate_scd.sql | 2 +- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 2 +- .../DEDUP_EXCHANGE_RATE_SCD.sql | 2 +- .../models/generated/sources.yml | 1 + .../data_input/catalog.json | 27 ++++ .../data_input/messages.txt | 8 ++ .../integration_tests/test_ephemeral.py | 6 +- .../destination_name_transformer.py | 21 ++- .../transform_catalog/stream_processor.py | 8 +- .../test_destination_name_transformer.py | 2 +- .../NormalizationRunnerFactory.java | 2 +- 69 files changed, 354 insertions(+), 708 deletions(-) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/{modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql => models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql} (54%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/{modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql => models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql} (68%) rename 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/{modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql => models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql} (58%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/{modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql => models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql} (58%) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql diff --git a/airbyte-integrations/bases/base-normalization/Dockerfile b/airbyte-integrations/bases/base-normalization/Dockerfile index be915f01ab0d0..4d2d25c249ad0 100644 --- a/airbyte-integrations/bases/base-normalization/Dockerfile +++ b/airbyte-integrations/bases/base-normalization/Dockerfile @@ -28,5 +28,5 @@ WORKDIR /airbyte ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" ENTRYPOINT ["/airbyte/entrypoint.sh"] -LABEL io.airbyte.version=0.1.62 +LABEL io.airbyte.version=0.1.63 LABEL io.airbyte.name=airbyte/normalization diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index a1d766dd11035..d814d04ecc61a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -3,7 +3,7 @@ partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop 
view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg'], + post_hook = ["drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index c2edf0afe7961..cd673ea4b56cf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -3,7 +3,7 @@ partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.dedup_exchange_rate_stg'], + post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml index 0e116b2bbec5d..97bf0d05cbd40 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml @@ -6,6 +6,7 @@ sources: schema: false identifier: false tables: + - name: _airbyte_raw_1_prefix_startwith_number - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cc6694836bb6a..ccec637092e39 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -3,7 +3,7 @@ partition_by = {"field": 
"_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.dedup_exchange_rate_stg'], + post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index a20276296c922..99f32737436db 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -1,7 +1,7 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.dedup_cdc_excluded_stg'], + post_hook = ["drop view _airbyte_test_normalization.dedup_cdc_excluded_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_cdc_excluded_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 1d94573fc99e1..eff375bdc37d9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,7 +1,7 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.dedup_exchange_rate_stg'], + post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml index 0e116b2bbec5d..97bf0d05cbd40 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml @@ -6,6 +6,7 
@@ sources: schema: false identifier: false tables: + - name: _airbyte_raw_1_prefix_startwith_number - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql index b04acfdabc525..3afb1b3224823 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -1,7 +1,7 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.nested_stream_with_co__lting_into_long_names_stg'], + post_hook = ["drop view _airbyte_test_normalization.nested_stream_with_co__lting_into_long_names_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('nested_stream_with_co__lting_into_long_names_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index fcc681aa95ba8..7cb32131d4ce2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,7 +1,7 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.dedup_exchange_rate_stg'], + post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml index 0e116b2bbec5d..97bf0d05cbd40 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml @@ -6,6 +6,7 @@ sources: schema: false identifier: false tables: + - name: 
_airbyte_raw_1_prefix_startwith_number - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index d098146930d0c..d0e8e603259f3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -1,7 +1,7 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg'], + post_hook = ["drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 309ac4c903fe5..9bf09bdcaa8ff 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,7 +1,7 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.dedup_exchange_rate_stg'], + post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml index 0e116b2bbec5d..97bf0d05cbd40 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml @@ -6,6 +6,7 @@ sources: schema: false identifier: false tables: + - name: _airbyte_raw_1_prefix_startwith_number - name: _airbyte_raw_dedup_cdc_excluded - name: 
_airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 8e8364a7b5072..712f6bd747522 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,7 +1,7 @@ {{ config( unique_key = "{{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}", schema = "test_normalization", - post_hook = ['drop view test_normalization.dedup_exchange_rate_stg'], + post_hook = ["drop view test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml index 3faad76c57b34..b5460fb4d43dd 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml @@ -6,6 +6,7 @@ sources: schema: false identifier: false tables: + - name: airbyte_raw_1_prefix_startwith_number - name: airbyte_raw_dedup_cdc_excluded - name: airbyte_raw_dedup_exchange_rate - name: airbyte_raw_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 8772de10b5e74..03e7d58bbeab2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -2,7 +2,7 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from 
_airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)'], + post_hook = ["delete from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)"], tags = [ "top-level" ] ) }} -- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index 1eba7ba7bd0ba..0caa4d9bfc659 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -2,7 +2,7 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)'], + post_hook = ["delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)"], tags = [ "top-level" ] ) }} -- depends_on: ref('some_stream_that_was_empty_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml index 522b1e595e8ac..9ad8158759001 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml @@ -15,7 +15,7 @@ profile: 'normalize' # These configurations specify where dbt should look for different types of files. # The `source-paths` config, for example, states that source models can be found # in the "models/" directory. You probably won't need to change these! 
-source-paths: ["modified_models"] +source-paths: ["models"] docs-paths: ["docs"] analysis-paths: ["analysis"] test-paths: ["tests"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml deleted file mode 100644 index 9ad8158759001..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml +++ /dev/null @@ -1,63 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: 'airbyte_utils' -version: '1.0' -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: 'normalize' - -# These configurations specify where dbt should look for different types of files. -# The `source-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -source-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -data-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true -# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) -# all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `source-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
-models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -dispatch: - - macro_namespace: dbt_utils - search_order: ['airbyte_utils', 'dbt_utils'] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql new file mode 100644 index 0000000000000..203534b3d53b5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -0,0 +1,85 @@ + + + + create table "postgres".test_normalization."1_prefix_startwith_number_scd" + as ( + +-- depends_on: ref('1_prefix_startwith_number_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" + -- 1_prefix_startwith_number from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as + varchar +), '') as + varchar +)) as _airbyte_unique_key, + "id", + "date", + "text", + "date" as _airbyte_start_at, + lag("date") over ( + partition by "id" + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_1_prefix_startwith_number_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as + varchar +), '') || '-' || coalesce(cast(_airbyte_start_at as + varchar +), '') || '-' || coalesce(cast(_airbyte_emitted_at as + varchar +), '') as + varchar +)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + "date", + "text", + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_1_prefix_startwith_number_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql new file mode 100644 index 0000000000000..aad38834ac949 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql @@ -0,0 +1,24 @@ + + + + create table "postgres".test_normalization."1_prefix_startwith_number" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."1_prefix_startwith_number_scd" +select + _airbyte_unique_key, + "id", + "date", + "text", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_1_prefix_startwith_number_hashid +from "postgres".test_normalization."1_prefix_startwith_number_scd" +-- 1_prefix_startwith_number from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql new file mode 100644 index 0000000000000..1d6a4096615f8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql @@ -0,0 +1,61 @@ + + + + create table "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" + as ( + +with __dbt__cte__1_prefix_startwith_number_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + jsonb_extract_path_text(_airbyte_data, 'text') as "text", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number as table_alias +-- 1_prefix_startwith_number +where 1 = 1 + +), __dbt__cte__1_prefix_startwith_number_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__1_prefix_startwith_number_ab1 +select + cast("id" as + bigint +) as "id", + cast(nullif("date", '') as + date +) as "date", + cast("text" as + varchar +) as "text", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__1_prefix_startwith_number_ab1 +-- 1_prefix_startwith_number +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__1_prefix_startwith_number_ab2 +select + md5(cast(coalesce(cast("id" as + varchar +), '') || '-' || coalesce(cast("date" as + varchar +), '') || '-' || coalesce(cast("text" as + varchar +), '') as + varchar +)) as _airbyte_1_prefix_startwith_number_hashid, + tmp.* +from __dbt__cte__1_prefix_startwith_number_ab2 tmp +-- 
1_prefix_startwith_number +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql new file mode 100644 index 0000000000000..080ffcc0b14c6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['text'], ['text']) }} as {{ adapter.quote('text') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} as table_alias +-- 1_prefix_startwith_number +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql similarity index 54% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql index 0718ac05fcbf9..5402072233ba4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql @@ -5,18 +5,16 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} +-- depends_on: {{ ref('1_prefix_startwith_number_ab1') }} select cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast({{ adapter.quote('name') }} 
as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, - cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, - cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, - cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, + cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + cast({{ adapter.quote('text') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('text') }}, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('renamed_dedup_cdc_excluded_ab1') }} --- renamed_dedup_cdc_excluded +from {{ ref('1_prefix_startwith_number_ab1') }} +-- 1_prefix_startwith_number where 1 = 1 {{ incremental_clause('_airbyte_emitted_at') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql similarity index 68% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index 5db2e106d7ddf..e2ade95cd401b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -2,18 +2,18 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)'], + post_hook = ["delete from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }})"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('1_prefix_startwith_number_stg') with {% if is_incremental() %} new_data as ( -- retrieve incremental "new" data select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + from {{ ref('1_prefix_startwith_number_stg') }} + -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} where 1 = 1 {{ incremental_clause('_airbyte_emitted_at') }} ), @@ -22,8 +22,6 @@ new_data_ids as ( select distinct {{ dbt_utils.surrogate_key([ 
adapter.quote('id'), - 'currency', - 'nzd', ]) }} as _airbyte_unique_key from new_data ), @@ -34,7 +32,7 @@ empty_new_data as ( previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + {{ star_intersect(ref('1_prefix_startwith_number_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} from {{ this }} as this_data -- make a join with new_data using primary key to filter active data that need to be updated only join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key @@ -43,15 +41,15 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from new_data union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data + select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from previous_active_scd_data ), {% else %} input_data as ( select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + from {{ ref('1_prefix_startwith_number_stg') }} + -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} ), {% endif %} scd_data as ( @@ -59,27 +57,20 @@ scd_data as ( select {{ dbt_utils.surrogate_key([ adapter.quote('id'), - 'currency', - 'nzd', ]) }} as _airbyte_unique_key, {{ adapter.quote('id') }}, - currency, - new_column, {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, + {{ adapter.quote('text') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( - partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) + partition by {{ adapter.quote('id') }} order by {{ adapter.quote('date') }} is null asc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( - partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) + partition by {{ adapter.quote('id') }} order by {{ adapter.quote('date') }} is null asc, {{ adapter.quote('date') }} desc, @@ -87,7 +78,7 @@ scd_data as ( ) = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid + _airbyte_1_prefix_startwith_number_hashid from input_data ), dedup_data as ( @@ -113,19 +104,14 @@ select _airbyte_unique_key, _airbyte_unique_key_scd, {{ adapter.quote('id') }}, - currency, - new_column, {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, + {{ adapter.quote('text') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid + _airbyte_1_prefix_startwith_number_hashid from dedup_data where _airbyte_row_num = 1 diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 98325193a5f00..2fb3816fb87fa 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -2,7 +2,7 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)'], + post_hook = ["delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_cdc_excluded_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 87453dc261145..7234b26c0f810 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,7 +2,7 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)'], + post_hook = ["delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 3fcf2e971cbc6..736e25452ae3b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -2,7 +2,7 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)'], + post_hook = ["delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)"], tags = [ "top-level" ] ) }} -- depends_on: ref('multiple_column_names_conflicts_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 36ce51399a3f3..1512b6fe8546a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -2,7 +2,7 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)'], + post_hook = ["delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)"], tags = [ "top-level" ] ) }} -- depends_on: ref('pos_dedup_cdcx_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 0c50939426f79..4fbd681d8ee61 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -2,7 +2,7 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)'], + post_hook = ["delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} -- depends_on: ref('renamed_dedup_cdc_excluded_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql similarity index 58% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql index 8529ede3dcfac..77aba25edc2a7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql @@ -5,23 +5,18 @@ tags = [ "top-level" ] ) }} -- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} +-- depends_on: {{ ref('1_prefix_startwith_number_scd') }} select _airbyte_unique_key, {{ adapter.quote('id') }}, - currency, - new_column, {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, + {{ adapter.quote('text') }}, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + _airbyte_1_prefix_startwith_number_hashid +from {{ ref('1_prefix_startwith_number_scd') }} +-- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} where 1 = 1 and _airbyte_active_row = 1 {{ incremental_clause('_airbyte_emitted_at') }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql similarity index 58% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql index a4c1e8816f8ae..69bff1d44aaa7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql @@ -5,21 +5,16 @@ tags = [ "top-level-intermediate" ] ) }} -- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +-- depends_on: {{ ref('1_prefix_startwith_number_ab2') }} select {{ dbt_utils.surrogate_key([ adapter.quote('id'), - 'currency', - 'new_column', adapter.quote('date'), - 'timestamp_col', - adapter.quote('HKD@spéçiäl & characters'), - 'nzd', - 'usd', - ]) }} as _airbyte_dedup_exchange_rate_hashid, + adapter.quote('text'), + ]) }} as _airbyte_1_prefix_startwith_number_hashid, tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate +from {{ ref('1_prefix_startwith_number_ab2') }} tmp +-- 1_prefix_startwith_number where 1 = 1 {{ incremental_clause('_airbyte_emitted_at') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml index 0e116b2bbec5d..97bf0d05cbd40 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml @@ -6,6 +6,7 @@ sources: schema: false identifier: false tables: + - name: _airbyte_raw_1_prefix_startwith_number - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 23e1bb70c5879..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index b43312b67ebf1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('id') }}, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast(new_column as {{ dbt_utils.type_float() }}) as new_column, - cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_bigint() }}) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as 
_airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql deleted file mode 100644 index ba88ffa22b0d9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql deleted file mode 100644 index e6cf7ee1e5760..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('exchange_rate_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_float() }}) as {{ 
adapter.quote('id') }}, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast(new_column as {{ dbt_utils.type_float() }}) as new_column, - cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_float() }}) as usd, - cast({{ adapter.quote('column`_\'with""_quotes') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('exchange_rate_ab1') }} --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql deleted file mode 100644 index 96c96a4d4799c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'new_column', - adapter.quote('date'), - 'timestamp_col', - adapter.quote('HKD@spéçiäl & characters'), - 'nzd', - 'usd', - adapter.quote('column`_\'with""_quotes'), - ]) }} as _airbyte_exchange_rate_hashid, - tmp.* -from {{ ref('exchange_rate_ab2') }} tmp --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql deleted file mode 100644 index 590e1e755b5c4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} 
as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index 3803571720588..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,123 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = ['delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)'], - tags = [ "top-level" ] -) }} --- depends_on: ref('renamed_dedup_cdc_excluded_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('renamed_dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data - union all - select 
{{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_updated_at as _airbyte_start_at, - lag(_ab_cdc_updated_at) over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_renamed_dedup_cdc_excluded_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index 80ff3fc2138ca..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }} -select - _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - 
_ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from {{ ref('renamed_dedup_cdc_excluded_scd') }} --- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql deleted file mode 100644 index 86d0e6f4451d0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - adapter.quote('name'), - '_ab_cdc_lsn', - '_ab_cdc_updated_at', - '_ab_cdc_deleted_at', - ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, - tmp.* -from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 40b5ffb3f87d9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, - {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml deleted file mode 100644 index dd538a80131ae..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml +++ /dev/null @@ -1,11 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql new file mode 100644 index 0000000000000..3eb10166dd315 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -0,0 +1,14 @@ + + delete + from "postgres".test_normalization."1_prefix_startwith_number_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "1_prefix_startwith_number_scd__dbt_tmp" + ); + + insert into "postgres".test_normalization."1_prefix_startwith_number_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "text", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "text", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid" + from "1_prefix_startwith_number_scd__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql new file mode 100644 index 0000000000000..a95bdd8ef269c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql @@ -0,0 +1,14 @@ + + delete + from "postgres".test_normalization."1_prefix_startwith_number" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "1_prefix_startwith_number__dbt_tmp" + ); + + insert into "postgres".test_normalization."1_prefix_startwith_number" ("_airbyte_unique_key", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid") + ( + select "_airbyte_unique_key", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_1_prefix_startwith_number_hashid" + from "1_prefix_startwith_number__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql new file mode 100644 index 0000000000000..c69b8a6554952 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql @@ -0,0 +1,14 @@ + + delete + from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "1_prefix_startwith_number_stg__dbt_tmp" + ); + + insert into "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" ("_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "1_prefix_startwith_number_stg__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index a5de1de2333db..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,14 +0,0 @@ - - delete - from "postgres".test_normalization."dedup_exchange_rate_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "dedup_exchange_rate_scd__dbt_tmp" - ); - - insert into "postgres".test_normalization."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid", "new_column", "id") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid", "new_column", "id" - from "dedup_exchange_rate_scd__dbt_tmp" - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index dfe10c6da794d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,14 +0,0 @@ - - delete - from "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "renamed_dedup_cdc_excluded_scd__dbt_tmp" - ); - - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" - from "renamed_dedup_cdc_excluded_scd__dbt_tmp" - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index ecc81c8883b07..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,14 +0,0 @@ - - delete - from "postgres".test_normalization."dedup_exchange_rate" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "dedup_exchange_rate__dbt_tmp" - ); - - insert into "postgres".test_normalization."dedup_exchange_rate" ("_airbyte_unique_key", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid", "new_column", "id") - ( - select "_airbyte_unique_key", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid", "new_column", "id" - from "dedup_exchange_rate__dbt_tmp" - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index f35951198e0fb..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,14 +0,0 @@ - - delete - from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "dedup_exchange_rate_stg__dbt_tmp" - ); - - insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" ("_airbyte_dedup_exchange_rate_hashid", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "new_column", "id", "usd") - ( - select "_airbyte_dedup_exchange_rate_hashid", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "new_column", "id", "usd" - from "dedup_exchange_rate_stg__dbt_tmp" - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index c1d1c310179d3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,14 +0,0 @@ - - delete - from "postgres".test_normalization."renamed_dedup_cdc_excluded" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "renamed_dedup_cdc_excluded__dbt_tmp" - ); - - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") - ( - select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" - from "renamed_dedup_cdc_excluded__dbt_tmp" - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql deleted file mode 100644 index 55db812277ae0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,14 +0,0 @@ - - delete - from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "renamed_dedup_cdc_excluded_stg__dbt_tmp" - ); - - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" 
("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") - ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" - from "renamed_dedup_cdc_excluded_stg__dbt_tmp" - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index d7f0d50be215f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,113 +0,0 @@ - - - create table "postgres".test_normalization."exchange_rate__dbt_tmp" - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_exchange_rate -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'currency') as currency, - jsonb_extract_path_text(_airbyte_data, 'new_column') as new_column, - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, - jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, - jsonb_extract_path_text(_airbyte_data, 'USD') as usd, - jsonb_extract_path_text(_airbyte_data, 'column`_''with"_quotes') as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast("id" as - float -) as "id", - cast(currency as - varchar -) as currency, - cast(new_column as - float -) as new_column, - cast(nullif("date", '') as - date -) as "date", - cast(nullif(timestamp_col, '') as - timestamp with time zone -) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_'with""_quotes" as - varchar -) as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') || '-' || coalesce(cast(new_column as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') || '-' || coalesce(cast(timestamp_col as - varchar -), '') || '-' 
|| coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || coalesce(cast(nzd as - varchar -), '') || '-' || coalesce(cast(usd as - varchar -), '') || '-' || coalesce(cast("column`_'with""_quotes" as - varchar -), '') as - varchar -)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - "id", - currency, - new_column, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - nzd, - usd, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "postgres".test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 1e30bf57e4595..e89e97f58fea2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -2,7 +2,7 @@ sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg'], + post_hook = ["drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 81b85e492cd51..3cb089de2de1a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,7 +2,7 @@ sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view 
_airbyte_test_normalization.dedup_exchange_rate_stg'], + post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml index 0e116b2bbec5d..97bf0d05cbd40 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml @@ -6,6 +6,7 @@ sources: schema: false identifier: false tables: + - name: _airbyte_raw_1_prefix_startwith_number - name: _airbyte_raw_dedup_cdc_excluded - name: _airbyte_raw_dedup_exchange_rate - name: _airbyte_raw_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 363a39ec25fe7..0145a94818b0a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,7 +2,7 @@ sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ['drop view _airbyte_test_normalization.dedup_exchange_rate_stg'], + post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 167cdb066cb4c..9435ebaf2bc1d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -2,7 +2,7 @@ cluster_by = 
["_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_UNIQUE_KEY_SCD", "_AIRBYTE_EMITTED_AT"], unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", schema = "TEST_NORMALIZATION", - post_hook = ['drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG'], + post_hook = ["drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG"], tags = [ "top-level" ] ) }} -- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 2b62f6776a223..688926bdcab0f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -2,7 +2,7 @@ cluster_by = ["_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_UNIQUE_KEY_SCD", "_AIRBYTE_EMITTED_AT"], unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", schema = "TEST_NORMALIZATION", - post_hook = ['drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG'], + post_hook = ["drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG"], tags = [ "top-level" ] ) }} -- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml index bec4269ba6bf8..1d9d7ae323bbb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml @@ -6,6 +6,7 @@ sources: schema: false identifier: false tables: + - name: _AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER - name: _AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - name: _AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - name: _AIRBYTE_RAW_EXCHANGE_RATE diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json index dc25bf3713604..a193f98bd8e86 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json @@ -178,6 +178,33 @@ "destination_sync_mode": "append_dedup", "primary_key": [["id"]] }, + { + "stream": { + "name": "1_prefix_startwith_number", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "date": { + "type": "string", + "format": "date" + 
}, + "text": { + "type": "string" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, { "stream": { "name": "multiple_column_names_conflicts", diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt index e84ca1f63c79a..c4b5b4d8543a1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt @@ -42,4 +42,12 @@ {"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lotus","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33280,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} {"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lily","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33281,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2020-08-29", "text": "hi 1"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637689100, "data": { "id": 1, "date": "2020-08-30", "text": "hi 2"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637789200, "data": { "id": 2, "date": "2020-08-31", "text": "hi 1"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637889300, "data": { "id": 2, "date": "2020-08-31", "text": "hi 2"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637989400, "data": { "id": 2, "date": "2020-09-01", "text": "hi 3"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990700, "data": { "id": 1, "date": "2020-09-01", "text": "hi 3"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990800, "data": { "id": 2, "date": "2020-09-01", "text": "hi 4"}}} + {"type":"RECORD","record":{"stream":"multiple_column_names_conflicts","data":{"id":1,"User Id":"chris","user_id":42,"User id":300,"user id": 102,"UserId":101},"emitted_at":1623959926}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py index 287789e378104..f8eb6a324f8e9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py @@ -54,7 +54,9 @@ def test_destination_supported_limits(destination_type: DestinationType, column_ pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable (MYSQL is also skipped)") if destination_type.value == DestinationType.ORACLE.value: # Airbyte uses a few columns for metadata and Oracle limits are right at 1000 - column_count = 995 + column_count = 993 + if destination_type.value == DestinationType.MSSQL.value: + column_count = 999 run_test(destination_type, column_count) 
@@ -62,7 +64,7 @@ def test_destination_supported_limits(destination_type: DestinationType, column_ "integration_type, column_count, expected_exception_message", [ ("Postgres", 1665, "target lists can have at most 1664 entries"), - ("BigQuery", 2500, "The view is too large."), + ("BigQuery", 3000, "The view is too large."), ("Snowflake", 2000, "Operation failed because soft limit on objects of type 'Column' per table was exceeded."), ("Redshift", 1665, "target lists can have at most 1664 entries"), ("MySQL", 250, "Row size too large"), diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py index ab7b3894d6134..66a1372bfcdff 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py @@ -150,6 +150,11 @@ def __normalize_non_column_identifier_name( if truncate: result = self.truncate_identifier_name(input_name=result, conflict=conflict, conflict_level=conflict_level) result = self.__normalize_identifier_case(result, is_quoted=False) + if result[0].isdigit(): + if self.destination_type == DestinationType.MSSQL: + result = "_" + result + elif self.destination_type == DestinationType.ORACLE: + result = "ab_" + result return result def __normalize_identifier_name( @@ -165,13 +170,7 @@ def __normalize_identifier_name( result = result.replace("`", "_") result = result.replace("'", "\\'") result = self.__normalize_identifier_case(result, is_quoted=True) - if self.destination_type == DestinationType.ORACLE: - # Oracle dbt lib doesn't implemented adapter quote yet. - result = f"quote('{result}')" - elif self.destination_type == DestinationType.CLICKHOUSE: - result = f"quote('{result}')" - else: - result = f"adapter.quote('{result}')" + result = self.apply_quote(result) if not in_jinja: result = jinja_call(result) return result @@ -182,6 +181,14 @@ def __normalize_identifier_name( return f"'{result}'" return result + def apply_quote(self, input: str) -> str: + if self.destination_type == DestinationType.ORACLE: + # Oracle dbt lib doesn't implemented adapter quote yet. 
+ return f"quote('{input}')" + elif self.destination_type == DestinationType.CLICKHOUSE: + return f"quote('{input}')" + return f"adapter.quote('{input}')" + def __normalize_naming_conventions(self, input_name: str) -> str: result = input_name if self.destination_type.value == DestinationType.ORACLE.value: diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index d5ba201b9746b..bb3a689ff0c8b 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1064,15 +1064,17 @@ def add_to_outputs( if suffix == "scd": stg_schema = self.get_schema(True) stg_table = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "stg", truncate_name) + if self.name_transformer.needs_quotes(stg_table): + stg_table = jinja_call(self.name_transformer.apply_quote(stg_table)) if self.destination_type.value == DestinationType.POSTGRES.value: # Keep only rows with the max emitted_at to keep incremental behavior config["post_hook"] = ( - f"['delete from {stg_schema}.{stg_table} " + f'["delete from {stg_schema}.{stg_table} ' + f"where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) " - + f"from {stg_schema}.{stg_table})']" + + f'from {stg_schema}.{stg_table})"]' ) else: - config["post_hook"] = f"['drop view {stg_schema}.{stg_table}']" + config["post_hook"] = f'["drop view {stg_schema}.{stg_table}"]' else: # incremental is handled in the SCD SQL already sql = self.add_incremental_clause(sql) diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py index e5c4a0d2eccd5..61754fc2fc599 100644 --- a/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py +++ b/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py @@ -121,7 +121,7 @@ def test_transform_standard_naming(input_str: str, expected: str): ("100x2003", "Snowflake", "100x2003", "{{ adapter.quote('100x2003') }}"), ("100x2004", "Redshift", "100x2004", "{{ adapter.quote('100x2004') }}"), ("100x2005", "MySQL", "100x2005", "{{ adapter.quote('100x2005') }}"), - ("100x2006", "MSSQL", "100x2006", "{{ adapter.quote('100x2006') }}"), + ("100x2006", "MSSQL", "_100x2006", "{{ adapter.quote('100x2006') }}"), # Reserved Keywords in BQ and MySQL ("Groups", "Postgres", "groups", "groups"), ("Groups", "BigQuery", "Groups", "{{ adapter.quote('Groups') }}"), diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index 0e921d438a0df..a433e4c3a8f05 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -14,7 +14,7 @@ public class NormalizationRunnerFactory { public static final String BASE_NORMALIZATION_IMAGE_NAME = "airbyte/normalization"; - public static final String NORMALIZATION_VERSION = "0.1.62"; + public static final String NORMALIZATION_VERSION = "0.1.63"; static final Map> NORMALIZATION_MAPPING = 
ImmutableMap.>builder() From dfb7b8cd3f22426ed0bc55f7b7fe83de4524a1d6 Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Thu, 6 Jan 2022 20:22:56 -0800 Subject: [PATCH 063/215] =?UTF-8?q?=F0=9F=90=9E=20Destination=20databricks?= =?UTF-8?q?:=20update=20jdbc=20driver=20to=20patch=20log4j=20(#7622)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Download spark jdbc driver in build cmd * Download jdbc driver in ci integration test * Update comments * Set up cloud sdk * Add comments * Download jdbc driver from databricks directly * Update readme * Use unzip command * Install unzip for databricks * Add databricks build status * Close database * Log more error information * Close database when checking connection * Update spec --- .github/workflows/publish-command.yml | 4 ++++ .github/workflows/test-command.yml | 4 ++++ .../src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/builds.md | 2 +- .../destination-databricks/README.md | 2 ++ .../DatabricksDestinationAcceptanceTest.java | 12 ++++++---- .../jdbc/copy/CopyConsumerFactory.java | 1 + .../jdbc/copy/CopyDestination.java | 3 +-- .../workers/DefaultCheckConnectionWorker.java | 2 +- tools/bin/ci_integration_test.sh | 6 +++++ tools/integrations/manage.sh | 6 +++++ tools/lib/databricks.sh | 24 +++++++++++++++++++ 12 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 tools/lib/databricks.sh diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index 3b4cd245a375f..f73a4dacd4161 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -66,6 +66,10 @@ jobs: uses: actions/checkout@v2 with: repository: ${{github.event.pull_request.head.repo.full_name}} # always use the branch's repository + - name: Install Unzip for Databricks + if: github.event.inputs.connector == 'connectors/destination-databricks' + run: | + apt-get update && apt-get install -y unzip - name: Install Java uses: actions/setup-java@v1 with: diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 000c6dd993b08..8c747eaf76451 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -61,6 +61,10 @@ jobs: uses: actions/checkout@v2 with: repository: ${{ github.event.inputs.repo }} + - name: Install Unzip for Databricks + if: github.event.inputs.connector == 'connectors/destination-databricks' + run: | + apt-get update && apt-get install -y unzip - name: Install Java uses: actions/setup-java@v1 with: diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 718f72a3fec34..49af7a5f0f6c6 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1985,7 +1985,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.2.9" +- dockerImage: "airbyte/source-github:0.2.10" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/github" connectionSpecification: diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 794c8fa333846..001ae80fd3248 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -114,7 +114,7 @@ | BigQuery | 
[![destination-bigquery](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-bigquery%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-bigquery) | | ClickHouse | [![destination-clickhouse](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-clickhouse%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-clickhouse) | | Cassandra | [![destination-cassandra](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-cassandra%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-cassandra) | -| Databricks | (Temporarily Not Available) | +| Databricks | [![destination-databricks](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-databricks%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-databricks) | | Dev Null | [![destination-dev-null](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-dev-null%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-dev-null) | | Elasticsearch | (Temporarily Not Available) | | End-to-End Testing | [![destination-e2e-test](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-e2e-test%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-e2e-test) | diff --git a/airbyte-integrations/connectors/destination-databricks/README.md b/airbyte-integrations/connectors/destination-databricks/README.md index 5a9ab5bf1cb1e..5d96d1c7d0d31 100644 --- a/airbyte-integrations/connectors/destination-databricks/README.md +++ b/airbyte-integrations/connectors/destination-databricks/README.md @@ -6,6 +6,8 @@ For information about how to use this connector within Airbyte, see [the User Do ## Databricks JDBC Driver This connector requires a JDBC driver to connect to Databricks cluster. The driver is developed by Simba. Before downloading and using this driver, you must agree to the [JDBC ODBC driver license](https://databricks.com/jdbc-odbc-driver-license). This means that you can only use this driver to connector third party applications to Apache Spark SQL within a Databricks offering using the ODBC and/or JDBC protocols. The driver can be downloaded from [here](https://databricks.com/spark/jdbc-drivers-download). +The CI downloads the JDBC driver in [this script](https://github.com/airbytehq/airbyte/blob/master/tools/lib/databricks.sh). + This is currently a private connector that is only available in Airbyte Cloud. To build and publish this connector, first download the driver and put it under the `lib` directory. Please do not publish this connector publicly. We are working on a solution to publicize it. 
## Local development diff --git a/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java index 1bdda60327006..55f03862fa300 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java @@ -127,13 +127,17 @@ protected void tearDown(final TestDestinationEnv testEnv) throws SQLException { .deleteObjects(new DeleteObjectsRequest(s3Config.getBucketName()).withKeys(keysToDelete)); LOGGER.info("Deleted {} file(s).", result.getDeletedObjects().size()); } + s3Client.shutdown(); // clean up database LOGGER.info("Dropping database schema {}", databricksConfig.getDatabaseSchema()); - final Database database = getDatabase(databricksConfig); - // we cannot use jooq dropSchemaIfExists method here because there is no proper dialect for - // Databricks, and it incorrectly quotes the schema name - database.query(ctx -> ctx.execute(String.format("DROP SCHEMA IF EXISTS %s CASCADE;", databricksConfig.getDatabaseSchema()))); + try (final Database database = getDatabase(databricksConfig)) { + // we cannot use jooq dropSchemaIfExists method here because there is no proper dialect for + // Databricks, and it incorrectly quotes the schema name + database.query(ctx -> ctx.execute(String.format("DROP SCHEMA IF EXISTS %s CASCADE;", databricksConfig.getDatabaseSchema()))); + } catch (final Exception e) { + throw new SQLException(e); + } } private static Database getDatabase(final DatabricksDestinationConfig databricksConfig) { diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyConsumerFactory.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyConsumerFactory.java index 2a336586c8283..f27678d52d2d5 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyConsumerFactory.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyConsumerFactory.java @@ -148,6 +148,7 @@ private static void closeAsOneTransaction(final List streamCopiers for (final var copier : streamCopiers) { copier.removeFileAndDropTmpTable(); } + db.close(); } if (firstException != null) { throw firstException; diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyDestination.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyDestination.java index 33bad47fdd9a1..fe3736956aed5 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyDestination.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyDestination.java @@ -54,10 +54,9 @@ public AirbyteConnectionStatus check(final JsonNode config) { .withMessage("Could not connect to the 
staging persistence with the provided configuration. \n" + e.getMessage()); } - try { + try (final JdbcDatabase database = getDatabase(config)) { final var nameTransformer = getNameTransformer(); final var outputSchema = nameTransformer.convertStreamName(config.get(schemaFieldName).asText()); - final JdbcDatabase database = getDatabase(config); AbstractJdbcDestination.attemptSQLCreateAndDropTableOperations(outputSchema, database, nameTransformer, getSqlOperations()); return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/DefaultCheckConnectionWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/DefaultCheckConnectionWorker.java index 1acb9ff03b414..d6a5204a1ff25 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/DefaultCheckConnectionWorker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/DefaultCheckConnectionWorker.java @@ -77,7 +77,7 @@ public StandardCheckConnectionOutput run(final StandardCheckConnectionInput inpu LOGGER.debug("Check connection job received output: {}", output); return output; } else { - throw new WorkerException("Error while getting checking connection."); + throw new WorkerException(String.format("Error checking connection, status: %s, exit code: %d", status, exitCode)); } } catch (final Exception e) { diff --git a/tools/bin/ci_integration_test.sh b/tools/bin/ci_integration_test.sh index b0f651c59b837..6c66eac1bdec3 100755 --- a/tools/bin/ci_integration_test.sh +++ b/tools/bin/ci_integration_test.sh @@ -3,6 +3,7 @@ set -e . tools/lib/lib.sh +. tools/lib/databricks.sh # runs integration tests for an integration name @@ -10,6 +11,7 @@ connector="$1" all_integration_tests=$(./gradlew integrationTest --dry-run | grep 'integrationTest SKIPPED' | cut -d: -f 4) run() { if [[ "$connector" == "all" ]] ; then + _get_databricks_jdbc_driver echo "Running: ./gradlew --no-daemon --scan integrationTest" ./gradlew --no-daemon --scan integrationTest else @@ -34,6 +36,10 @@ else integrationTestCommand=":airbyte-integrations:connectors:$connector:integrationTest" fi if [ -n "$selected_integration_test" ] ; then + if [[ "$selected_integration_test" == *"databricks"* ]] ; then + _get_databricks_jdbc_driver + fi + echo "Running: ./gradlew --no-daemon --scan $integrationTestCommand" ./gradlew --no-daemon --scan "$integrationTestCommand" else diff --git a/tools/integrations/manage.sh b/tools/integrations/manage.sh index 38395b060e068..f6ae667e3a240 100755 --- a/tools/integrations/manage.sh +++ b/tools/integrations/manage.sh @@ -4,6 +4,7 @@ set -e set -x . tools/lib/lib.sh +. tools/lib/databricks.sh USAGE=" Usage: $(basename "$0") @@ -37,6 +38,11 @@ cmd_build() { [ -d "$path" ] || error "Path must be the root path of the integration" local run_tests=$1; shift || run_tests=true + + if [[ "airbyte-integrations/connectors/destination-databricks" == "${path}" ]]; then + _get_databricks_jdbc_driver + fi + echo "Building $path" ./gradlew --no-daemon "$(_to_gradle_path "$path" clean)" ./gradlew --no-daemon "$(_to_gradle_path "$path" build)" diff --git a/tools/lib/databricks.sh b/tools/lib/databricks.sh new file mode 100644 index 0000000000000..72bb57c951246 --- /dev/null +++ b/tools/lib/databricks.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +. 
tools/lib/lib.sh + +# Whoever runs this script must accept the following terms & conditions: +# https://databricks.com/jdbc-odbc-driver-license +_get_databricks_jdbc_driver() { + local driver_zip="SimbaSparkJDBC42-2.6.21.1039.zip" + local driver_file="SparkJDBC42.jar" + local driver_url="https://databricks-bi-artifacts.s3.us-east-2.amazonaws.com/simbaspark-drivers/jdbc/2.6.21/${driver_zip}" + local connector_path="airbyte-integrations/connectors/destination-databricks" + + if [[ -f "${connector_path}/lib/${driver_file}" ]] ; then + echo "[Databricks] Spark JDBC driver already exists" + else + echo "[Databricks] Downloading Spark JDBC driver..." + curl -o "${connector_path}/lib/${driver_zip}" "${driver_url}" + + echo "[Databricks] Extracting Spark JDBC driver..." + unzip "${connector_path}/lib/${driver_zip}" "${driver_file}" + mv "${driver_file}" "${connector_path}/lib/" + rm "${connector_path}/lib/${driver_zip}" + fi +} From 8237a70ce9b366daa5f779f9187520872a1584ed Mon Sep 17 00:00:00 2001 From: Iryna Grankova <87977540+igrankova@users.noreply.github.com> Date: Fri, 7 Jan 2022 07:16:46 +0200 Subject: [PATCH 064/215] =?UTF-8?q?=F0=9F=8E=89=20Destination=20databricks?= =?UTF-8?q?:=20update=20fields=20specifications=20(#9153)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Files title/description update for issue # 8948 * Version update for issue # 8948 * Changelogs update for PR #9153 * Update changelog Co-authored-by: LiRen Tu Co-authored-by: Liren Tu --- .../destination-databricks/Dockerfile | 2 +- .../src/main/resources/spec.json | 22 +++++++++---------- docs/integrations/destinations/databricks.md | 1 + 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/destination-databricks/Dockerfile b/airbyte-integrations/connectors/destination-databricks/Dockerfile index 4dd20ccbdaa10..c3405768633ef 100644 --- a/airbyte-integrations/connectors/destination-databricks/Dockerfile +++ b/airbyte-integrations/connectors/destination-databricks/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-databricks COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/destination-databricks diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json index 4112fb86fd89e..4254f7ad7f0da 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json @@ -24,28 +24,28 @@ "default": false }, "databricks_server_hostname": { - "title": "Databricks Cluster Server Hostname", + "title": "Server Hostname", "type": "string", - "description": "", + "description": "Databricks Cluster Server Hostname.", "examples": ["abc-12345678-wxyz.cloud.databricks.com"] }, "databricks_http_path": { - "title": "Databricks Cluster HTTP Path", + "title": "HTTP Path", "type": "string", - "description": "", + "description": "Databricks Cluster HTTP Path.", "examples": ["sql/protocolvx/o/1234567489/0000-1111111-abcd90"] }, "databricks_port": { - "title": "Databricks Cluster Port", + "title": "Port", "type": "string", - "description": "", + "description": "Databricks Cluster Port.", "default": "443", "examples": ["443"] }, "databricks_personal_access_token": { - "title": "Databricks Personal Access Token", + "title": "Access Token", "type": 
"string", - "description": "", + "description": "Databricks Personal Access Token for making authenticated requests.", "examples": ["dapi0123456789abcdefghij0123456789AB"], "airbyte_secret": true }, @@ -59,7 +59,7 @@ "data_source": { "title": "Data Source", "type": "object", - "description": "Storage on which the delta lake is built", + "description": "Storage on which the delta lake is built.", "oneOf": [ { "title": "Amazon S3", @@ -126,12 +126,12 @@ "s3_access_key_id": { "type": "string", "description": "The Access Key Id granting allow one to access the above S3 staging bucket. Airbyte requires Read and Write permissions to the given bucket.", - "title": "S3 Key Id", + "title": "S3 Access Key ID", "examples": ["A012345678910EXAMPLE"], "airbyte_secret": true }, "s3_secret_access_key": { - "title": "S3 Access Key", + "title": "S3 Secret Access Key", "type": "string", "description": "The corresponding secret to the above access key id.", "examples": ["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"], diff --git a/docs/integrations/destinations/databricks.md b/docs/integrations/destinations/databricks.md index e14fd99fe7fde..949969576c003 100644 --- a/docs/integrations/destinations/databricks.md +++ b/docs/integrations/destinations/databricks.md @@ -103,6 +103,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2022-01-06 | [\#7622](https://github.com/airbytehq/airbyte/pull/7622) [\#9153](https://github.com/airbytehq/airbyte/issues/9153) | Upgrade Spark JDBC driver to `2.6.21` to patch Log4j vulnerability; update connector fields title/description. | | 0.1.2 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | | 0.1.1 | 2021-10-05 | [\#6792](https://github.com/airbytehq/airbyte/pull/6792) | Require users to accept Databricks JDBC Driver [Terms & Conditions](https://databricks.com/jdbc-odbc-driver-license). | | 0.1.0 | 2021-09-14 | [\#5998](https://github.com/airbytehq/airbyte/pull/5998) | Initial private release. | From 968ae343e631b2066c359877a93058cc1b2eff6c Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Fri, 7 Jan 2022 01:14:07 -0800 Subject: [PATCH 065/215] Bump Airbyte version from 0.35.3-alpha to 0.35.4-alpha (#9353) Co-authored-by: sherifnada --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-bootloader/Dockerfile | 4 ++-- airbyte-container-orchestrator/Dockerfile | 6 +++--- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 4 ++-- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 10 +++++----- charts/airbyte/values.yaml | 10 +++++----- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 12 ++++++------ kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 12 ++++++------ 17 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 82c6923033783..6140765cd1819 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.3-alpha +current_version = 0.35.4-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? 
diff --git a/.env b/.env index 2dc357e36e1d0..a538aedd6eedc 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.3-alpha +VERSION=0.35.4-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index 8534ee8eab9f8..4bb6ba12ebe05 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -5,6 +5,6 @@ ENV APPLICATION airbyte-bootloader WORKDIR /app -ADD bin/${APPLICATION}-0.35.3-alpha.tar /app +ADD bin/${APPLICATION}-0.35.4-alpha.tar /app -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index 7f34019030c4b..303ccb61a1c64 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -19,12 +19,12 @@ RUN add-apt-repository \ RUN apt-get update && apt-get install -y docker-ce-cli jq ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.3-alpha.tar /app +ADD bin/${APPLICATION}-0.35.4-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 09cc13d39c00b..b5bc7078bee92 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.3-alpha.tar /app +ADD bin/${APPLICATION}-0.35.4-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 9d3ce0481a510..d68bdbccb7298 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.35.3-alpha.tar /app +ADD bin/${APPLICATION}-0.35.4-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 72d6b1b47d7da..5c8026758dec6 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.3-alpha", + "version": "0.35.4-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": "0.35.3-alpha", + "version": "0.35.4-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 74657a7d6a19b..1ad7582e5899c 100644 --- a/airbyte-webapp/package.json +++ 
b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.3-alpha", + "version": "0.35.4-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 993740bba1d82..21a62fc319937 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.3-alpha.tar /app +ADD bin/${APPLICATION}-0.35.4-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.3-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index d4fdb10626390..ac6ab7f7bd7cd 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.3-alpha" +appVersion: "0.35.4-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 58fe3b6b6a0ab..de420001da11d 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -55,7 +55,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -86,7 +86,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.35.4-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -120,7 +120,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -148,7 +148,7 @@ | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.3-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 31d9bc0c1a5cf..617c953f31cef 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.3-alpha + tag: 0.35.4-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -140,7 +140,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.3-alpha + tag: 0.35.4-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -245,7 +245,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.3-alpha + tag: 0.35.4-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -357,7 +357,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.3-alpha + tag: 0.35.4-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## @@ -446,7 +446,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.35.3-alpha + tag: 0.35.4-alpha ## @section Temporal parameters ## TODO: Move to consuming temporal from a dedicated helm chart diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 99d7c703f9ecf..027a90826be38 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -99,7 +99,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.3-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.4-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index bb6f2dfe836a1..561e0493afc98 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.3-alpha +AIRBYTE_VERSION=0.35.4-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index c82ddcd10811e..481a7bd157321 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/bootloader - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/scheduler - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/server - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/webapp - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/worker - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index bb6f2dfe836a1..561e0493afc98 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.3-alpha +AIRBYTE_VERSION=0.35.4-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index c284f922b3142..c22629b231506 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/bootloader - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/scheduler - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/server - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/webapp - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: airbyte/worker - newTag: 0.35.3-alpha + newTag: 0.35.4-alpha - name: temporalio/auto-setup newTag: 1.7.0 From 086426339ca1c8af32be7b58cfd3c5a5ce65f0eb Mon Sep 17 00:00:00 2001 From: Charles Date: Fri, 7 Jan 2022 07:39:55 -0800 Subject: [PATCH 066/215] remove helm install build steps (#9024) --- .github/workflows/helm.yaml | 148 ++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 74 deletions(-) diff --git a/.github/workflows/helm.yaml b/.github/workflows/helm.yaml index 8a2a3629fbe3e..8aa51fd9ead63 100644 --- a/.github/workflows/helm.yaml +++ b/.github/workflows/helm.yaml @@ -1,74 +1,74 @@ -name: Helm -on: - push: - paths: - - ".github/workflows/helm.yaml" - - "charts/**" - pull_request: - paths: - - ".github/workflows/helm.yaml" - - "charts/**" -jobs: - lint: - name: Lint - runs-on: ubuntu-latest - timeout-minutes: 20 - steps: - - uses: actions/checkout@v2 - - name: Setup Kubectl - uses: azure/setup-kubectl@v1 - - name: Setup Helm - uses: azure/setup-helm@v1 - with: - version: "3.6.3" - - name: Lint Chart - 
working-directory: ./charts/airbyte - run: ./ci.sh lint - - generate-docs: - name: Generate Docs Parameters - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v2 - - name: Checkout bitnami-labs/readme-generator-for-helm - uses: actions/checkout@v2 - with: - repository: "bitnami-labs/readme-generator-for-helm" - ref: "55cab5dd2191c4ffa7245cfefa428d4d9bb12730" - path: readme-generator-for-helm - - name: Install readme-generator-for-helm dependencies - working-directory: readme-generator-for-helm - run: npm install -g - - name: Test can update README with generated parameters - working-directory: charts/airbyte - run: echo Temporarily disabled ./ci.sh check-docs-updated - - install: - name: Install - runs-on: ubuntu-latest - timeout-minutes: 20 - steps: - - uses: actions/checkout@v2 - - name: Setup Kubectl - uses: azure/setup-kubectl@v1 - - name: Setup Helm - uses: azure/setup-helm@v1 - with: - version: "3.6.3" - - name: Setup Kind Cluster - uses: helm/kind-action@v1.2.0 - with: - version: "v0.11.1" - image: "kindest/node:v1.21.1" - - name: Install airbyte chart - working-directory: ./charts/airbyte - run: ./ci.sh install - - if: always() - name: Print diagnostics - working-directory: ./charts/airbyte - run: ./ci.sh diagnostics - - if: success() - name: Test airbyte chart - working-directory: ./charts/airbyte - run: ./ci.sh test +#name: Helm +#on: +# push: +# paths: +# - ".github/workflows/helm.yaml" +# - "charts/**" +# pull_request: +# paths: +# - ".github/workflows/helm.yaml" +# - "charts/**" +#jobs: +# lint: +# name: Lint +# runs-on: ubuntu-latest +# timeout-minutes: 20 +# steps: +# - uses: actions/checkout@v2 +# - name: Setup Kubectl +# uses: azure/setup-kubectl@v1 +# - name: Setup Helm +# uses: azure/setup-helm@v1 +# with: +# version: "3.6.3" +# - name: Lint Chart +# working-directory: ./charts/airbyte +# run: ./ci.sh lint +# +# generate-docs: +# name: Generate Docs Parameters +# runs-on: ubuntu-latest +# timeout-minutes: 10 +# steps: +# - uses: actions/checkout@v2 +# - name: Checkout bitnami-labs/readme-generator-for-helm +# uses: actions/checkout@v2 +# with: +# repository: "bitnami-labs/readme-generator-for-helm" +# ref: "55cab5dd2191c4ffa7245cfefa428d4d9bb12730" +# path: readme-generator-for-helm +# - name: Install readme-generator-for-helm dependencies +# working-directory: readme-generator-for-helm +# run: npm install -g +# - name: Test can update README with generated parameters +# working-directory: charts/airbyte +# run: echo Temporarily disabled ./ci.sh check-docs-updated +# +# install: +# name: Install +# runs-on: ubuntu-latest +# timeout-minutes: 20 +# steps: +# - uses: actions/checkout@v2 +# - name: Setup Kubectl +# uses: azure/setup-kubectl@v1 +# - name: Setup Helm +# uses: azure/setup-helm@v1 +# with: +# version: "3.6.3" +# - name: Setup Kind Cluster +# uses: helm/kind-action@v1.2.0 +# with: +# version: "v0.11.1" +# image: "kindest/node:v1.21.1" +# - name: Install airbyte chart +# working-directory: ./charts/airbyte +# run: ./ci.sh install +# - if: always() +# name: Print diagnostics +# working-directory: ./charts/airbyte +# run: ./ci.sh diagnostics +# - if: success() +# name: Test airbyte chart +# working-directory: ./charts/airbyte +# run: ./ci.sh test From 04a113ea8c75ceecede91df6d79e11814e215ddc Mon Sep 17 00:00:00 2001 From: Christophe Duong Date: Fri, 7 Jan 2022 18:03:53 +0100 Subject: [PATCH 067/215] Clean up normalization (#9355) --- .../test_simple_streams/dbt_project.yml | 2 +- .../test_simple_streams/first_dbt_project.yml | 63 
+++++++++ .../dedup_exchange_rate_ab1.sql | 25 ++++ .../dedup_exchange_rate_ab2.sql | 25 ++++ .../test_normalization/exchange_rate_ab1.sql | 25 ++++ .../test_normalization/exchange_rate_ab2.sql | 25 ++++ .../test_normalization/exchange_rate_ab3.sql | 25 ++++ .../renamed_dedup_cdc_excluded_ab1.sql | 22 +++ .../renamed_dedup_cdc_excluded_ab2.sql | 22 +++ .../dedup_exchange_rate_scd.sql | 131 ++++++++++++++++++ .../renamed_dedup_cdc_excluded_scd.sql | 123 ++++++++++++++++ .../dedup_exchange_rate.sql | 28 ++++ .../dedup_exchange_rate_stg.sql | 25 ++++ .../renamed_dedup_cdc_excluded.sql | 25 ++++ .../renamed_dedup_cdc_excluded_stg.sql | 22 +++ .../test_normalization/exchange_rate.sql | 26 ++++ .../modified_models/generated/sources.yml | 11 ++ .../dedup_exchange_rate_scd.sql | 14 ++ .../renamed_dedup_cdc_excluded_scd.sql | 14 ++ .../dedup_exchange_rate.sql | 14 ++ .../dedup_exchange_rate_stg.sql | 14 ++ .../renamed_dedup_cdc_excluded.sql | 14 ++ .../renamed_dedup_cdc_excluded_stg.sql | 14 ++ .../test_normalization/exchange_rate.sql | 113 +++++++++++++++ .../normalization/destination_type.py | 2 +- .../src/main/resources/spec.json | 2 +- .../basic-normalization.md | 8 +- 27 files changed, 829 insertions(+), 5 deletions(-) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml index 9ad8158759001..522b1e595e8ac 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml @@ -15,7 +15,7 @@ profile: 'normalize' # These configurations specify where dbt should look for different 
types of files. # The `source-paths` config, for example, states that source models can be found # in the "models/" directory. You probably won't need to change these! -source-paths: ["models"] +source-paths: ["modified_models"] docs-paths: ["docs"] analysis-paths: ["analysis"] test-paths: ["tests"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml new file mode 100644 index 0000000000000..9ad8158759001 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml @@ -0,0 +1,63 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'airbyte_utils' +version: '1.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: 'normalize' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true +# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) +# all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! 
+models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ['airbyte_utils', 'dbt_utils'] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..23e1bb70c5879 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..b43312b67ebf1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags 
= [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('id') }}, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast(new_column as {{ dbt_utils.type_float() }}) as new_column, + cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_bigint() }}) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql new file mode 100644 index 0000000000000..ba88ffa22b0d9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias +-- exchange_rate +where 1 = 1 + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql new file mode 100644 index 0000000000000..e6cf7ee1e5760 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('exchange_rate_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('id') }}, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast(new_column as {{ dbt_utils.type_float() }}) as new_column, + cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_float() }}) as usd, + cast({{ adapter.quote('column`_\'with""_quotes') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('exchange_rate_ab1') }} +-- exchange_rate +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql new file mode 100644 index 0000000000000..96c96a4d4799c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'new_column', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'nzd', + 'usd', + adapter.quote('column`_\'with""_quotes'), + ]) }} as _airbyte_exchange_rate_hashid, + tmp.* +from {{ ref('exchange_rate_ab2') }} tmp +-- exchange_rate +where 1 = 1 + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..590e1e755b5c4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..0718ac05fcbf9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, + cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, + cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ 
current_timestamp() }} as _airbyte_normalized_at +from {{ ref('renamed_dedup_cdc_excluded_ab1') }} +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..bf5adb993db9e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,131 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = ["delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ 
adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..c0bcd34d32027 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,123 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = ["delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('renamed_dedup_cdc_excluded_stg') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), +new_data_ids as ( + -- build a subset of 
_airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('renamed_dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('renamed_dedup_cdc_excluded_stg') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where 
_airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..8529ede3dcfac --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,28 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..a4c1e8816f8ae --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'new_column', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'nzd', + 'usd', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..80ff3fc2138ca --- /dev/null 
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from {{ ref('renamed_dedup_cdc_excluded_scd') }} +-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..86d0e6f4451d0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('name'), + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..40b5ffb3f87d9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,26 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + 
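+    -- Commentary (illustrative note, not emitted by normalization): adapter.quote() is applied to
+    -- identifiers that are reserved words, mixed case, or contain special characters; on Postgres
+    -- adapter.quote('date') renders as "date" and adapter.quote('HKD@spéçiäl & characters') as
+    -- "HKD@spéçiäl & characters". A hypothetical column named group would likewise need quoting:
+    --
+    --   select "id", "date", "group" from some_source_table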
timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, + {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml new file mode 100644 index 0000000000000..dd538a80131ae --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml @@ -0,0 +1,11 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..a5de1de2333db --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,14 @@ + + delete + from "postgres".test_normalization."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_exchange_rate_scd__dbt_tmp" + ); + + insert into "postgres".test_normalization."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid", "new_column", "id") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid", "new_column", "id" + from "dedup_exchange_rate_scd__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..dfe10c6da794d --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,14 @@ + + delete + from "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "renamed_dedup_cdc_excluded_scd__dbt_tmp" + ); + + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" + from "renamed_dedup_cdc_excluded_scd__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..ecc81c8883b07 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,14 @@ + + delete + from "postgres".test_normalization."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_exchange_rate__dbt_tmp" + ); + + insert into "postgres".test_normalization."dedup_exchange_rate" ("_airbyte_unique_key", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid", "new_column", "id") + ( + select "_airbyte_unique_key", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid", "new_column", "id" + from "dedup_exchange_rate__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..f35951198e0fb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,14 @@ + + delete + from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from 
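+    -- Commentary (illustrative note, not part of the compiled output): these third_output files
+    -- show dbt's delete+insert incremental strategy. Each run materializes the model into a
+    -- temporary __dbt_tmp relation, deletes target rows whose key appears in it, then re-inserts
+    -- them. A minimal sketch of the pattern for a hypothetical table keyed on _airbyte_ab_id:
+    --
+    --   delete from my_table where _airbyte_ab_id in (select _airbyte_ab_id from my_table__dbt_tmp);
+    --   insert into my_table select * from my_table__dbt_tmp;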
"dedup_exchange_rate_stg__dbt_tmp" + ); + + insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" ("_airbyte_dedup_exchange_rate_hashid", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "new_column", "id", "usd") + ( + select "_airbyte_dedup_exchange_rate_hashid", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "new_column", "id", "usd" + from "dedup_exchange_rate_stg__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..c1d1c310179d3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,14 @@ + + delete + from "postgres".test_normalization."renamed_dedup_cdc_excluded" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "renamed_dedup_cdc_excluded__dbt_tmp" + ); + + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") + ( + select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" + from "renamed_dedup_cdc_excluded__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..55db812277ae0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -0,0 +1,14 @@ + + delete + from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "renamed_dedup_cdc_excluded_stg__dbt_tmp" + ); + + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") + ( + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" + from 
"renamed_dedup_cdc_excluded_stg__dbt_tmp" + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..d7f0d50be215f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,113 @@ + + + create table "postgres".test_normalization."exchange_rate__dbt_tmp" + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_exchange_rate +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'currency') as currency, + jsonb_extract_path_text(_airbyte_data, 'new_column') as new_column, + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, + jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, + jsonb_extract_path_text(_airbyte_data, 'USD') as usd, + jsonb_extract_path_text(_airbyte_data, 'column`_''with"_quotes') as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast("id" as + float +) as "id", + cast(currency as + varchar +) as currency, + cast(new_column as + float +) as new_column, + cast(nullif("date", '') as + date +) as "date", + cast(nullif(timestamp_col, '') as + timestamp with time zone +) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as + varchar +) as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast("id" as + varchar +), '') || '-' || coalesce(cast(currency as + varchar +), '') || '-' || coalesce(cast(new_column as + varchar +), '') || '-' || coalesce(cast("date" as + varchar +), '') || '-' || coalesce(cast(timestamp_col as + varchar +), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as + varchar +), '') || '-' || coalesce(cast(nzd as + varchar +), '') || '-' || coalesce(cast(usd as + varchar +), '') || '-' || coalesce(cast("column`_'with""_quotes" as + varchar +), '') as + varchar +)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- 
depends_on: __dbt__cte__exchange_rate_ab3 +select + "id", + currency, + new_column, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + nzd, + usd, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "postgres".test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py index ef66c789495af..c3d7d7b7f2ea6 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py +++ b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py @@ -8,13 +8,13 @@ class DestinationType(Enum): BIGQUERY = "bigquery" + CLICKHOUSE = "clickhouse" MSSQL = "mssql" MYSQL = "mysql" ORACLE = "oracle" POSTGRES = "postgres" REDSHIFT = "redshift" SNOWFLAKE = "snowflake" - CLICKHOUSE = "clickhouse" @classmethod def from_string(cls, string_value: str) -> "DestinationType": diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json index e0345d4780fe5..1edbbb8465d94 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json @@ -2,7 +2,7 @@ "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse", "supportsIncremental": true, "supportsNormalization": true, - "supportsDBT": true, + "supportsDBT": false, "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/docs/understanding-airbyte/basic-normalization.md b/docs/understanding-airbyte/basic-normalization.md index 9122b06ea525a..7d8dac248665d 100644 --- a/docs/understanding-airbyte/basic-normalization.md +++ b/docs/understanding-airbyte/basic-normalization.md @@ -350,8 +350,12 @@ Therefore, in order to "upgrade" to the desired normalization version, you need | Airbyte Version | Normalization Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | :--- | -| 0.32.11-alpha | 0.1.62 | 2021-12-23 | [\#9063](https://github.com/airbytehq/airbyte/pull/9063) | Add Snowflake-specific normalization | -| | 0.1.61 | 2021-12-02 | [\#8394](https://github.com/airbytehq/airbyte/pull/8394) | Fix incremental queries not updating empty tables | +| 0.35.4-alpha | 0.1.63 | 2021-01-07 | [\#9301](https://github.com/airbytehq/airbyte/pull/9301) | Fix Snowflake prefix tables starting with numbers | +| | 0.1.62 | 2021-01-07 | [\#9340](https://github.com/airbytehq/airbyte/pull/9340) | Use TCP-port support for clickhouse | +| | 0.1.62 | 2021-01-07 | [\#9063](https://github.com/airbytehq/airbyte/pull/9063) | Change Snowflake-specific materialization settings | +| | 0.1.62 | 2021-01-07 | [\#9317](https://github.com/airbytehq/airbyte/pull/9317) | Fix issue with quoted & case sensitive columns | +| | 0.1.62 | 2021-01-07 | [\#9281](https://github.com/airbytehq/airbyte/pull/9281) | Fix SCD partition by float columns in BigQuery | +| 0.32.11-alpha | 0.1.61 | 2021-12-02 | [\#8394](https://github.com/airbytehq/airbyte/pull/8394) | Fix incremental queries not updating empty tables | | 
| 0.1.61 | 2021-12-01 | [\#8378](https://github.com/airbytehq/airbyte/pull/8378) | Fix un-nesting queries and add proper ref hints | | 0.32.5-alpha | 0.1.60 | 2021-11-22 | [\#8088](https://github.com/airbytehq/airbyte/pull/8088) | Speed-up incremental queries for SCD table on Snowflake | | 0.30.32-alpha | 0.1.59 | 2021-11-08 | [\#7669](https://github.com/airbytehq/airbyte/pull/7169) | Fix nested incremental dbt | From 1877e2aba28f344503e25439d17e266ed37ebd3a Mon Sep 17 00:00:00 2001 From: Eugene Date: Fri, 7 Jan 2022 19:17:29 +0200 Subject: [PATCH 068/215] =?UTF-8?q?=F0=9F=90=9BSource-mssql:=20added=20sup?= =?UTF-8?q?port=20for=20missed=20data=20types=20(#9094)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [7728] Source-mssql: added support for missed data types --- .../b5ea17b1-f170-46dc-bc31-cc744ca984c1.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../java/io/airbyte/db/DataTypeUtils.java | 4 + .../java/io/airbyte/db/jdbc/JdbcDatabase.java | 16 ++ .../debezium/internals/MSSQLConverter.java | 30 ++- .../source-mssql-strict-encrypt/Dockerfile | 2 +- .../connectors/source-mssql/Dockerfile | 2 +- .../source/mssql/MssqlSource.java | 180 +++++++++++++++--- .../source/mssql/MssqlSourceOperations.java | 110 ++++++++++- .../mssql/CdcMssqlSourceDatatypeTest.java | 62 +++--- .../source/mssql/MssqlSourceDatatypeTest.java | 52 ++--- docs/integrations/sources/mssql.md | 1 + 13 files changed, 365 insertions(+), 100 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json index 0e8eaa23e5237..d56bf69b98ace 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "b5ea17b1-f170-46dc-bc31-cc744ca984c1", "name": "Microsoft SQL Server (MSSQL)", "dockerRepository": "airbyte/source-mssql", - "dockerImageTag": "0.3.12", + "dockerImageTag": "0.3.13", "documentationUrl": "https://docs.airbyte.io/integrations/sources/mssql", "icon": "mssql.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 6248ea2f25633..adf054c624ccc 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -398,7 +398,7 @@ - name: Microsoft SQL Server (MSSQL) sourceDefinitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 dockerRepository: airbyte/source-mssql - dockerImageTag: 0.3.12 + dockerImageTag: 0.3.13 documentationUrl: https://docs.airbyte.io/integrations/sources/mssql icon: mssql.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 49af7a5f0f6c6..6738f46447979 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3846,7 +3846,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mssql:0.3.12" +- dockerImage: 
"airbyte/source-mssql:0.3.13" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" connectionSpecification: diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/DataTypeUtils.java b/airbyte-db/lib/src/main/java/io/airbyte/db/DataTypeUtils.java index 476b74154abbf..873b65f364e0a 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/DataTypeUtils.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/DataTypeUtils.java @@ -52,6 +52,10 @@ public static String toISO8601String(final java.util.Date date) { return DATE_FORMAT.format(date); } + public static String toISOTimeString(final LocalDateTime dateTime) { + return DateTimeFormatter.ISO_TIME.format(dateTime.toLocalTime()); + } + public static String toISO8601String(final LocalDate date) { return toISO8601String(date.atStartOfDay()); } diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java index f8337d4bfa17d..9ce081dd8536d 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java @@ -13,6 +13,7 @@ import java.sql.DatabaseMetaData; import java.sql.PreparedStatement; import java.sql.ResultSet; +import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.List; import java.util.Spliterator; @@ -166,6 +167,21 @@ public Stream query(final String sql, final String... params) throws S }, sourceOperations::rowToJson); } + public ResultSetMetaData queryMetadata(final String sql, final String... params) throws SQLException { + try (final Stream q = query(c -> { + PreparedStatement statement = c.prepareStatement(sql); + int i = 1; + for (String param : params) { + statement.setString(i, param); + ++i; + } + return statement; + }, + ResultSet::getMetaData)) { + return q.findFirst().orElse(null); + } + } + public abstract DatabaseMetaData getMetaData() throws SQLException; } diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java index e162262ba9fb5..e8550f03710a4 100644 --- a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java @@ -4,20 +4,24 @@ package io.airbyte.integrations.debezium.internals; +import io.airbyte.db.DataTypeUtils; import io.debezium.spi.converter.CustomConverter; import io.debezium.spi.converter.RelationalColumn; import java.math.BigDecimal; +import java.sql.Timestamp; import java.util.Objects; import java.util.Properties; +import java.util.Set; import org.apache.kafka.connect.data.SchemaBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class MSSQLConverter implements CustomConverter { - private final Logger LOGGER = LoggerFactory.getLogger(MSSQLConverter.class);; + private final Logger LOGGER = LoggerFactory.getLogger(MSSQLConverter.class); - private final String SMALLDATETIME_TYPE = "SMALLDATETIME"; + private final Set DATE_TYPES = Set.of("DATE", "DATETIME", "DATETIME2", "DATETIMEOFFSET", "SMALLDATETIME"); + private final String TIME_TYPE = "TIME"; private final String SMALLMONEY_TYPE = "SMALLMONEY"; @Override @@ -26,12 +30,13 @@ public void configure(Properties props) {} @Override public void converterFor(final 
RelationalColumn field, final ConverterRegistration registration) { - if (SMALLDATETIME_TYPE.equalsIgnoreCase(field.typeName())) { + if (DATE_TYPES.contains(field.typeName().toUpperCase())) { registerDate(field, registration); } else if (SMALLMONEY_TYPE.equalsIgnoreCase(field.typeName())) { registerMoney(field, registration); + } else if (TIME_TYPE.equalsIgnoreCase(field.typeName())) { + registerTime(field, registration); } - } private void registerDate(final RelationalColumn field, @@ -45,6 +50,23 @@ private void registerDate(final RelationalColumn field, }); } + private void registerTime(final RelationalColumn field, + final ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), input -> { + if (Objects.isNull(input)) { + return DebeziumConverterUtils.convertDefaultValue(field); + } + + if (input instanceof Timestamp) { + return DataTypeUtils.toISOTimeString(((Timestamp) input).toLocalDateTime()); + } + + LOGGER.warn("Uncovered time class type '{}'. Use default converter", + input.getClass().getName()); + return input.toString(); + }); + } + private void registerMoney(final RelationalColumn field, final ConverterRegistration registration) { registration.register(SchemaBuilder.float64(), input -> { diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile index daaf6dd152332..a218fc52e1aae 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-mssql-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.5 +LABEL io.airbyte.version=0.1.6 LABEL io.airbyte.name=airbyte/source-mssql-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mssql/Dockerfile b/airbyte-integrations/connectors/source-mssql/Dockerfile index 8b6be50732f10..5691347a99a61 100644 --- a/airbyte-integrations/connectors/source-mssql/Dockerfile +++ b/airbyte-integrations/connectors/source-mssql/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-mssql COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.3.12 +LABEL io.airbyte.version=0.3.13 LABEL io.airbyte.name=airbyte/source-mssql diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java index 86df7d3775b07..cbc62a3c2fe66 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -11,9 +11,11 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; +import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; @@ -39,6 +41,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.stream.Stream; import org.slf4j.Logger; 
import org.slf4j.LoggerFactory; @@ -52,6 +55,7 @@ public class MssqlSource extends AbstractJdbcSource implements Source public static final String CDC_LSN = "_ab_cdc_lsn"; public static final List HOST_KEY = List.of("host"); public static final List PORT_KEY = List.of("port"); + private static final String HIERARCHYID = "hierarchyid"; public static Source sshWrappedSource() { return new SshWrappedSource(new MssqlSource(), HOST_KEY, PORT_KEY); @@ -61,14 +65,120 @@ public static Source sshWrappedSource() { super(DRIVER_CLASS, new MssqlJdbcStreamingQueryConfiguration(), new MssqlSourceOperations()); } + @Override + public AutoCloseableIterator queryTableFullRefresh(JdbcDatabase database, + List columnNames, + String schemaName, + String tableName) { + LOGGER.info("Queueing query for table: {}", tableName); + + List newIdentifiersList = getWrappedColumn(database, + columnNames, + schemaName, tableName, "\""); + String preparedSqlQuery = String + .format("SELECT %s FROM %s", String.join(",", newIdentifiersList), + getFullTableName(schemaName, tableName)); + + LOGGER.info("Prepared SQL query for TableFullRefresh is: " + preparedSqlQuery); + return queryTable(database, preparedSqlQuery); + } + + @Override + public AutoCloseableIterator queryTableIncremental(JdbcDatabase database, + List columnNames, + String schemaName, + String tableName, + String cursorField, + JDBCType cursorFieldType, + String cursor) { + LOGGER.info("Queueing query for table: {}", tableName); + return AutoCloseableIterators.lazyIterator(() -> { + try { + final Stream stream = database.query( + connection -> { + LOGGER.info("Preparing query for table: {}", tableName); + + final String identifierQuoteString = connection.getMetaData() + .getIdentifierQuoteString(); + List newColumnNames = getWrappedColumn(database, + columnNames, schemaName, tableName, identifierQuoteString); + + final String sql = String.format("SELECT %s FROM %s WHERE %s > ?", + String.join(",", newColumnNames), + sourceOperations + .getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName), + sourceOperations.enquoteIdentifier(connection, cursorField)); + LOGGER.info("Prepared SQL query for queryTableIncremental is: " + sql); + + final PreparedStatement preparedStatement = connection.prepareStatement(sql); + sourceOperations.setStatementField(preparedStatement, 1, cursorFieldType, cursor); + LOGGER.info("Executing query for table: {}", tableName); + return preparedStatement; + }, + sourceOperations::rowToJson); + return AutoCloseableIterators.fromStream(stream); + } catch (final SQLException e) { + throw new RuntimeException(e); + } + }); + } + + /** + * There is no support for hierarchyid even in the native SQL Server JDBC driver. Its value can be + * converted to a nvarchar(4000) data type by calling the ToString() method. So we make a separate + * query to get Table's MetaData, check is there any hierarchyid columns, and wrap required fields + * with the ToString() function in the final Select query. 
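+   * For example, for a hypothetical table dbo.pages with a hierarchyid column named "node", the
+   * generated statement would look like (sketch only; quoting depends on the identifier quote
+   * string in use):
+   *
+   *   SELECT "id", node.ToString() as "node" FROM dbo.pages
+   *
+   * instead of selecting the raw hierarchyid bytes.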
Reference: + * https://docs.microsoft.com/en-us/sql/t-sql/data-types/hierarchyid-data-type-method-reference?view=sql-server-ver15#data-type-conversion + * + * @return the list with Column names updated to handle functions (if nay) properly + */ + private List getWrappedColumn(JdbcDatabase database, + List columnNames, + String schemaName, + String tableName, + String enquoteSymbol) { + List hierarchyIdColumns = new ArrayList<>(); + try { + SQLServerResultSetMetaData sqlServerResultSetMetaData = (SQLServerResultSetMetaData) database + .queryMetadata(String + .format("SELECT TOP 1 %s FROM %s", // only first row is enough to get field's type + enquoteIdentifierList(columnNames), + getFullTableName(schemaName, tableName))); + + // metadata will be null if table doesn't contain records + if (sqlServerResultSetMetaData != null) { + for (int i = 1; i <= sqlServerResultSetMetaData.getColumnCount(); i++) { + if (HIERARCHYID.equals(sqlServerResultSetMetaData.getColumnTypeName(i))) { + hierarchyIdColumns.add(sqlServerResultSetMetaData.getColumnName(i)); + } + } + } + + } catch (SQLException e) { + LOGGER.error("Failed to fetch metadata to prepare a proper request.", e); + } + + // iterate through names and replace Hierarchyid field for query is with toString() function + // Eventually would get columns like this: testColumn.toString as "testColumn" + // toString function in SQL server is the only way to get human readable value, but not mssql + // specific HEX value + return columnNames.stream() + .map( + el -> hierarchyIdColumns.contains(el) ? String + .format("%s.ToString() as %s%s%s", el, enquoteSymbol, el, enquoteSymbol) + : getIdentifierWithQuoting(el)) + .collect(toList()); + } + @Override public JsonNode toDatabaseConfig(final JsonNode mssqlConfig) { final List additionalParameters = new ArrayList<>(); - final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:sqlserver://%s:%s;databaseName=%s;", - mssqlConfig.get("host").asText(), - mssqlConfig.get("port").asText(), - mssqlConfig.get("database").asText())); + final StringBuilder jdbcUrl = new StringBuilder( + String.format("jdbc:sqlserver://%s:%s;databaseName=%s;", + mssqlConfig.get("host").asText(), + mssqlConfig.get("port").asText(), + mssqlConfig.get("database").asText())); if (mssqlConfig.has("ssl_method")) { readSsl(mssqlConfig, additionalParameters); @@ -117,8 +227,10 @@ public AirbyteCatalog discover(final JsonNode config) throws Exception { } @Override - public List> getCheckOperations(final JsonNode config) throws Exception { - final List> checkOperations = new ArrayList<>(super.getCheckOperations(config)); + public List> getCheckOperations(final JsonNode config) + throws Exception { + final List> checkOperations = new ArrayList<>( + super.getCheckOperations(config)); if (isCdc(config)) { checkOperations.add(database -> assertCdcEnabledInDb(config, database)); @@ -130,13 +242,15 @@ public List> getCheckOperations(final J return checkOperations; } - protected void assertCdcEnabledInDb(final JsonNode config, final JdbcDatabase database) throws SQLException { + protected void assertCdcEnabledInDb(final JsonNode config, final JdbcDatabase database) + throws SQLException { final List queryResponse = database.query(connection -> { final String sql = "SELECT name, is_cdc_enabled FROM sys.databases WHERE name = ?"; final PreparedStatement ps = connection.prepareStatement(sql); ps.setString(1, config.get("database").asText()); - LOGGER.info(String.format("Checking that cdc is enabled on database '%s' using the query: '%s'", - 
config.get("database").asText(), sql)); + LOGGER + .info(String.format("Checking that cdc is enabled on database '%s' using the query: '%s'", + config.get("database").asText(), sql)); return ps; }, sourceOperations::rowToJson).collect(toList()); if (queryResponse.size() < 1) { @@ -151,17 +265,21 @@ protected void assertCdcEnabledInDb(final JsonNode config, final JdbcDatabase da } } - protected void assertCdcSchemaQueryable(final JsonNode config, final JdbcDatabase database) throws SQLException { + protected void assertCdcSchemaQueryable(final JsonNode config, final JdbcDatabase database) + throws SQLException { final List queryResponse = database.query(connection -> { - final String sql = "USE " + config.get("database").asText() + "; SELECT * FROM cdc.change_tables"; + final String sql = + "USE " + config.get("database").asText() + "; SELECT * FROM cdc.change_tables"; final PreparedStatement ps = connection.prepareStatement(sql); - LOGGER.info(String.format("Checking user '%s' can query the cdc schema and that we have at least 1 cdc enabled table using the query: '%s'", + LOGGER.info(String.format( + "Checking user '%s' can query the cdc schema and that we have at least 1 cdc enabled table using the query: '%s'", config.get("username").asText(), sql)); return ps; }, sourceOperations::rowToJson).collect(toList()); // Ensure at least one available CDC table if (queryResponse.size() < 1) { - throw new RuntimeException("No cdc-enabled tables found. Please check the documentation on how to enable CDC on MS SQL Server."); + throw new RuntimeException( + "No cdc-enabled tables found. Please check the documentation on how to enable CDC on MS SQL Server."); } } @@ -171,7 +289,8 @@ protected void assertSqlServerAgentRunning(final JdbcDatabase database) throws S final List queryResponse = database.query(connection -> { final String sql = "SELECT status_desc FROM sys.dm_server_services WHERE [servicename] LIKE 'SQL Server Agent%'"; final PreparedStatement ps = connection.prepareStatement(sql); - LOGGER.info(String.format("Checking that the SQL Server Agent is running using the query: '%s'", sql)); + LOGGER.info(String + .format("Checking that the SQL Server Agent is running using the query: '%s'", sql)); return ps; }, sourceOperations::rowToJson).collect(toList()); if (!(queryResponse.get(0).get("status_desc").toString().contains("Running"))) { @@ -180,8 +299,10 @@ protected void assertSqlServerAgentRunning(final JdbcDatabase database) throws S queryResponse.get(0).get("status_desc").toString())); } } catch (final Exception e) { - if (e.getCause() != null && e.getCause().getClass().equals(com.microsoft.sqlserver.jdbc.SQLServerException.class)) { - LOGGER.warn(String.format("Skipping check for whether the SQL Server Agent is running, SQLServerException thrown: '%s'", + if (e.getCause() != null && e.getCause().getClass() + .equals(com.microsoft.sqlserver.jdbc.SQLServerException.class)) { + LOGGER.warn(String.format( + "Skipping check for whether the SQL Server Agent is running, SQLServerException thrown: '%s'", e.getMessage())); } else { throw e; @@ -189,12 +310,14 @@ protected void assertSqlServerAgentRunning(final JdbcDatabase database) throws S } } - protected void assertSnapshotIsolationAllowed(final JsonNode config, final JdbcDatabase database) throws SQLException { + protected void assertSnapshotIsolationAllowed(final JsonNode config, final JdbcDatabase database) + throws SQLException { final List queryResponse = database.query(connection -> { final String sql = "SELECT name, 
snapshot_isolation_state FROM sys.databases WHERE name = ?"; final PreparedStatement ps = connection.prepareStatement(sql); ps.setString(1, config.get("database").asText()); - LOGGER.info(String.format("Checking that snapshot isolation is enabled on database '%s' using the query: '%s'", + LOGGER.info(String.format( + "Checking that snapshot isolation is enabled on database '%s' using the query: '%s'", config.get("database").asText(), sql)); return ps; }, sourceOperations::rowToJson).collect(toList()); @@ -212,7 +335,8 @@ protected void assertSnapshotIsolationAllowed(final JsonNode config, final JdbcD } @Override - public List> getIncrementalIterators(final JdbcDatabase database, + public List> getIncrementalIterators( + final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog, final Map>> tableNameToTable, final StateManager stateManager, @@ -223,8 +347,10 @@ public List> getIncrementalIterators(final final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler(sourceConfig, MssqlCdcTargetPosition.getTargetPosition(database, sourceConfig.get("database").asText()), MssqlCdcProperties.getDebeziumProperties(), catalog, true); - return handler.getIncrementalIterators(new MssqlCdcSavedInfoFetcher(stateManager.getCdcStateManager().getCdcState()), - new MssqlCdcStateHandler(stateManager), new MssqlCdcConnectorMetadataInjector(), emittedAt); + return handler.getIncrementalIterators( + new MssqlCdcSavedInfoFetcher(stateManager.getCdcStateManager().getCdcState()), + new MssqlCdcStateHandler(stateManager), new MssqlCdcConnectorMetadataInjector(), + emittedAt); } else { LOGGER.info("using CDC: {}", false); return super.getIncrementalIterators(database, catalog, tableNameToTable, stateManager, emittedAt); @@ -238,7 +364,8 @@ private static boolean isCdc(final JsonNode config) { } private static boolean shouldUseCDC(final ConfiguredAirbyteCatalog catalog) { - final Optional any = catalog.getStreams().stream().map(ConfiguredAirbyteStream::getSyncMode) + final Optional any = catalog.getStreams().stream() + .map(ConfiguredAirbyteStream::getSyncMode) .filter(syncMode -> syncMode == SyncMode.INCREMENTAL).findAny(); return any.isPresent(); } @@ -287,7 +414,8 @@ private void readSsl(final JsonNode sslMethod, final List additionalPara additionalParameters.add("encrypt=true"); // trust store location code found at https://stackoverflow.com/a/56570588 - final String trustStoreLocation = Optional.ofNullable(System.getProperty("javax.net.ssl.trustStore")) + final String trustStoreLocation = Optional + .ofNullable(System.getProperty("javax.net.ssl.trustStore")) .orElseGet(() -> System.getProperty("java.home") + "/lib/security/cacerts"); final File trustStoreFile = new File(trustStoreLocation); if (!trustStoreFile.exists()) { @@ -298,10 +426,12 @@ private void readSsl(final JsonNode sslMethod, final List additionalPara final String trustStorePassword = System.getProperty("javax.net.ssl.trustStorePassword"); additionalParameters.add("trustStore=" + trustStoreLocation); if (trustStorePassword != null && !trustStorePassword.isEmpty()) { - additionalParameters.add("trustStorePassword=" + config.get("trustStorePassword").asText()); + additionalParameters + .add("trustStorePassword=" + config.get("trustStorePassword").asText()); } if (config.has("hostNameInCertificate")) { - additionalParameters.add("hostNameInCertificate=" + config.get("hostNameInCertificate").asText()); + additionalParameters + .add("hostNameInCertificate=" + config.get("hostNameInCertificate").asText()); } } } diff --git 
a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java index 17ca2d0c96e8e..c114aaa8304eb 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java @@ -4,17 +4,125 @@ package io.airbyte.integrations.source.mssql; +import static io.airbyte.db.jdbc.JdbcConstants.INTERNAL_COLUMN_NAME; +import static io.airbyte.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE; +import static io.airbyte.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE_NAME; +import static io.airbyte.db.jdbc.JdbcConstants.INTERNAL_SCHEMA_NAME; +import static io.airbyte.db.jdbc.JdbcConstants.INTERNAL_TABLE_NAME; + +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.microsoft.sqlserver.jdbc.Geography; +import com.microsoft.sqlserver.jdbc.Geometry; +import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData; import io.airbyte.db.jdbc.JdbcSourceOperations; +import java.sql.JDBCType; import java.sql.ResultSet; import java.sql.SQLException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class MssqlSourceOperations extends JdbcSourceOperations { - protected void putBinary(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) throws SQLException { + private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSourceOperations.class); + + /** + * The method is used to set json value by type. Need to be overridden as MSSQL has some its own + * specific types (ex. 
Geometry, Geography, Hierarchyid, etc) + * + * @throws SQLException + */ + @Override + public void setJsonField(final ResultSet resultSet, final int colIndex, final ObjectNode json) + throws SQLException { + + final SQLServerResultSetMetaData metadata = (SQLServerResultSetMetaData) resultSet + .getMetaData(); + final String columnName = metadata.getColumnName(colIndex); + final String columnTypeName = metadata.getColumnTypeName(colIndex); + final JDBCType columnType = safeGetJdbcType(metadata.getColumnType(colIndex)); + + if (columnTypeName.equalsIgnoreCase("time")) { + putString(json, columnName, resultSet, colIndex); + } else if (columnTypeName.equalsIgnoreCase("geometry")) { + putGeometry(json, columnName, resultSet, colIndex); + } else if (columnTypeName.equalsIgnoreCase("geography")) { + putGeography(json, columnName, resultSet, colIndex); + } else { + putValue(columnType, resultSet, columnName, colIndex, json); + } + } + + private void putValue(JDBCType columnType, + final ResultSet resultSet, + final String columnName, + final int colIndex, + final ObjectNode json) + throws SQLException { + switch (columnType) { + case BIT, BOOLEAN -> putBoolean(json, columnName, resultSet, colIndex); + case TINYINT, SMALLINT -> putShortInt(json, columnName, resultSet, colIndex); + case INTEGER -> putInteger(json, columnName, resultSet, colIndex); + case BIGINT -> putBigInt(json, columnName, resultSet, colIndex); + case FLOAT, DOUBLE -> putDouble(json, columnName, resultSet, colIndex); + case REAL -> putFloat(json, columnName, resultSet, colIndex); + case NUMERIC, DECIMAL -> putBigDecimal(json, columnName, resultSet, colIndex); + case CHAR, NVARCHAR, VARCHAR, LONGVARCHAR -> putString(json, columnName, resultSet, colIndex); + case DATE -> putDate(json, columnName, resultSet, colIndex); + case TIME -> putTime(json, columnName, resultSet, colIndex); + case TIMESTAMP -> putTimestamp(json, columnName, resultSet, colIndex); + case BLOB, BINARY, VARBINARY, LONGVARBINARY -> putBinary(json, columnName, resultSet, + colIndex); + case ARRAY -> putArray(json, columnName, resultSet, colIndex); + default -> putDefault(json, columnName, resultSet, colIndex); + } + } + + @Override + public JDBCType getFieldType(final JsonNode field) { + try { + final String typeName = field.get(INTERNAL_COLUMN_TYPE_NAME).asText(); + if (typeName.equalsIgnoreCase("geography") + || typeName.equalsIgnoreCase("geometry") + || typeName.equalsIgnoreCase("hierarchyid")) { + return JDBCType.VARCHAR; + } + return JDBCType.valueOf(field.get(INTERNAL_COLUMN_TYPE).asInt()); + } catch (final IllegalArgumentException ex) { + LOGGER.warn(String.format("Could not convert column: %s from table: %s.%s with type: %s. 
Casting to VARCHAR.", + field.get(INTERNAL_COLUMN_NAME), + field.get(INTERNAL_SCHEMA_NAME), + field.get(INTERNAL_TABLE_NAME), + field.get(INTERNAL_COLUMN_TYPE))); + return JDBCType.VARCHAR; + } + } + + @Override + protected void putBinary(final ObjectNode node, + final String columnName, + final ResultSet resultSet, + final int index) + throws SQLException { byte[] bytes = resultSet.getBytes(index); String value = new String(bytes); node.put(columnName, value); } + protected void putGeometry(final ObjectNode node, + final String columnName, + final ResultSet resultSet, + final int index) + throws SQLException { + node.put(columnName, Geometry.deserialize(resultSet.getBytes(index)).toString()); + } + + protected void putGeography(final ObjectNode node, + final String columnName, + final ResultSet resultSet, + final int index) + throws SQLException { + node.put(columnName, Geography.deserialize(resultSet.getBytes(index)).toString()); + } + } diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java index cb062b970228f..ab31da0972538 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java @@ -22,7 +22,8 @@ public class CdcMssqlSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { private static final String DB_NAME = "comprehensive"; private static final String SCHEMA_NAME = "dbo"; - private static final String CREATE_TABLE_SQL = "USE " + DB_NAME + "\nCREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)"; + private static final String CREATE_TABLE_SQL = + "USE " + DB_NAME + "\nCREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)"; @Override protected JsonNode getConfig() { @@ -41,7 +42,8 @@ protected String getImageName() { @Override protected Database setupDatabase() throws Exception { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); + container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest") + .acceptLicense(); container.addEnv("MSSQL_AGENT_ENABLED", "True"); // need this running for cdc to work container.start(); @@ -150,8 +152,10 @@ protected void initTests() { TestDataHolder.builder() .sourceType("real") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("null", "power(1e1, 38)*-3.4", "power(1e1, -38)*-1.18", "power(1e1, -38)*1.18", "power(1e1, 38)*3.4") - .addExpectedValues(null, String.valueOf(Math.pow(10, 38) * -3.4), String.valueOf(Math.pow(10, -38) * -1.18), + .addInsertValues("null", "power(1e1, 38)*-3.4", "power(1e1, -38)*-1.18", + "power(1e1, -38)*1.18", "power(1e1, 38)*3.4") + .addExpectedValues(null, String.valueOf(Math.pow(10, 38) * -3.4), + String.valueOf(Math.pow(10, -38) * -1.18), String.valueOf(Math.pow(10, -38) * 1.18), String.valueOf(Math.pow(10, 38) * 3.4)) .createTablePatternSql(CREATE_TABLE_SQL) .build()); @@ -161,8 +165,10 @@ protected void initTests() { .sourceType("float") .airbyteType(JsonSchemaPrimitive.NUMBER) .fullSourceDataType("float(24)") - .addInsertValues("null", "power(1e1, 38)*-3.4", "power(1e1, -38)*-1.18", "power(1e1, -38)*1.18", "power(1e1, 38)*3.4") - .addExpectedValues(null, 
String.valueOf(Math.pow(10, 38) * -3.4), String.valueOf(Math.pow(10, -38) * -1.18), + .addInsertValues("null", "power(1e1, 38)*-3.4", "power(1e1, -38)*-1.18", + "power(1e1, -38)*1.18", "power(1e1, 38)*3.4") + .addExpectedValues(null, String.valueOf(Math.pow(10, 38) * -3.4), + String.valueOf(Math.pow(10, -38) * -1.18), String.valueOf(Math.pow(10, -38) * 1.18), String.valueOf(Math.pow(10, 38) * 3.4)) .createTablePatternSql(CREATE_TABLE_SQL) .build()); @@ -174,7 +180,8 @@ protected void initTests() { .fullSourceDataType("float(53)") .addInsertValues("null", "power(1e1, 308)*-1.79", "power(1e1, -308)*-2.23", "power(1e1, -308)*2.23", "power(1e1, 308)*1.79") - .addExpectedValues(null, String.valueOf(Math.pow(10, 308) * -1.79), String.valueOf(Math.pow(10, -308) * -2.23), + .addExpectedValues(null, String.valueOf(Math.pow(10, 308) * -1.79), + String.valueOf(Math.pow(10, -308) * -2.23), String.valueOf(Math.pow(10, -308) * 2.23), String.valueOf(Math.pow(10, 308) * 1.79)) .createTablePatternSql(CREATE_TABLE_SQL) .build()); @@ -323,14 +330,8 @@ protected void initTests() { TestDataHolder.builder() .sourceType("date") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("'0001-01-01'", "'9999-12-31'", "'1999-01-08'", - "null") - // TODO: Debezium is returning DATE/DATETIME from mssql as integers (days or milli/micro/nanoseconds - // since the epoch) - // still useable but requires transformation if true date/datetime type required in destination - // https://debezium.io/documentation/reference/1.4/connectors/sqlserver.html#sqlserver-data-types - // .addExpectedValues("0001-01-01T00:00:00Z", "9999-12-31T00:00:00Z", - // "1999-01-08T00:00:00Z", null) + .addInsertValues("'0001-01-01'", "'9999-12-31'", "'1999-01-08'", "null") + .addExpectedValues("0001-01-01", "9999-12-31", "1999-01-08", null) .createTablePatternSql(CREATE_TABLE_SQL) .build()); @@ -339,11 +340,7 @@ protected void initTests() { .sourceType("smalldatetime") .airbyteType(JsonSchemaPrimitive.STRING) .addInsertValues("'1900-01-01'", "'2079-06-06'", "null") - // TODO: Debezium is returning DATE/DATETIME from mssql as integers (days or milli/micro/nanoseconds - // since the epoch) - // still useable but requires transformation if true date/datetime type required in destination - // https://debezium.io/documentation/reference/1.4/connectors/sqlserver.html#sqlserver-data-types - // .addExpectedValues("1900-01-01T00:00:00Z", "2079-06-06T00:00:00Z", null) + .addExpectedValues("1900-01-01T00:00:00Z", "2079-06-06T00:00:00Z", null) .createTablePatternSql(CREATE_TABLE_SQL) .build()); @@ -352,11 +349,7 @@ protected void initTests() { .sourceType("datetime") .airbyteType(JsonSchemaPrimitive.STRING) .addInsertValues("'1753-01-01'", "'9999-12-31'", "null") - // TODO: Debezium is returning DATE/DATETIME from mssql as integers (days or milli/micro/nanoseconds - // since the epoch) - // still useable but requires transformation if true date/datetime type required in destination - // https://debezium.io/documentation/reference/1.4/connectors/sqlserver.html#sqlserver-data-types - // .addExpectedValues("1753-01-01T00:00:00Z", "9999-12-31T00:00:00Z", null) + .addExpectedValues("1753-01-01T00:00:00Z", "9999-12-31T00:00:00Z", null) .createTablePatternSql(CREATE_TABLE_SQL) .build()); @@ -365,11 +358,7 @@ protected void initTests() { .sourceType("datetime2") .airbyteType(JsonSchemaPrimitive.STRING) .addInsertValues("'0001-01-01'", "'9999-12-31'", "null") - // TODO: Debezium is returning DATE/DATETIME from mssql as integers (days or milli/micro/nanoseconds - // 
since the epoch) - // still useable but requires transformation if true date/datetime type required in destination - // https://debezium.io/documentation/reference/1.4/connectors/sqlserver.html#sqlserver-data-types - // .addExpectedValues("0001-01-01T00:00:00Z", "9999-12-31T00:00:00Z", null) + .addExpectedValues("0001-01-01T00:00:00Z", "9999-12-31T00:00:00Z", null) .createTablePatternSql(CREATE_TABLE_SQL) .build()); @@ -377,12 +366,9 @@ protected void initTests() { TestDataHolder.builder() .sourceType("time") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - // TODO: Debezium is returning DATE/DATETIME from mssql as integers (days or milli/micro/nanoseconds - // since the epoch) - // still useable but requires transformation if true date/datetime type required in destination - // https://debezium.io/documentation/reference/1.4/connectors/sqlserver.html#sqlserver-data-types - .addNullExpectedValue() + .addInsertValues("'00:00:00.0000000'", "'23:59:59.9999999'", "'00:00:00'", "'23:58'", + "null") + .addExpectedValues("00:00:00", "23:59:59.9999999", "00:00:00", "23:58:00", null) .createTablePatternSql(CREATE_TABLE_SQL) .build()); @@ -391,9 +377,7 @@ protected void initTests() { .sourceType("datetimeoffset") .airbyteType(JsonSchemaPrimitive.STRING) .addInsertValues("'0001-01-10 00:00:00 +01:00'", "'9999-01-10 00:00:00 +01:00'", "null") - // TODO: BUG - seem to be getting back 0001-01-08T00:00:00+01:00 ... this is clearly wrong - // .addExpectedValues("0001-01-10 00:00:00.0000000 +01:00", - // "9999-01-10 00:00:00.0000000 +01:00", null) + .addExpectedValues("0001-01-10 00:00:00 +01:00", "9999-01-10 00:00:00 +01:00", null) .createTablePatternSql(CREATE_TABLE_SQL) .build()); diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java index 796a7f2a6ff26..fb3690a9c67d5 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java @@ -81,6 +81,11 @@ protected void tearDown(final TestDestinationEnv testEnv) throws Exception { container.close(); } + @Override + public boolean testCatalog() { + return true; + } + @Override protected void initTests() { addDataTypeTestData( @@ -118,7 +123,7 @@ protected void initTests() { addDataTypeTestData( TestDataHolder.builder() .sourceType("bit") - .airbyteType(JsonSchemaPrimitive.NUMBER) + .airbyteType(JsonSchemaPrimitive.BOOLEAN) .addInsertValues("null", "0", "1", "'true'", "'false'") .addExpectedValues(null, "false", "true", "true", "false") .build()); @@ -172,8 +177,6 @@ protected void initTests() { .addExpectedValues("123.0", "1.23456794E9", null) .build()); - // TODO JdbcUtils-> DATE_FORMAT is set as ""yyyy-MM-dd'T'HH:mm:ss'Z'"" so dates would be - // always represented as a datetime with 00:00:00 time addDataTypeTestData( TestDataHolder.builder() .sourceType("date") @@ -208,14 +211,12 @@ protected void initTests() { .addExpectedValues("0001-01-01T00:00:00Z", "9999-12-31T00:00:00Z", null) .build()); - // TODO JdbcUtils-> DATE_FORMAT is set as ""yyyy-MM-dd'T'HH:mm:ss'Z'"" for both Date and Time types. 
- // So Time only (04:05:06) would be represented like "1970-01-01T04:05:06Z" which is incorrect addDataTypeTestData( TestDataHolder.builder() .sourceType("time") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - .addNullExpectedValue() + .addInsertValues("null", "'13:00:01'", "'13:00:04Z'") + .addExpectedValues(null, "13:00:01.0000000", "13:00:04.0000000") .build()); addDataTypeTestData( @@ -286,7 +287,7 @@ protected void initTests() { addDataTypeTestData( TestDataHolder.builder() .sourceType("binary") - .airbyteType(JsonSchemaPrimitive.STRING) + .airbyteType(JsonSchemaPrimitive.STRING_BINARY) .addInsertValues("CAST( 'A' AS BINARY(1))", "null") .addExpectedValues("A", null) .build()); @@ -295,12 +296,11 @@ protected void initTests() { TestDataHolder.builder() .sourceType("varbinary") .fullSourceDataType("varbinary(3)") - .airbyteType(JsonSchemaPrimitive.STRING) + .airbyteType(JsonSchemaPrimitive.STRING_BINARY) .addInsertValues("CAST( 'ABC' AS VARBINARY)", "null") .addExpectedValues("ABC", null) .build()); - // TODO BUG: airbyte returns binary representation instead of readable one // create table dbo_1_hierarchyid1 (test_column hierarchyid); // insert dbo_1_hierarchyid1 values ('/1/1/'); // select test_column ,test_column.ToString() AS [Node Text],test_column.GetLevel() [Node Level] @@ -309,10 +309,8 @@ protected void initTests() { TestDataHolder.builder() .sourceType("hierarchyid") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - .addNullExpectedValue() - // .addInsertValues("null","'/1/1/'") - // .addExpectedValues(null, "/1/1/") + .addInsertValues("'/1/1/'", "null") + .addExpectedValues("/1/1/", null) .build()); addDataTypeTestData( @@ -325,17 +323,14 @@ protected void initTests() { null, "\\xF0\\x9F\\x9A\\x80") .build()); - // TODO BUG: Airbyte returns binary representation instead of text one. // Proper select query example: SELECT test_column.STAsText() from dbo_1_geometry; addDataTypeTestData( TestDataHolder.builder() .sourceType("geometry") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - .addNullExpectedValue() - // .addInsertValues("geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0)") - // .addExpectedValues("LINESTRING (100 100, 20 180, 180 180)", - // "POLYGON ((0 0, 150 0, 150 150, 0 150, 0 0)", null) + .addInsertValues("geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0)", + "null") + .addExpectedValues("LINESTRING(100 100, 20 180, 180 180)", null) .build()); addDataTypeTestData( @@ -355,17 +350,22 @@ protected void initTests() { .addExpectedValues("1", null, "") .build()); - // TODO BUG: Airbyte returns binary representation instead of text one. 
// Proper select query example: SELECT test_column.STAsText() from dbo_1_geography; addDataTypeTestData( TestDataHolder.builder() .sourceType("geography") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - .addNullExpectedValue() - // .addInsertValues("geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', - // 4326)") - // .addExpectedValues("LINESTRING(-122.360 47.656, -122.343 47.656 )", null) + .addInsertValues( + "geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326)", + "null") + .addExpectedValues("LINESTRING(-122.36 47.656, -122.343 47.656)", null) + .build()); + + // test the case when table is empty, should not crash on pre-flight (get MetaData) sql request + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("hierarchyid") + .airbyteType(JsonSchemaPrimitive.STRING) .build()); } diff --git a/docs/integrations/sources/mssql.md b/docs/integrations/sources/mssql.md index fc7236880cae0..d20309068c259 100644 --- a/docs/integrations/sources/mssql.md +++ b/docs/integrations/sources/mssql.md @@ -294,6 +294,7 @@ If you do not see a type in this list, assume that it is coerced into a string. | Version | Date | Pull Request | Subject | | |:--------| :--- | :--- | :--- | :-- | +| 0.3.13 | 2022-01-07 | [9094](https://github.com/airbytehq/airbyte/pull/9094) | Added support for missed data types | | 0.3.12 | 2021-12-30 | [9206](https://github.com/airbytehq/airbyte/pull/9206) | Update connector fields title/description | | 0.3.11 | 2021-12-24 | [8958](https://github.com/airbytehq/airbyte/pull/8958) | Add support for JdbcType.ARRAY | | 0.3.10 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | | From c071d7322115879927ddfbc346a94714ecbfd4b8 Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Fri, 7 Jan 2022 14:16:24 -0800 Subject: [PATCH 069/215] specify python3 instead of python3.7 for pre-commit (#9293) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 360ef9ad0e0ba..5bf2849642590 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ default_language_version: - python: python3.7 + python: python3 repos: - repo: https://github.com/johann-petrak/licenseheaders.git From 22ef236ab8fe3a7d76d1278ecbb15ca090070e0a Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Sun, 9 Jan 2022 10:56:54 -0800 Subject: [PATCH 070/215] Refactor and clean up json avro schema converter (#9363) * Default isRootNode to true and rename parameters * Default appendAirbyteFields to true * Rename methods and add javadoc * Simplify namespace generation and delete obtainPaths method * Simplify test case * Add namespace for objects in array * Merge object definitions in type union (need improvement) * Fix merging of record fields * Switch test case to minimize changes * Mark assembler and builder as final variables * Adjust test case * Remove unused code * Keep field doc * Update comment * Merge record schemas recursively * Add nested test case * Add docuementation * Add test case for issue #5564 * Format code --- .../gcs/writer/ProductionWriterFactory.java | 2 +- .../s3/avro/JsonToAvroSchemaConverter.java | 270 ++++++++--- .../destination/s3/util/AvroRecordHelper.java | 35 +- .../s3/writer/ProductionWriterFactory.java | 2 +- .../s3/avro/JsonToAvroConverterTest.java | 4 +- .../json_conversion_test_cases.json | 436 +++++++++++++++--- 
.../json-avro-conversion.md | 143 ++++++ 7 files changed, 719 insertions(+), 173 deletions(-) diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java index bd5e03d0b71f5..c33bbf38082ca 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java @@ -41,7 +41,7 @@ public S3Writer create(final GcsDestinationConfig config, return new GcsAvroWriter(config, s3Client, configuredStream, uploadTimestamp, AvroConstants.JSON_CONVERTER, stream.getJsonSchema()); } else { final JsonToAvroSchemaConverter schemaConverter = new JsonToAvroSchemaConverter(); - final Schema avroSchema = schemaConverter.getAvroSchema(stream.getJsonSchema(), stream.getName(), stream.getNamespace(), true, true); + final Schema avroSchema = schemaConverter.getAvroSchema(stream.getJsonSchema(), stream.getName(), stream.getNamespace()); LOGGER.info("Avro schema for stream {}: {}", stream.getName(), avroSchema.toString(false)); return new GcsParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java index 77547aefafb23..7b6b4e7220b41 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java @@ -4,8 +4,6 @@ package io.airbyte.integrations.destination.s3.avro; -import static io.airbyte.integrations.destination.s3.util.AvroRecordHelper.obtainPaths; - import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; import com.google.common.base.Preconditions; @@ -14,6 +12,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Optional; @@ -23,7 +23,6 @@ import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; -import org.apache.avro.SchemaBuilder.RecordBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import tech.allegro.schema.json2avro.converter.AdditionalPropertyField; @@ -31,9 +30,7 @@ /** * The main function of this class is to convert a JsonSchema to Avro schema. It can also * standardize schema names, and keep track of a mapping from the original names to the standardized - * ones, which is needed for unit tests. - *

- *

+ * ones, which is needed for unit tests.
* For limitations of this converter, see the README of this connector: * https://docs.airbyte.io/integrations/destinations/s3#avro */ @@ -48,7 +45,6 @@ public class JsonToAvroSchemaConverter { .addToSchema(Schema.create(Schema.Type.LONG)); private final Map standardizedNames = new HashMap<>(); - private final Map jsonNodePathMap = new HashMap<>(); static List getNonNullTypes(final String fieldName, final JsonNode fieldDefinition) { return getTypes(fieldName, fieldDefinition).stream() @@ -96,95 +92,111 @@ public Map getStandardizedNames() { return standardizedNames; } + /** + * @return Avro schema based on the input {@code jsonSchema}. + */ public Schema getAvroSchema(final JsonNode jsonSchema, - final String name, - @Nullable final String namespace, - final boolean appendAirbyteFields, - final boolean isRootNode) { - return getAvroSchema(jsonSchema, name, namespace, appendAirbyteFields, true, true, isRootNode); + final String streamName, + @Nullable final String namespace) { + return getAvroSchema(jsonSchema, streamName, namespace, true, true, true, true); } /** - * @return - Avro schema based on the input {@code jsonSchema}. + * @param appendAirbyteFields Add default airbyte fields (e.g. _airbyte_id) to the output Avro + * schema. + * @param appendExtraProps Add default additional property field to the output Avro schema. + * @param addStringToLogicalTypes Default logical type field to string. + * @param isRootNode Whether it is the root field in the input Json schema. + * @return Avro schema based on the input {@code jsonSchema}. */ public Schema getAvroSchema(final JsonNode jsonSchema, - final String name, - @Nullable final String namespace, + final String fieldName, + @Nullable final String fieldNamespace, final boolean appendAirbyteFields, final boolean appendExtraProps, final boolean addStringToLogicalTypes, final boolean isRootNode) { - final String stdName = AvroConstants.NAME_TRANSFORMER.getIdentifier(name); - RecordBuilder builder = SchemaBuilder.record(stdName); - if (isRootNode) { - obtainPaths("", jsonSchema, jsonNodePathMap); - } - if (!stdName.equals(name)) { - standardizedNames.put(name, stdName); - LOGGER.warn("Schema name contains illegal character(s) and is standardized: {} -> {}", name, + final String stdName = AvroConstants.NAME_TRANSFORMER.getIdentifier(fieldName); + final SchemaBuilder.RecordBuilder builder = SchemaBuilder.record(stdName); + if (!stdName.equals(fieldName)) { + standardizedNames.put(fieldName, stdName); + LOGGER.warn("Schema name contains illegal character(s) and is standardized: {} -> {}", fieldName, stdName); - builder = builder.doc( + builder.doc( String.format("%s%s%s", AvroConstants.DOC_KEY_ORIGINAL_NAME, AvroConstants.DOC_KEY_VALUE_DELIMITER, - name)); + fieldName)); } - if (namespace != null) { - builder = builder.namespace(namespace); + if (fieldNamespace != null) { + builder.namespace(fieldNamespace); } final JsonNode properties = jsonSchema.get("properties"); // object field with no "properties" will be handled by the default additional properties // field during object conversion; so it is fine if there is no "properties" - final List fieldNames = properties == null + final List subfieldNames = properties == null ? 
Collections.emptyList() : new ArrayList<>(MoreIterators.toList(properties.fieldNames())); - SchemaBuilder.FieldAssembler assembler = builder.fields(); + final SchemaBuilder.FieldAssembler assembler = builder.fields(); if (appendAirbyteFields) { - assembler = assembler.name(JavaBaseConstants.COLUMN_NAME_AB_ID).type(UUID_SCHEMA).noDefault(); - assembler = assembler.name(JavaBaseConstants.COLUMN_NAME_EMITTED_AT) + assembler.name(JavaBaseConstants.COLUMN_NAME_AB_ID).type(UUID_SCHEMA).noDefault(); + assembler.name(JavaBaseConstants.COLUMN_NAME_EMITTED_AT) .type(TIMESTAMP_MILLIS_SCHEMA).noDefault(); } - for (final String fieldName : fieldNames) { + for (final String subfieldName : subfieldNames) { // ignore additional properties fields, which will be consolidated // into one field at the end - if (AvroConstants.JSON_EXTRA_PROPS_FIELDS.contains(fieldName)) { + if (AvroConstants.JSON_EXTRA_PROPS_FIELDS.contains(subfieldName)) { continue; } - final String stdFieldName = AvroConstants.NAME_TRANSFORMER.getIdentifier(fieldName); - final JsonNode fieldDefinition = properties.get(fieldName); - SchemaBuilder.FieldBuilder fieldBuilder = assembler.name(stdFieldName); - if (!stdFieldName.equals(fieldName)) { - standardizedNames.put(fieldName, stdFieldName); + final String stdFieldName = AvroConstants.NAME_TRANSFORMER.getIdentifier(subfieldName); + final JsonNode subfieldDefinition = properties.get(subfieldName); + final SchemaBuilder.FieldBuilder fieldBuilder = assembler.name(stdFieldName); + if (!stdFieldName.equals(subfieldName)) { + standardizedNames.put(subfieldName, stdFieldName); LOGGER.warn("Field name contains illegal character(s) and is standardized: {} -> {}", - fieldName, stdFieldName); - fieldBuilder = fieldBuilder.doc(String.format("%s%s%s", + subfieldName, stdFieldName); + fieldBuilder.doc(String.format("%s%s%s", AvroConstants.DOC_KEY_ORIGINAL_NAME, AvroConstants.DOC_KEY_VALUE_DELIMITER, - fieldName)); + subfieldName)); } - assembler = fieldBuilder.type(getNullableFieldTypes(fieldName, fieldDefinition, appendExtraProps, addStringToLogicalTypes)) + final String subfieldNamespace = isRootNode + // Omit the namespace for root level fields, because it is directly assigned in the builder above. + // This may not be the correct choice. + ? null + : (fieldNamespace == null ? fieldName : (fieldNamespace + "." + fieldName)); + fieldBuilder.type(parseJsonField(subfieldName, subfieldNamespace, subfieldDefinition, appendExtraProps, addStringToLogicalTypes)) .withDefault(null); } if (appendExtraProps) { // support additional properties in one field - assembler = assembler.name(AvroConstants.AVRO_EXTRA_PROPS_FIELD) + assembler.name(AvroConstants.AVRO_EXTRA_PROPS_FIELD) .type(AdditionalPropertyField.FIELD_SCHEMA).withDefault(null); } return assembler.endRecord(); } - Schema getSingleFieldType(final String fieldName, - final JsonSchemaType fieldType, - final JsonNode fieldDefinition, - final boolean appendExtraProps, - final boolean addStringToLogicalTypes) { + /** + * Generate Avro schema for a single Json field type. For example: + * + *
+   * "number" -> ["double"]
+   * 
+ */ + Schema parseSingleType(final String fieldName, + @Nullable final String fieldNamespace, + final JsonSchemaType fieldType, + final JsonNode fieldDefinition, + final boolean appendExtraProps, + final boolean addStringToLogicalTypes) { Preconditions .checkState(fieldType != JsonSchemaType.NULL, "Null types should have been filtered out"); @@ -213,19 +225,21 @@ Schema getSingleFieldType(final String fieldName, case COMBINED -> { final Optional combinedRestriction = getCombinedRestriction(fieldDefinition); final List unionTypes = - getSchemasFromTypes(fieldName, (ArrayNode) combinedRestriction.get(), appendExtraProps, addStringToLogicalTypes); + parseJsonTypeUnion(fieldName, fieldNamespace, (ArrayNode) combinedRestriction.get(), appendExtraProps, addStringToLogicalTypes); fieldSchema = Schema.createUnion(unionTypes); } case ARRAY -> { final JsonNode items = fieldDefinition.get("items"); if (items == null) { - LOGGER.warn("Source connector provided schema for ARRAY with missed \"items\", will assume that it's a String type"); + LOGGER.warn("Array field {} does not specify the items type. It will be assumed to be an array of strings", fieldName); fieldSchema = Schema.createArray(Schema.createUnion(NULL_SCHEMA, STRING_SCHEMA)); } else if (items.isObject()) { fieldSchema = - Schema.createArray(getNullableFieldTypes(String.format("%s.items", fieldName), items, appendExtraProps, addStringToLogicalTypes)); + Schema.createArray( + parseJsonField(String.format("%s.items", fieldName), fieldNamespace, items, appendExtraProps, addStringToLogicalTypes)); } else if (items.isArray()) { - final List arrayElementTypes = getSchemasFromTypes(fieldName, (ArrayNode) items, appendExtraProps, addStringToLogicalTypes); + final List arrayElementTypes = + parseJsonTypeUnion(fieldName, fieldNamespace, (ArrayNode) items, appendExtraProps, addStringToLogicalTypes); arrayElementTypes.add(0, NULL_SCHEMA); fieldSchema = Schema.createArray(Schema.createUnion(arrayElementTypes)); } else { @@ -234,21 +248,34 @@ Schema getSingleFieldType(final String fieldName, } } case OBJECT -> fieldSchema = - getAvroSchema(fieldDefinition, fieldName, jsonNodePathMap.get(fieldDefinition), false, appendExtraProps, addStringToLogicalTypes, false); + getAvroSchema(fieldDefinition, fieldName, fieldNamespace, false, appendExtraProps, addStringToLogicalTypes, false); default -> throw new IllegalStateException( String.format("Unexpected type for field %s: %s", fieldName, fieldType)); } return fieldSchema; } - List getSchemasFromTypes(final String fieldName, - final ArrayNode types, - final boolean appendExtraProps, - final boolean addStringToLogicalTypes) { - return MoreIterators.toList(types.elements()) + /** + * Take in a union of Json field definitions, and generate Avro field schema unions. For example: + * + *
+   * ["number", { ... }] -> ["double", { ... }]
+   * 
+ */ + List parseJsonTypeUnion(final String fieldName, + @Nullable final String fieldNamespace, + final ArrayNode types, + final boolean appendExtraProps, + final boolean addStringToLogicalTypes) { + final List typeList = MoreIterators.toList(types.elements()); + final List schemas = MoreIterators.toList(types.elements()) .stream() .flatMap(definition -> getNonNullTypes(fieldName, definition).stream().flatMap(type -> { - final Schema singleFieldSchema = getSingleFieldType(fieldName, type, definition, appendExtraProps, addStringToLogicalTypes); + final String namespace = fieldNamespace == null + ? fieldName + : fieldNamespace + "." + fieldName; + final Schema singleFieldSchema = parseSingleType(fieldName, namespace, type, definition, appendExtraProps, addStringToLogicalTypes); + if (singleFieldSchema.isUnion()) { return singleFieldSchema.getTypes().stream(); } else { @@ -257,20 +284,133 @@ List getSchemasFromTypes(final String fieldName, })) .distinct() .collect(Collectors.toList()); + + return mergeRecordSchemas(fieldName, fieldNamespace, schemas, appendExtraProps); + } + + /** + * If there are multiple object fields, those fields are combined into one Avro record. This is + * because Avro does not allow specifying a tuple of types (i.e. the first element is type x, the + * second element is type y, and so on). For example, the following Json field types: + * + *
+   * [
+   *   {
+   *     "type": "object",
+   *     "properties": {
+   *       "id": { "type": "integer" }
+   *     }
+   *   },
+   *   {
+   *     "type": "object",
+   *     "properties": {
+   *       "id": { "type": "string" }
+   *       "message": { "type": "string" }
+   *     }
+   *   }
+   * ]
+   * 
+ * + * is converted to this Avro schema: + * + *
+   * {
+   *   "type": "record",
+   *   "fields": [
+   *     { "name": "id", "type": ["int", "string"] },
+   *     { "name": "message", "type": "string" }
+   *   ]
+   * }
+   * 
+ */ + List mergeRecordSchemas(final String fieldName, + @Nullable final String fieldNamespace, + final List schemas, + final boolean appendExtraProps) { + final LinkedHashMap> recordFieldSchemas = new LinkedHashMap<>(); + final Map> recordFieldDocs = new HashMap<>(); + + final List mergedSchemas = schemas.stream() + // gather record schemas to construct a single record schema later on + .peek(schema -> { + if (schema.getType() == Schema.Type.RECORD) { + for (final Schema.Field field : schema.getFields()) { + recordFieldSchemas.putIfAbsent(field.name(), new LinkedList<>()); + recordFieldSchemas.get(field.name()).add(field.schema()); + if (field.doc() != null) { + recordFieldDocs.putIfAbsent(field.name(), new LinkedList<>()); + recordFieldDocs.get(field.name()).add(field.doc()); + } + } + } + }) + // remove record schemas because they will be merged into one + .filter(schema -> schema.getType() != Schema.Type.RECORD) + .collect(Collectors.toList()); + + // create one record schema from all the record fields + if (!recordFieldSchemas.isEmpty()) { + final SchemaBuilder.RecordBuilder builder = SchemaBuilder.record(fieldName); + if (fieldNamespace != null) { + builder.namespace(fieldNamespace); + } + + final SchemaBuilder.FieldAssembler assembler = builder.fields(); + + for (final Map.Entry> entry : recordFieldSchemas.entrySet()) { + final String subfieldName = entry.getKey(); + // ignore additional properties fields, which will be consolidated + // into one field at the end + if (AvroConstants.JSON_EXTRA_PROPS_FIELDS.contains(subfieldName)) { + continue; + } + + final SchemaBuilder.FieldBuilder subfieldBuilder = assembler.name(subfieldName); + final List subfieldDocs = recordFieldDocs.getOrDefault(subfieldName, Collections.emptyList()); + if (!subfieldDocs.isEmpty()) { + subfieldBuilder.doc(String.join("; ", subfieldDocs)); + } + final List subfieldSchemas = entry.getValue().stream() + .flatMap(schema -> schema.getTypes().stream() + // filter out null and add it later on as the first element + .filter(s -> !s.equals(NULL_SCHEMA))) + .distinct() + .collect(Collectors.toList()); + // recursively merge schemas of a subfield because they may include multiple record schemas as well + final List mergedSubfieldSchemas = mergeRecordSchemas(subfieldName, fieldNamespace, subfieldSchemas, appendExtraProps); + mergedSubfieldSchemas.add(0, NULL_SCHEMA); + subfieldBuilder.type(Schema.createUnion(mergedSubfieldSchemas)).withDefault(null); + } + + if (appendExtraProps) { + // add back additional properties + assembler.name(AvroConstants.AVRO_EXTRA_PROPS_FIELD) + .type(AdditionalPropertyField.FIELD_SCHEMA).withDefault(null); + } + mergedSchemas.add(assembler.endRecord()); + } + + return mergedSchemas; } /** - * @param fieldDefinition - Json schema field definition. E.g. { type: "number" }. + * Take in a Json field definition, and generate a nullable Avro field schema. For example: + * + *
+   * {"type": ["number", { ... }]} -> ["null", "double", { ... }]
+   * 
*/ - Schema getNullableFieldTypes(final String fieldName, - final JsonNode fieldDefinition, - final boolean appendExtraProps, - final boolean addStringToLogicalTypes) { + Schema parseJsonField(final String fieldName, + @Nullable final String fieldNamespace, + final JsonNode fieldDefinition, + final boolean appendExtraProps, + final boolean addStringToLogicalTypes) { // Filter out null types, which will be added back in the end. final List nonNullFieldTypes = getNonNullTypes(fieldName, fieldDefinition) .stream() .flatMap(fieldType -> { - final Schema singleFieldSchema = getSingleFieldType(fieldName, fieldType, fieldDefinition, appendExtraProps, addStringToLogicalTypes); + final Schema singleFieldSchema = + parseSingleType(fieldName, fieldNamespace, fieldType, fieldDefinition, appendExtraProps, addStringToLogicalTypes); if (singleFieldSchema.isUnion()) { return singleFieldSchema.getTypes().stream(); } else { diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/util/AvroRecordHelper.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/util/AvroRecordHelper.java index 059d08176d8ec..56fa772159fc8 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/util/AvroRecordHelper.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/util/AvroRecordHelper.java @@ -5,16 +5,11 @@ package io.airbyte.integrations.destination.s3.util; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; import io.airbyte.commons.util.MoreIterators; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.avro.JsonToAvroSchemaConverter; -import java.util.Arrays; -import java.util.Iterator; -import java.util.Map; -import java.util.stream.Collectors; /** * Helper methods for unit tests. This is needed by multiple modules, so it is in the src directory. @@ -23,7 +18,7 @@ public class AvroRecordHelper { public static JsonFieldNameUpdater getFieldNameUpdater(final String streamName, final String namespace, final JsonNode streamSchema) { final JsonToAvroSchemaConverter schemaConverter = new JsonToAvroSchemaConverter(); - schemaConverter.getAvroSchema(streamSchema, streamName, namespace, true, true); + schemaConverter.getAvroSchema(streamSchema, streamName, namespace); return new JsonFieldNameUpdater(schemaConverter.getStandardizedNames()); } @@ -52,32 +47,4 @@ public static JsonNode pruneAirbyteJson(final JsonNode input) { return output; } - public static void obtainPaths(String currentPath, JsonNode jsonNode, Map jsonNodePathMap) { - if (jsonNode.isObject()) { - ObjectNode objectNode = (ObjectNode) jsonNode; - Iterator> iter = objectNode.fields(); - String pathPrefix = currentPath.isEmpty() ? 
"" : currentPath + "/"; - String[] pathFieldsArray = currentPath.split("/"); - String parent = Arrays.stream(pathFieldsArray) - .filter(x -> !x.equals("items")) - .filter(x -> !x.equals("properties")) - .filter(x -> !x.equals(pathFieldsArray[pathFieldsArray.length - 1])) - .collect(Collectors.joining(".")); - if (!parent.isEmpty()) { - jsonNodePathMap.put(jsonNode, parent); - } - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - obtainPaths(pathPrefix + entry.getKey(), entry.getValue(), jsonNodePathMap); - } - } else if (jsonNode.isArray()) { - ArrayNode arrayNode = (ArrayNode) jsonNode; - - for (int i = 0; i < arrayNode.size(); i++) { - String arrayPath = currentPath + "/" + i; - obtainPaths(arrayPath, arrayNode.get(i), jsonNodePathMap); - } - } - } - } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java index 26c88bee33652..9d09fd5053d95 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java @@ -37,7 +37,7 @@ public S3Writer create(final S3DestinationConfig config, LOGGER.info("Json schema for stream {}: {}", stream.getName(), stream.getJsonSchema()); final JsonToAvroSchemaConverter schemaConverter = new JsonToAvroSchemaConverter(); - final Schema avroSchema = schemaConverter.getAvroSchema(stream.getJsonSchema(), stream.getName(), stream.getNamespace(), true, true); + final Schema avroSchema = schemaConverter.getAvroSchema(stream.getJsonSchema(), stream.getName(), stream.getNamespace()); LOGGER.info("Avro schema for stream {}: {}", stream.getName(), avroSchema.toString(false)); diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java index de7b77d26079c..20ffbdd15c2a4 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java @@ -77,7 +77,7 @@ public Stream provideArguments(final ExtensionContext conte public void testFieldTypeConversion(final String fieldName, final JsonNode jsonFieldSchema, final JsonNode avroFieldType) { assertEquals( avroFieldType, - Jsons.deserialize(SCHEMA_CONVERTER.getNullableFieldTypes(fieldName, jsonFieldSchema, true, true).toString()), + Jsons.deserialize(SCHEMA_CONVERTER.parseJsonField(fieldName, null, jsonFieldSchema, true, true).toString()), String.format("Test for %s failed", fieldName)); } @@ -111,7 +111,7 @@ public void testJsonAvroConversion(final String schemaName, final JsonNode avroSchema, final JsonNode avroObject) throws Exception { - final Schema actualAvroSchema = SCHEMA_CONVERTER.getAvroSchema(jsonSchema, schemaName, namespace, appendAirbyteFields, true); + final Schema actualAvroSchema = SCHEMA_CONVERTER.getAvroSchema(jsonSchema, schemaName, namespace, appendAirbyteFields, true, true, true); assertEquals( avroSchema, 
Jsons.deserialize(actualAvroSchema.toString()), diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json index 5a1c8961c862d..92e989ec1ab48 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json @@ -904,8 +904,8 @@ } }, { - "schemaName": "schema_with_the_same_object_names", - "namespace": "namespace17", + "schemaName": "schema_with_same_object_name", + "namespace": "namespace16", "appendAirbyteFields": false, "jsonSchema": { "type": "object", @@ -913,48 +913,35 @@ "author": { "type": "object", "properties": { - "login": { - "type": ["null", "string"] - }, "id": { "type": ["null", "integer"] - }, - "node_id": { - "type": ["null", "string"] } } }, "commit": { "type": ["null", "object"], "properties": { + "message": { + "type": ["null", "string"] + }, "author": { "type": ["null", "object"], "properties": { "name": { "type": ["null", "string"] }, - "email": { - "type": ["null", "string"] - }, - "date": { - "type": ["null", "string"], - "format": "date-time" - }, "pr": { "type": ["null", "object"], "properties": { "id": { "type": ["null", "string"] }, - "title": { + "message": { "type": ["null", "string"] } } } } - }, - "message": { - "type": ["null", "string"] } } } @@ -962,27 +949,23 @@ }, "jsonObject": { "author": { - "login": "test", - "id": 12345, - "node_id": "abc123" + "id": 12345 }, "commit": { "message": "test commit message", "author": { - "name": "Test Author", - "email": "test@example.com", - "date": "2021-01-01T01:01:01+01:00", + "name": "test author", "pr": { "id": "random id", - "title": "Conversion test" + "message": "test pr description" } } } }, "avroSchema": { "type": "record", - "name": "schema_with_the_same_object_names", - "namespace": "namespace17", + "name": "schema_with_same_object_name", + "namespace": "namespace16", "fields": [ { "name": "author", @@ -993,21 +976,11 @@ "name": "author", "namespace": "", "fields": [ - { - "name": "login", - "type": ["null", "string"], - "default": null - }, { "name": "id", "type": ["null", "int"], "default": null }, - { - "name": "node_id", - "type": ["null", "string"], - "default": null - }, { "name": "_airbyte_additional_properties", "type": [ @@ -1033,6 +1006,11 @@ "name": "commit", "namespace": "", "fields": [ + { + "name": "message", + "type": ["null", "string"], + "default": null + }, { "name": "author", "type": [ @@ -1047,23 +1025,6 @@ "type": ["null", "string"], "default": null }, - { - "name": "email", - "type": ["null", "string"], - "default": null - }, - { - "name": "date", - "type": [ - "null", - { - "type": "long", - "logicalType": "timestamp-micros" - }, - "string" - ], - "default": null - }, { "name": "pr", "type": [ @@ -1079,7 +1040,7 @@ "default": null }, { - "name": "title", + "name": "message", "type": ["null", "string"], "default": null }, @@ -1115,11 +1076,6 @@ ], "default": null }, - { - "name": "message", - "type": ["null", "string"], - "default": null - }, { "name": "_airbyte_additional_properties", "type": [ @@ -1151,24 +1107,20 @@ }, "avroObject": { "author": { - "login": "test", "id": 12345, - "node_id": "abc123", "_airbyte_additional_properties": null }, "commit": { + "message": 
"test commit message", "author": { - "name": "Test Author", - "email": "test@example.com", - "date": 1609459261000000, + "name": "test author", "pr": { "id": "random id", - "title": "Conversion test", + "message": "test pr description", "_airbyte_additional_properties": null }, "_airbyte_additional_properties": null }, - "message": "test commit message", "_airbyte_additional_properties": null }, "_airbyte_additional_properties": null @@ -1176,7 +1128,7 @@ }, { "schemaName": "array_without_items_in_schema", - "namespace": "namespace16", + "namespace": "namespace17", "appendAirbyteFields": false, "jsonSchema": { "type": "object", @@ -1192,7 +1144,7 @@ "avroSchema": { "type": "record", "name": "array_without_items_in_schema", - "namespace": "namespace16", + "namespace": "namespace17", "fields": [ { "name": "identifier", @@ -1216,5 +1168,349 @@ "identifier": ["151", "152", "true", "{\"id\":153}"], "_airbyte_additional_properties": null } + }, + { + "schemaName": "array_with_same_object_name", + "namespace": "namespace18", + "appendAirbyteFields": false, + "jsonSchema": { + "properties": { + "parent_object": { + "type": "object", + "properties": { + "object_array": { + "type": "array", + "items": [ + { "type": "integer" }, + { "type": "boolean" }, + { + "type": "object", + "properties": { + "id": { + "type": "object", + "properties": { + "id_part_1": { + "type": "integer" + }, + "id_part_2": { + "type": "string" + } + } + } + } + }, + { + "type": "object", + "properties": { + "id": { + "type": "object", + "properties": { + "id_part_1": { + "type": "string" + }, + "id_part_2": { + "type": "integer" + } + } + }, + ":message": { + "type": "string" + } + } + } + ] + } + } + } + } + }, + "jsonObject": { + "parent_object": { + "object_array": [ + 1234, + true, + { + "id": { + "id_part_1": 1000, + "id_part_2": "abcde" + } + }, + { + "id": { + "id_part_1": "wxyz", + "id_part_2": 2000 + }, + ":message": "test message" + } + ] + } + }, + "avroSchema": { + "type": "record", + "name": "array_with_same_object_name", + "namespace": "namespace18", + "fields": [ + { + "name": "parent_object", + "type": [ + "null", + { + "type": "record", + "name": "parent_object", + "namespace": "", + "fields": [ + { + "name": "object_array", + "type": [ + "null", + { + "type": "array", + "items": [ + "null", + "int", + "boolean", + { + "type": "record", + "name": "object_array", + "namespace": "parent_object", + "fields": [ + { + "name": "id", + "type": [ + "null", + { + "type": "record", + "name": "id", + "fields": [ + { + "name": "id_part_1", + "type": ["null", "int", "string"], + "default": null + }, + { + "name": "id_part_2", + "type": ["null", "string", "int"], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "_message", + "type": ["null", "string"], + "doc": "_airbyte_original_name::message", + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ], + "default": null + } + ] + } + ] + } + ], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ], + "default": null + } + ] + }, + "avroObject": { + 
"parent_object": { + "object_array": [ + 1234, + true, + { + "id": { + "id_part_1": 1000, + "id_part_2": "abcde", + "_airbyte_additional_properties": null + }, + "_message": null, + "_airbyte_additional_properties": null + }, + { + "id": { + "id_part_1": "wxyz", + "id_part_2": 2000, + "_airbyte_additional_properties": null + }, + "_message": "test message", + "_airbyte_additional_properties": null + } + ], + "_airbyte_additional_properties": null + }, + "_airbyte_additional_properties": null + } + }, + { + "schemaName": "object_inside_array_inside_array", + "namespace": "namespace19", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object", + "properties": { + "filters": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "filterFamily": { + "type": ["null", "string"] + } + } + } + } + } + } + }, + "avroSchema": { + "type": "record", + "name": "object_inside_array_inside_array", + "namespace": "namespace19", + "fields": [ + { + "name": "filters", + "type": [ + "null", + { + "type": "array", + "items": [ + "null", + { + "type": "array", + "items": [ + "null", + { + "type": "record", + "name": "filters_items_items", + "namespace": "", + "doc": "_airbyte_original_name:filters.items.items", + "fields": [ + { + "name": "filterFamily", + "type": ["null", "string"], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ], + "default": null + } + ] + } + ] + } + ] + } + ], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ], + "default": null + } + ] + }, + "jsonObject": { + "filters": [ + [ + { + "filterFamily": "type_a" + }, + { + "filterFamily": "type_b" + } + ], + [ + { + "filterFamily": "type_b" + } + ] + ] + }, + "avroObject": { + "filters": [ + [ + { + "filterFamily": "type_a", + "_airbyte_additional_properties": null + }, + { + "filterFamily": "type_b", + "_airbyte_additional_properties": null + } + ], + [ + { + "filterFamily": "type_b", + "_airbyte_additional_properties": null + } + ] + ], + "_airbyte_additional_properties": null + } } ] diff --git a/docs/understanding-airbyte/json-avro-conversion.md b/docs/understanding-airbyte/json-avro-conversion.md index 306c8b4a59b28..d5e48d3f6bb7f 100644 --- a/docs/understanding-airbyte/json-avro-conversion.md +++ b/docs/understanding-airbyte/json-avro-conversion.md @@ -161,6 +161,149 @@ This is not supported in Avro schema. As a compromise, the converter creates a u } ``` +If the Json array has multiple object items, these objects will be recursively merged into one Avro record. For example, the following Json array expects two different objects, each with a different `id` field. 
+ +Json schema: + +```json +{ + "array_field": { + "type": "array", + "items": [ + { + "type": "object", + "properties": { + "id": { + "type": "object", + "properties": { + "id_part_1": { "type": "integer" }, + "id_part_2": { "type": "string" } + } + } + } + }, + { + "type": "object", + "properties": { + "id": { + "type": "object", + "properties": { + "id_part_1": { "type": "string" }, + "id_part_2": { "type": "integer" } + } + }, + "message": { + "type": "string" + } + } + } + ] + } +} +``` + +Json object: + +```json +{ + "array_field": [ + { + "id": { + "id_part_1": 1000, + "id_part_2": "abcde" + } + }, { + "id": { + "id_part_1": "wxyz", + "id_part_2": 2000 + }, + "message": "test message" + } + ] +} +``` + +Furthermore, the fields under the `id` record, `id_part_1` and `id_part_2`, will also have their schemas merged. + +Avro schema: + +```json +{ + "name": "array_field", + "type": [ + "null", + { + "type": "array", + "items": [ + "boolean", + { + "type": "record", + "name": "array_field", + "fields": [ + { + "name": "id", + "type": [ + "null", + { + "type": "record", + "name": "id", + "fields": [ + { + "name": "id_part_1", + "type": ["null", "int", "string"], + "default": null + }, + { + "name": "id_part_2", + "type": ["null", "string", "int"], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "message", + "type": ["null", "string"], + "default": null + } + ] + } + ] + } + ], + "default": null +} +``` + +Note that `id_part_1` is a union of `int` and `string`, which comes from the first and second `id` definitions, respectively, in the original Json `items` specification. + +Avro object: + +```json +{ + "array_field": [ + { + "id": { + "id_part_1": 1000, + "id_part_2": "abcde" + }, + "message": null + }, + { + "id": { + "id_part_1": "wxyz", + "id_part_2": 2000 + }, + "message": "test message" + } + ] +} +``` + +Note that the first object in `array_field` originally does not have a `message` field. However, because its schema is merged with the second object definition, it has a null `message` field in the Avro record. + ### Untyped Array When a Json array field has no `items`, the element in that array field may have any type. However, Avro requires that each array has a clear type specification. To solve this problem, the elements in the array are forced to be `string`s. 
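A quick way to see the conversion described above in action is to call the converter's simplified entry point introduced in this patch. The sketch below is illustrative only and is not part of the patch series; the stream name, namespace, and schema literal are invented for the example, and it assumes the destination-s3 connector classes and Jackson are on the classpath.

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.airbyte.integrations.destination.s3.avro.JsonToAvroSchemaConverter;
import org.apache.avro.Schema;

public class JsonToAvroSketch {

  public static void main(final String[] args) throws Exception {
    // An object schema with an untyped array field, mirroring the
    // "array_without_items_in_schema" test case added in this patch.
    final JsonNode jsonSchema = new ObjectMapper().readTree(
        "{\"type\": \"object\", \"properties\": {\"identifier\": {\"type\": \"array\"}}}");

    final JsonToAvroSchemaConverter converter = new JsonToAvroSchemaConverter();
    // Three-argument signature introduced in this patch; the airbyte fields and the
    // additional-properties field are appended by default.
    final Schema avroSchema = converter.getAvroSchema(jsonSchema, "my_stream", "my_namespace");

    // The untyped "identifier" array should come out as an array of nullable strings.
    System.out.println(avroSchema.toString(true));
  }
}
```

The expected result matches the `array_without_items_in_schema` case in the updated test resources: the `identifier` field becomes a nullable array whose items are `["null", "string"]`.
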
From e556697141cb2be7105f4b5253cbaa5f496103f3 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Sun, 9 Jan 2022 16:42:19 -0800 Subject: [PATCH 071/215] add ability to inject environment variables globally into launched processes (#9329) * add global process factory env var injection support * add envconfig support * fix commented out piece * use prefix instead of json map * clean up constructor and add javadocs --- .../main/java/io/airbyte/config/Configs.java | 7 + .../java/io/airbyte/config/EnvConfigs.java | 25 +++- .../io/airbyte/config/EnvConfigsTest.java | 120 ++++++++++-------- .../ContainerOrchestratorApp.java | 2 +- .../io/airbyte/workers/WorkerConfigs.java | 6 + .../workers/process/DockerProcessFactory.java | 6 + .../workers/process/KubePodProcess.java | 12 +- .../workers/process/KubeProcessFactory.java | 1 + .../temporal/sync/OrchestratorConstants.java | 1 + .../KubePodProcessIntegrationTest.java | 30 ++++- .../AirbyteIntegrationLauncherTest.java | 8 +- .../process/DockerProcessFactoryTest.java | 48 +++++++ 12 files changed, 201 insertions(+), 65 deletions(-) diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java index 2a7e10cf26a78..297744faaf138 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java @@ -228,6 +228,13 @@ public interface Configs { */ String getJobMainContainerMemoryLimit(); + /** + * Defines a default map of environment variables to use for any launched job containers. The + * expected format is a JSON encoded String -> String map. Make sure to escape properly. Defaults to + * an empty map. + */ + Map getJobDefaultEnvMap(); + // Jobs - Kube only /** * Define one or more Job pod tolerations. Tolerations are separated by ';'. 
Each toleration diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index e5ab6f06a5fa7..065c50963f902 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -23,6 +23,7 @@ import java.util.Optional; import java.util.Set; import java.util.function.Function; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; import org.slf4j.Logger; @@ -77,6 +78,8 @@ public class EnvConfigs implements Configs { public static final String JOB_MAIN_CONTAINER_CPU_LIMIT = "JOB_MAIN_CONTAINER_CPU_LIMIT"; public static final String JOB_MAIN_CONTAINER_MEMORY_REQUEST = "JOB_MAIN_CONTAINER_MEMORY_REQUEST"; public static final String JOB_MAIN_CONTAINER_MEMORY_LIMIT = "JOB_MAIN_CONTAINER_MEMORY_LIMIT"; + public static final String JOB_DEFAULT_ENV_MAP = "JOB_DEFAULT_ENV_MAP"; + public static final String JOB_DEFAULT_ENV_PREFIX = "JOB_DEFAULT_ENV_"; private static final String SECRET_PERSISTENCE = "SECRET_PERSISTENCE"; public static final String JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_SECRET = "JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_SECRET"; private static final String PUBLISH_METRICS = "PUBLISH_METRICS"; @@ -121,15 +124,24 @@ public class EnvConfigs implements Configs { public static final String DEFAULT_NETWORK = "host"; private final Function getEnv; + private final Supplier> getAllEnvKeys; private final LogConfigs logConfigs; private final CloudStorageConfigs stateStorageCloudConfigs; + /** + * Constructs {@link EnvConfigs} from actual environment variables. + */ public EnvConfigs() { - this(System::getenv); + this(System.getenv()); } - public EnvConfigs(final Function getEnv) { - this.getEnv = getEnv; + /** + * Constructs {@link EnvConfigs} from a provided map. This can be used for testing or getting + * variables from a non-envvar source. 
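To make the intent of this constructor concrete, here is an illustrative sketch (not part of the patch itself; the key and value names are invented) of how a map-based `EnvConfigs` might behave once the `JOB_DEFAULT_ENV_` prefix handling added later in this file is in place.

```java
import io.airbyte.config.EnvConfigs;
import java.util.Map;

public class EnvConfigsSketch {

  public static void main(final String[] args) {
    // Keys carrying the JOB_DEFAULT_ENV_ prefix are meant to be injected into every
    // launched job container, with the prefix stripped from the key.
    final Map<String, String> env = Map.of(
        "JOB_DEFAULT_ENV_DD_AGENT_HOST", "dd-agent",
        "AIRBYTE_ROLE", "dev");

    final EnvConfigs configs = new EnvConfigs(env);

    System.out.println(configs.getAirbyteRole());       // dev
    System.out.println(configs.getJobDefaultEnvMap());  // expected: {DD_AGENT_HOST=dd-agent}
  }
}
```
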
+ */ + public EnvConfigs(final Map envMap) { + this.getEnv = envMap::get; + this.getAllEnvKeys = envMap::keySet; this.logConfigs = new LogConfigs(getLogConfiguration().orElse(null)); this.stateStorageCloudConfigs = getStateStorageConfiguration().orElse(null); } @@ -481,6 +493,13 @@ public String getJobMainContainerMemoryLimit() { return getEnvOrDefault(JOB_MAIN_CONTAINER_MEMORY_LIMIT, DEFAULT_JOB_MEMORY_REQUIREMENT); } + @Override + public Map getJobDefaultEnvMap() { + return getAllEnvKeys.get().stream() + .filter(key -> key.startsWith(JOB_DEFAULT_ENV_PREFIX)) + .collect(Collectors.toMap(key -> key.replace(JOB_DEFAULT_ENV_PREFIX, ""), getEnv)); + } + @Override public LogConfigs getLogConfigs() { return logConfigs; diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/EnvConfigsTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/EnvConfigsTest.java index 726ea236c314e..717096c864266 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/EnvConfigsTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/EnvConfigsTest.java @@ -4,28 +4,24 @@ package io.airbyte.config; -import static org.mockito.Mockito.when; - import io.airbyte.commons.version.AirbyteVersion; import java.nio.file.Paths; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.function.Function; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.mockito.Mockito; class EnvConfigsTest { - private Function function; + private Map envMap; private EnvConfigs config; - @SuppressWarnings("unchecked") @BeforeEach void setUp() { - function = Mockito.mock(Function.class); - config = new EnvConfigs(function); + envMap = new HashMap<>(); + config = new EnvConfigs(envMap); } @Test @@ -35,152 +31,152 @@ void ensureGetEnvBehavior() { @Test void testAirbyteRole() { - when(function.apply(EnvConfigs.AIRBYTE_ROLE)).thenReturn(null); + envMap.put(EnvConfigs.AIRBYTE_ROLE, null); Assertions.assertNull(config.getAirbyteRole()); - when(function.apply(EnvConfigs.AIRBYTE_ROLE)).thenReturn("dev"); + envMap.put(EnvConfigs.AIRBYTE_ROLE, "dev"); Assertions.assertEquals("dev", config.getAirbyteRole()); } @Test void testAirbyteVersion() { - when(function.apply(EnvConfigs.AIRBYTE_VERSION)).thenReturn(null); + envMap.put(EnvConfigs.AIRBYTE_VERSION, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getAirbyteVersion()); - when(function.apply(EnvConfigs.AIRBYTE_VERSION)).thenReturn("dev"); + envMap.put(EnvConfigs.AIRBYTE_VERSION, "dev"); Assertions.assertEquals(new AirbyteVersion("dev"), config.getAirbyteVersion()); } @Test void testWorkspaceRoot() { - when(function.apply(EnvConfigs.WORKSPACE_ROOT)).thenReturn(null); + envMap.put(EnvConfigs.WORKSPACE_ROOT, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getWorkspaceRoot()); - when(function.apply(EnvConfigs.WORKSPACE_ROOT)).thenReturn("abc/def"); + envMap.put(EnvConfigs.WORKSPACE_ROOT, "abc/def"); Assertions.assertEquals(Paths.get("abc/def"), config.getWorkspaceRoot()); } @Test void testLocalRoot() { - when(function.apply(EnvConfigs.LOCAL_ROOT)).thenReturn(null); + envMap.put(EnvConfigs.LOCAL_ROOT, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getLocalRoot()); - when(function.apply(EnvConfigs.LOCAL_ROOT)).thenReturn("abc/def"); + envMap.put(EnvConfigs.LOCAL_ROOT, "abc/def"); Assertions.assertEquals(Paths.get("abc/def"), config.getLocalRoot()); } @Test void 
testConfigRoot() { - when(function.apply(EnvConfigs.CONFIG_ROOT)).thenReturn(null); + envMap.put(EnvConfigs.CONFIG_ROOT, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getConfigRoot()); - when(function.apply(EnvConfigs.CONFIG_ROOT)).thenReturn("a/b"); + envMap.put(EnvConfigs.CONFIG_ROOT, "a/b"); Assertions.assertEquals(Paths.get("a/b"), config.getConfigRoot()); } @Test void testGetDatabaseUser() { - when(function.apply(EnvConfigs.DATABASE_USER)).thenReturn(null); + envMap.put(EnvConfigs.DATABASE_USER, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getDatabaseUser()); - when(function.apply(EnvConfigs.DATABASE_USER)).thenReturn("user"); + envMap.put(EnvConfigs.DATABASE_USER, "user"); Assertions.assertEquals("user", config.getDatabaseUser()); } @Test void testGetDatabasePassword() { - when(function.apply(EnvConfigs.DATABASE_PASSWORD)).thenReturn(null); + envMap.put(EnvConfigs.DATABASE_PASSWORD, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getDatabasePassword()); - when(function.apply(EnvConfigs.DATABASE_PASSWORD)).thenReturn("password"); + envMap.put(EnvConfigs.DATABASE_PASSWORD, "password"); Assertions.assertEquals("password", config.getDatabasePassword()); } @Test void testGetDatabaseUrl() { - when(function.apply(EnvConfigs.DATABASE_URL)).thenReturn(null); + envMap.put(EnvConfigs.DATABASE_URL, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getDatabaseUrl()); - when(function.apply(EnvConfigs.DATABASE_URL)).thenReturn("url"); + envMap.put(EnvConfigs.DATABASE_URL, "url"); Assertions.assertEquals("url", config.getDatabaseUrl()); } @Test void testGetWorkspaceDockerMount() { - when(function.apply(EnvConfigs.WORKSPACE_DOCKER_MOUNT)).thenReturn(null); - when(function.apply(EnvConfigs.WORKSPACE_ROOT)).thenReturn("abc/def"); + envMap.put(EnvConfigs.WORKSPACE_DOCKER_MOUNT, null); + envMap.put(EnvConfigs.WORKSPACE_ROOT, "abc/def"); Assertions.assertEquals("abc/def", config.getWorkspaceDockerMount()); - when(function.apply(EnvConfigs.WORKSPACE_DOCKER_MOUNT)).thenReturn("root"); - when(function.apply(EnvConfigs.WORKSPACE_ROOT)).thenReturn("abc/def"); + envMap.put(EnvConfigs.WORKSPACE_DOCKER_MOUNT, "root"); + envMap.put(EnvConfigs.WORKSPACE_ROOT, "abc/def"); Assertions.assertEquals("root", config.getWorkspaceDockerMount()); - when(function.apply(EnvConfigs.WORKSPACE_DOCKER_MOUNT)).thenReturn(null); - when(function.apply(EnvConfigs.WORKSPACE_ROOT)).thenReturn(null); + envMap.put(EnvConfigs.WORKSPACE_DOCKER_MOUNT, null); + envMap.put(EnvConfigs.WORKSPACE_ROOT, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getWorkspaceDockerMount()); } @Test void testGetLocalDockerMount() { - when(function.apply(EnvConfigs.LOCAL_DOCKER_MOUNT)).thenReturn(null); - when(function.apply(EnvConfigs.LOCAL_ROOT)).thenReturn("abc/def"); + envMap.put(EnvConfigs.LOCAL_DOCKER_MOUNT, null); + envMap.put(EnvConfigs.LOCAL_ROOT, "abc/def"); Assertions.assertEquals("abc/def", config.getLocalDockerMount()); - when(function.apply(EnvConfigs.LOCAL_DOCKER_MOUNT)).thenReturn("root"); - when(function.apply(EnvConfigs.LOCAL_ROOT)).thenReturn("abc/def"); + envMap.put(EnvConfigs.LOCAL_DOCKER_MOUNT, "root"); + envMap.put(EnvConfigs.LOCAL_ROOT, "abc/def"); Assertions.assertEquals("root", config.getLocalDockerMount()); - when(function.apply(EnvConfigs.LOCAL_DOCKER_MOUNT)).thenReturn(null); - when(function.apply(EnvConfigs.LOCAL_ROOT)).thenReturn(null); + envMap.put(EnvConfigs.LOCAL_DOCKER_MOUNT, null); + 
envMap.put(EnvConfigs.LOCAL_ROOT, null); Assertions.assertThrows(IllegalArgumentException.class, () -> config.getLocalDockerMount()); } @Test void testDockerNetwork() { - when(function.apply(EnvConfigs.DOCKER_NETWORK)).thenReturn(null); + envMap.put(EnvConfigs.DOCKER_NETWORK, null); Assertions.assertEquals("host", config.getDockerNetwork()); - when(function.apply(EnvConfigs.DOCKER_NETWORK)).thenReturn("abc"); + envMap.put(EnvConfigs.DOCKER_NETWORK, "abc"); Assertions.assertEquals("abc", config.getDockerNetwork()); } @Test void testTrackingStrategy() { - when(function.apply(EnvConfigs.TRACKING_STRATEGY)).thenReturn(null); + envMap.put(EnvConfigs.TRACKING_STRATEGY, null); Assertions.assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); - when(function.apply(EnvConfigs.TRACKING_STRATEGY)).thenReturn("abc"); + envMap.put(EnvConfigs.TRACKING_STRATEGY, "abc"); Assertions.assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); - when(function.apply(EnvConfigs.TRACKING_STRATEGY)).thenReturn("logging"); + envMap.put(EnvConfigs.TRACKING_STRATEGY, "logging"); Assertions.assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); - when(function.apply(EnvConfigs.TRACKING_STRATEGY)).thenReturn("segment"); + envMap.put(EnvConfigs.TRACKING_STRATEGY, "segment"); Assertions.assertEquals(Configs.TrackingStrategy.SEGMENT, config.getTrackingStrategy()); - when(function.apply(EnvConfigs.TRACKING_STRATEGY)).thenReturn("LOGGING"); + envMap.put(EnvConfigs.TRACKING_STRATEGY, "LOGGING"); Assertions.assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); } @Test void testworkerKubeTolerations() { - when(function.apply(EnvConfigs.JOB_KUBE_TOLERATIONS)).thenReturn(null); + envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, null); Assertions.assertEquals(config.getJobKubeTolerations(), List.of()); - when(function.apply(EnvConfigs.JOB_KUBE_TOLERATIONS)).thenReturn(";;;"); + envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, ";;;"); Assertions.assertEquals(config.getJobKubeTolerations(), List.of()); - when(function.apply(EnvConfigs.JOB_KUBE_TOLERATIONS)).thenReturn("key=k,value=v;"); + envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, "key=k,value=v;"); Assertions.assertEquals(config.getJobKubeTolerations(), List.of()); - when(function.apply(EnvConfigs.JOB_KUBE_TOLERATIONS)).thenReturn("key=airbyte-server,operator=Exists,effect=NoSchedule"); + envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, "key=airbyte-server,operator=Exists,effect=NoSchedule"); Assertions.assertEquals(config.getJobKubeTolerations(), List.of(new TolerationPOJO("airbyte-server", "NoSchedule", null, "Exists"))); - when(function.apply(EnvConfigs.JOB_KUBE_TOLERATIONS)).thenReturn("key=airbyte-server,operator=Equals,value=true,effect=NoSchedule"); + envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, "key=airbyte-server,operator=Equals,value=true,effect=NoSchedule"); Assertions.assertEquals(config.getJobKubeTolerations(), List.of(new TolerationPOJO("airbyte-server", "NoSchedule", "true", "Equals"))); - when(function.apply(EnvConfigs.JOB_KUBE_TOLERATIONS)) - .thenReturn("key=airbyte-server,operator=Exists,effect=NoSchedule;key=airbyte-server,operator=Equals,value=true,effect=NoSchedule"); + envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, + "key=airbyte-server,operator=Exists,effect=NoSchedule;key=airbyte-server,operator=Equals,value=true,effect=NoSchedule"); Assertions.assertEquals(config.getJobKubeTolerations(), List.of( new TolerationPOJO("airbyte-server", "NoSchedule", null, "Exists"), new 
TolerationPOJO("airbyte-server", "NoSchedule", "true", "Equals"))); @@ -188,20 +184,34 @@ void testworkerKubeTolerations() { @Test void testworkerKubeNodeSelectors() { - when(function.apply(EnvConfigs.JOB_KUBE_NODE_SELECTORS)).thenReturn(null); + envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, null); Assertions.assertEquals(config.getJobKubeNodeSelectors(), Map.of()); - when(function.apply(EnvConfigs.JOB_KUBE_NODE_SELECTORS)).thenReturn(",,,"); + envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, ",,,"); Assertions.assertEquals(config.getJobKubeNodeSelectors(), Map.of()); - when(function.apply(EnvConfigs.JOB_KUBE_NODE_SELECTORS)).thenReturn("key=k,,;$%&^#"); + envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, "key=k,,;$%&^#"); Assertions.assertEquals(config.getJobKubeNodeSelectors(), Map.of("key", "k")); - when(function.apply(EnvConfigs.JOB_KUBE_NODE_SELECTORS)).thenReturn("one=two"); + envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, "one=two"); Assertions.assertEquals(config.getJobKubeNodeSelectors(), Map.of("one", "two")); - when(function.apply(EnvConfigs.JOB_KUBE_NODE_SELECTORS)).thenReturn("airbyte=server,something=nothing"); + envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, "airbyte=server,something=nothing"); Assertions.assertEquals(config.getJobKubeNodeSelectors(), Map.of("airbyte", "server", "something", "nothing")); } + @Test + void testEmptyEnvMapRetrieval() { + Assertions.assertEquals(Map.of(), config.getJobDefaultEnvMap()); + } + + @Test + void testEnvMapRetrieval() { + envMap.put(EnvConfigs.JOB_DEFAULT_ENV_PREFIX + "ENV1", "VAL1"); + envMap.put(EnvConfigs.JOB_DEFAULT_ENV_PREFIX + "ENV2", "VAL\"2WithQuotesand$ymbols"); + + final var expected = Map.of("ENV1", "VAL1", "ENV2", "VAL\"2WithQuotesand$ymbols"); + Assertions.assertEquals(expected, config.getJobDefaultEnvMap()); + } + } diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java index 833233c0d315a..c75b0e3a65d64 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java @@ -50,7 +50,7 @@ public static void main(final String[] args) throws Exception { final Map envMap = (Map) Jsons.deserialize(Files.readString(Path.of(OrchestratorConstants.INIT_FILE_ENV_MAP)), Map.class); - final Configs configs = new EnvConfigs(envMap::get); + final Configs configs = new EnvConfigs(envMap); heartbeatServer = new WorkerHeartbeatServer(WorkerApp.KUBE_HEARTBEAT_PORT); heartbeatServer.startBackground(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerConfigs.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerConfigs.java index 24dc126f40186..4dcd1d383d9c1 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerConfigs.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerConfigs.java @@ -21,6 +21,7 @@ public class WorkerConfigs { private final String jobSocatImage; private final String jobBusyboxImage; private final String jobCurlImage; + private final Map envMap; public WorkerConfigs(final Configs configs) { this.workerEnvironment = configs.getWorkerEnvironment(); @@ -36,6 +37,7 @@ public WorkerConfigs(final Configs configs) { this.jobSocatImage = configs.getJobKubeSocatImage(); this.jobBusyboxImage = configs.getJobKubeBusyboxImage(); 
this.jobCurlImage = configs.getJobKubeCurlImage(); + this.envMap = configs.getJobDefaultEnvMap(); } public Configs.WorkerEnvironment getWorkerEnvironment() { @@ -74,4 +76,8 @@ public String getJobCurlImage() { return jobCurlImage; } + public Map getEnvMap() { + return envMap; + } + } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java index bfe3c01580404..62ff643d00b88 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java @@ -146,6 +146,12 @@ public Process create(final String jobId, "--log-driver", "none"); } + + for (final var envEntry : workerConfigs.getEnvMap().entrySet()) { + cmd.add("-e"); + cmd.add(envEntry.getKey() + "=" + envEntry.getValue()); + } + if (!Strings.isNullOrEmpty(entrypoint)) { cmd.add("--entrypoint"); cmd.add(entrypoint); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index 0f5d0668d8f49..0274f42126e54 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -16,6 +16,7 @@ import io.fabric8.kubernetes.api.model.ContainerPortBuilder; import io.fabric8.kubernetes.api.model.ContainerStatus; import io.fabric8.kubernetes.api.model.DeletionPropagation; +import io.fabric8.kubernetes.api.model.EnvVar; import io.fabric8.kubernetes.api.model.LocalObjectReference; import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.api.model.PodBuilder; @@ -134,7 +135,6 @@ public class KubePodProcess extends Process { private final Duration statusCheckInterval; private final int stdoutLocalPort; private final ServerSocket stderrServerSocket; - private final Map internalToExternalPorts; private final int stderrLocalPort; private final ExecutorService executorService; @@ -173,6 +173,7 @@ private static Container getMain(final String image, final List mainVolumeMounts, final ResourceRequirements resourceRequirements, final Map internalToExternalPorts, + final Map envMap, final String[] args) throws IOException { final var argsStr = String.join(" ", args); @@ -196,12 +197,17 @@ private static Container getMain(final String image, .build()) .collect(Collectors.toList()); + final List envVars = envMap.entrySet().stream() + .map(entry -> new EnvVar(entry.getKey(), entry.getValue(), null)) + .collect(Collectors.toList()); + final ContainerBuilder containerBuilder = new ContainerBuilder() .withName("main") .withPorts(containerPorts) .withImage(image) .withImagePullPolicy(imagePullPolicy) .withCommand("sh", "-c", mainCommand) + .withEnv(envVars) .withWorkingDir(CONFIG_DIR) .withVolumeMounts(mainVolumeMounts); @@ -333,6 +339,7 @@ public KubePodProcess(final boolean isOrchestrator, final String socatImage, final String busyboxImage, final String curlImage, + final Map envMap, final Map internalToExternalPorts, final String... 
args) throws IOException, InterruptedException { @@ -340,10 +347,8 @@ public KubePodProcess(final boolean isOrchestrator, this.statusCheckInterval = statusCheckInterval; this.stdoutLocalPort = stdoutLocalPort; this.stderrLocalPort = stderrLocalPort; - this.stdoutServerSocket = new ServerSocket(stdoutLocalPort); this.stderrServerSocket = new ServerSocket(stderrLocalPort); - this.internalToExternalPorts = internalToExternalPorts; this.executorService = Executors.newFixedThreadPool(2); setupStdOutAndStdErrListeners(); @@ -394,6 +399,7 @@ public KubePodProcess(final boolean isOrchestrator, List.of(pipeVolumeMount, configVolumeMount, terminationVolumeMount), resourceRequirements, internalToExternalPorts, + envMap, args); final io.fabric8.kubernetes.api.model.ResourceRequirements sidecarResources = getResourceRequirementsBuilder(DEFAULT_SIDECAR_RESOURCES).build(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessFactory.java index 89336507d605b..5323d9e2e4e78 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessFactory.java @@ -147,6 +147,7 @@ public Process create(final String jobId, workerConfigs.getJobSocatImage(), workerConfigs.getJobBusyboxImage(), workerConfigs.getJobCurlImage(), + workerConfigs.getEnvMap(), internalToExternalPorts, args); } catch (final Exception e) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java index b80da9d36f3c0..ce7b456faf71a 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java @@ -28,6 +28,7 @@ public class OrchestratorConstants { EnvConfigs.JOB_MAIN_CONTAINER_CPU_LIMIT, EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_REQUEST, EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_LIMIT, + EnvConfigs.JOB_DEFAULT_ENV_MAP, EnvConfigs.LOCAL_ROOT); public static final String INIT_FILE_ENV_MAP = "envMap.json"; diff --git a/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/KubePodProcessIntegrationTest.java b/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/KubePodProcessIntegrationTest.java index f2990be3161b0..f33953c533b33 100644 --- a/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/KubePodProcessIntegrationTest.java +++ b/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/KubePodProcessIntegrationTest.java @@ -7,6 +7,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; @@ -55,6 +57,9 @@ public class KubePodProcessIntegrationTest { private static KubernetesClient fabricClient; private static KubeProcessFactory processFactory; private static final ResourceRequirements DEFAULT_RESOURCE_REQUIREMENTS = new WorkerConfigs(new EnvConfigs()).getResourceRequirements(); + private static final String ENV_KEY = "ENV_VAR_1"; + private static final String ENV_VALUE = "ENV_VALUE_1"; + private static final Map ENV_MAP = 
ImmutableMap.of(ENV_KEY, ENV_VALUE); private WorkerHeartbeatServer server; @@ -69,8 +74,18 @@ public static void init() throws Exception { fabricClient = new DefaultKubernetesClient(); KubePortManagerSingleton.init(new HashSet<>(openPorts.subList(1, openPorts.size() - 1))); + + final WorkerConfigs workerConfigs = spy(new WorkerConfigs(new EnvConfigs())); + when(workerConfigs.getEnvMap()).thenReturn(Map.of("ENV_VAR_1", "ENV_VALUE_1")); + processFactory = - new KubeProcessFactory(new WorkerConfigs(new EnvConfigs()), "default", fabricClient, heartbeatUrl, getHost(), false, + new KubeProcessFactory( + workerConfigs, + "default", + fabricClient, + heartbeatUrl, + getHost(), + false, Duration.ofSeconds(1)); } @@ -193,6 +208,19 @@ public void testSuccessfulSpawningWithQuotes() throws Exception { assertEquals(0, process.exitValue()); } + @Test + public void testEnvMapSet() throws Exception { + // start a finite process + final Process process = getProcess("echo ENV_VAR_1=$ENV_VAR_1"); + final var output = new String(process.getInputStream().readAllBytes()); + assertEquals("ENV_VAR_1=ENV_VALUE_1\n", output); + process.waitFor(); + + // the pod should be dead and in a good state + assertFalse(process.isAlive()); + assertEquals(0, process.exitValue()); + } + @Test public void testPipeInEntrypoint() throws Exception { // start a process that has a pipe in the entrypoint diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/process/AirbyteIntegrationLauncherTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/process/AirbyteIntegrationLauncherTest.java index e9f65201e9845..c196f563a05d0 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/process/AirbyteIntegrationLauncherTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/process/AirbyteIntegrationLauncherTest.java @@ -57,7 +57,9 @@ void check() throws WorkerException { launcher.check(JOB_ROOT, "config", "{}"); Mockito.verify(processFactory).create(JOB_ID, JOB_ATTEMPT, JOB_ROOT, FAKE_IMAGE, false, CONFIG_FILES, null, - workerConfigs.getResourceRequirements(), Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.CHECK_JOB), Map.of(), + workerConfigs.getResourceRequirements(), + Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.CHECK_JOB), + Map.of(), "check", "--config", "config"); } @@ -67,7 +69,9 @@ void discover() throws WorkerException { launcher.discover(JOB_ROOT, "config", "{}"); Mockito.verify(processFactory).create(JOB_ID, JOB_ATTEMPT, JOB_ROOT, FAKE_IMAGE, false, CONFIG_FILES, null, - workerConfigs.getResourceRequirements(), Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.DISCOVER_JOB), Map.of(), + workerConfigs.getResourceRequirements(), + Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.DISCOVER_JOB), + Map.of(), "discover", "--config", "config"); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/process/DockerProcessFactoryTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/process/DockerProcessFactoryTest.java index 805753581be33..53d33a21955b5 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/process/DockerProcessFactoryTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/process/DockerProcessFactoryTest.java @@ -7,6 +7,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableMap; import 
io.airbyte.commons.io.IOs; @@ -87,4 +89,50 @@ public void testFileWriting(boolean isOrchestrator) throws IOException, WorkerEx Jsons.deserialize(IOs.readFile(jobRoot, "config.json"))); } + /** + * Tests that the env var map passed in is accessible within the process. + */ + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testEnvMapSet(boolean isOrchestrator) throws IOException, WorkerException { + final Path workspaceRoot = Files.createTempDirectory(Files.createDirectories(TEST_ROOT), "process_factory"); + final Path jobRoot = workspaceRoot.resolve("job"); + + final WorkerConfigs workerConfigs = spy(new WorkerConfigs(new EnvConfigs())); + when(workerConfigs.getEnvMap()).thenReturn(Map.of("ENV_VAR_1", "ENV_VALUE_1")); + + final DockerProcessFactory processFactory = + new DockerProcessFactory( + workerConfigs, + workspaceRoot, + "", + "", + "host", + isOrchestrator); + + final Process process = processFactory.create( + "job_id", + 0, + jobRoot, + "busybox", + false, + Map.of(), + "/bin/sh", + workerConfigs.getResourceRequirements(), + Map.of(), + Map.of(), + "-c", + "echo ENV_VAR_1=$ENV_VAR_1"); + + final StringBuilder out = new StringBuilder(); + final StringBuilder err = new StringBuilder(); + LineGobbler.gobble(process.getInputStream(), out::append); + LineGobbler.gobble(process.getErrorStream(), err::append); + + WorkerUtils.gentleClose(new WorkerConfigs(new EnvConfigs()), process, 20, TimeUnit.SECONDS); + + assertEquals(0, process.exitValue(), String.format("Process failed with stdout: %s and stderr: %s", out, err)); + assertEquals("ENV_VAR_1=ENV_VALUE_1", out.toString(), String.format("Output did not contain the expected string. stdout: %s", out)); + } + } From f821ce212f8c40dc1a6765f8d4d5cc8b8745516e Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Sun, 9 Jan 2022 16:50:20 -0800 Subject: [PATCH 072/215] upgrade gradle to 7.3.3 (#9369) --- gradle/wrapper/gradle-wrapper.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 84d1f85fd6581..2e6e5897b5285 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists From 2a600bebef964cc5c40df5cdc24091946ef20458 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Sun, 9 Jan 2022 18:25:21 -0800 Subject: [PATCH 073/215] fix migration test snowflake version comparison error (#9370) * fix migration test again * disable acceptance tests * re-enable acceptance tests * bring snowflake version back * fix * fix how we compare versions for migration tests --- .../MigrationAcceptanceTest.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java b/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java index 13b67b4eabc22..9b1297b78d687 100644 --- a/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java +++ 
b/airbyte-tests/src/automaticMigrationAcceptanceTest/java/io/airbyte/test/automaticMigrationAcceptance/MigrationAcceptanceTest.java @@ -44,6 +44,7 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.testcontainers.utility.ComparableVersion; /** * This class contains an e2e test simulating what a user encounter when trying to upgrade Airybte. @@ -242,11 +243,9 @@ private static void assertDestinationDefinitionInformation(final ApiClient apiCl foundLocalCSVDestinationDefinition = true; } case "424892c4-daac-4491-b35d-c6688ba547ba" -> { - final String[] tagBrokenAsArray = destinationDefinitionRead.getDockerImageTag().replace(".", ",").split(","); - assertEquals(3, tagBrokenAsArray.length); - assertTrue(Integer.parseInt(tagBrokenAsArray[0]) >= 0); - assertTrue(Integer.parseInt(tagBrokenAsArray[1]) >= 3); - assertTrue(Integer.parseInt(tagBrokenAsArray[2]) >= 9); + final String tag = destinationDefinitionRead.getDockerImageTag(); + final ComparableVersion version = new ComparableVersion(tag); + assertTrue(version.compareTo(new ComparableVersion("0.3.9")) >= 0); assertTrue(destinationDefinitionRead.getName().contains("Snowflake")); foundSnowflakeDestinationDefinition = true; } From c9fee23b5f7dc0e75cbee93231d7ef446e44e4a4 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Sun, 9 Jan 2022 19:08:09 -0800 Subject: [PATCH 074/215] add warning about merging release on build failures (#9371) --- tools/bin/pr_body.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/bin/pr_body.sh b/tools/bin/pr_body.sh index 474f5844b591e..53b59d510cec6 100755 --- a/tools/bin/pr_body.sh +++ b/tools/bin/pr_body.sh @@ -7,6 +7,8 @@ set -e GIT_REVISION=$(git rev-parse HEAD) [[ -z "$GIT_REVISION" ]] && echo "Couldn't get the git revision..." 
&& exit 1 +echo "*IMPORTANT: Only merge if the platform build is passing!*" +echo echo "Changelog:" echo PAGER=cat git log v${PREV_VERSION}..${GIT_REVISION} --oneline --decorate=no From 9bb28939ee0b41127eab01f6c8ac250e687bfd5c Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Sun, 9 Jan 2022 20:06:13 -0800 Subject: [PATCH 075/215] Fix connector base test (#9348) * Format code * Add sleep back * Revert wait time * Decrease waiting time * Adjust waiting time * Delay container command --- .../python/airbyte_cdk/models/airbyte_protocol.py | 5 +---- .../airbyte_protocol/models/airbyte_protocol.py | 2 +- .../unit_tests/test_utils.py | 14 ++++++-------- .../SnowflakeInsertDestinationAcceptanceTest.java | 3 ++- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py index ed89e1d7b4416..39680a330c04a 100644 --- a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py @@ -240,10 +240,7 @@ class Config: ) spec: Optional[ConnectorSpecification] = None connectionStatus: Optional[AirbyteConnectionStatus] = None - catalog: Optional[AirbyteCatalog] = Field( - None, - description="log message: any kind of logging you want the platform to know about.", - ) + catalog: Optional[AirbyteCatalog] = Field(None, description="catalog message: the calalog") record: Optional[AirbyteRecordMessage] = Field(None, description="record message: the record") state: Optional[AirbyteStateMessage] = Field( None, diff --git a/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py b/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py index 0d94c33737a69..25c507c4d82ac 100644 --- a/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py +++ b/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py @@ -226,7 +226,7 @@ class Config: log: Optional[AirbyteLogMessage] = Field(None, description="log message: any kind of logging you want the platform to know about.") spec: Optional[ConnectorSpecification] = None connectionStatus: Optional[AirbyteConnectionStatus] = None - catalog: Optional[AirbyteCatalog] = Field(None, description="log message: any kind of logging you want the platform to know about.") + catalog: Optional[AirbyteCatalog] = Field(None, description="catalog message: the calalog") record: Optional[AirbyteRecordMessage] = Field(None, description="record message: the record") state: Optional[AirbyteStateMessage] = Field( None, description="schema message: the state. Must be the last message produced. 
The platform uses this information" diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py index 2d8840e50c5cc..42bdddbcf8bd1 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py @@ -256,27 +256,25 @@ def test_failed_reading(traceback, container_error, last_line, expected_error): @pytest.mark.parametrize( - "command,wait_timeout,expected_count", + "command,expected_count", ( ( "cnt=0; while [ $cnt -lt 10 ]; do cnt=$((cnt+1)); echo something; done", - 0, 10, ), - # Sometimes a container can finish own work before python tries to read it - ("echo something;", 0.1, 1), + ("echo something;", 1), ), ids=["standard", "waiting"], ) -def test_docker_runner(command, wait_timeout, expected_count): +def test_docker_runner(command, expected_count): client = docker.from_env() new_container = client.containers.run( image="busybox", - command=f"""sh -c '{command}'""", + # Sometimes a container can finish its work before python tries to read it, + # so the container always sleeps for a while first. + command=f"""sh -c 'sleep 3; {command}'""", detach=True, ) - if wait_timeout: - time.sleep(wait_timeout) lines = list(ConnectorRunner.read(new_container, command=command)) assert set(lines) == set(["something\n"]) assert len(lines) == expected_count diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java index 6cf51a5102ffa..5aeac996f1431 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java @@ -127,7 +127,8 @@ private List retrieveRecordsFromTable(final String tableName, final St final ResultSet tableInfo = connection.createStatement() .executeQuery(String.format("SHOW TABLES LIKE '%s' IN SCHEMA %s;", tableName, schema)); assertTrue(tableInfo.next()); - // check that we're creating permanent tables. DBT defaults to transient tables, which have `TRANSIENT` as the value for the `kind` column. + // check that we're creating permanent tables. DBT defaults to transient tables, which have + // `TRANSIENT` as the value for the `kind` column. assertEquals("TABLE", tableInfo.getString("kind")); return connection.createStatement() .executeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)); From 0ee4d09eed675bf0a4ab30a444c6c82e1c80e5d9 Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Sun, 9 Jan 2022 22:17:24 -0800 Subject: [PATCH 076/215] Revert "Fix connector base test (#9348)" (#9373) This commit reverts #9348 (9bb28939ee0b41127eab01f6c8ac250e687bfd5c) because it does not work. The `test_docker_runner[standard]` and `test_docker_runner[waiting]` test cases still fail transiently. 
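For context on the transient failures: the test launches a detached busybox container and then tries to stream its output, so if the container exits before the reader attaches, lines can be dropped. The snippet below is only a sketch of one deterministic way to read such output with the docker-py client (wait for the container to exit, then fetch the buffered logs); it is not the change made in this commit, and the `ConnectorRunner.read` helper used by the actual test is not shown here.

```
# Illustration only: read a detached container's output without racing its exit.
import docker

client = docker.from_env()
container = client.containers.run(
    image="busybox",
    command="sh -c 'echo something'",
    detach=True,
)
container.wait()                    # block until the container has exited
output = container.logs().decode()  # the log buffer is still available after exit
assert output == "something\n"
```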
--- .../python/airbyte_cdk/models/airbyte_protocol.py | 5 ++++- .../airbyte_protocol/models/airbyte_protocol.py | 2 +- .../unit_tests/test_utils.py | 14 ++++++++------ .../SnowflakeInsertDestinationAcceptanceTest.java | 3 +-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py index 39680a330c04a..ed89e1d7b4416 100644 --- a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py @@ -240,7 +240,10 @@ class Config: ) spec: Optional[ConnectorSpecification] = None connectionStatus: Optional[AirbyteConnectionStatus] = None - catalog: Optional[AirbyteCatalog] = Field(None, description="catalog message: the calalog") + catalog: Optional[AirbyteCatalog] = Field( + None, + description="log message: any kind of logging you want the platform to know about.", + ) record: Optional[AirbyteRecordMessage] = Field(None, description="record message: the record") state: Optional[AirbyteStateMessage] = Field( None, diff --git a/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py b/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py index 25c507c4d82ac..0d94c33737a69 100644 --- a/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py +++ b/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py @@ -226,7 +226,7 @@ class Config: log: Optional[AirbyteLogMessage] = Field(None, description="log message: any kind of logging you want the platform to know about.") spec: Optional[ConnectorSpecification] = None connectionStatus: Optional[AirbyteConnectionStatus] = None - catalog: Optional[AirbyteCatalog] = Field(None, description="catalog message: the calalog") + catalog: Optional[AirbyteCatalog] = Field(None, description="log message: any kind of logging you want the platform to know about.") record: Optional[AirbyteRecordMessage] = Field(None, description="record message: the record") state: Optional[AirbyteStateMessage] = Field( None, description="schema message: the state. Must be the last message produced. The platform uses this information" diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py index 42bdddbcf8bd1..2d8840e50c5cc 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py @@ -256,25 +256,27 @@ def test_failed_reading(traceback, container_error, last_line, expected_error): @pytest.mark.parametrize( - "command,expected_count", + "command,wait_timeout,expected_count", ( ( "cnt=0; while [ $cnt -lt 10 ]; do cnt=$((cnt+1)); echo something; done", + 0, 10, ), - ("echo something;", 1), + # Sometimes a container can finish own work before python tries to read it + ("echo something;", 0.1, 1), ), ids=["standard", "waiting"], ) -def test_docker_runner(command, expected_count): +def test_docker_runner(command, wait_timeout, expected_count): client = docker.from_env() new_container = client.containers.run( image="busybox", - # Sometimes a container can finish its work before python tries to read it, - # so the container always sleeps for a while first. 
- command=f"""sh -c 'sleep 3; {command}'""", + command=f"""sh -c '{command}'""", detach=True, ) + if wait_timeout: + time.sleep(wait_timeout) lines = list(ConnectorRunner.read(new_container, command=command)) assert set(lines) == set(["something\n"]) assert len(lines) == expected_count diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java index 5aeac996f1431..6cf51a5102ffa 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java @@ -127,8 +127,7 @@ private List retrieveRecordsFromTable(final String tableName, final St final ResultSet tableInfo = connection.createStatement() .executeQuery(String.format("SHOW TABLES LIKE '%s' IN SCHEMA %s;", tableName, schema)); assertTrue(tableInfo.next()); - // check that we're creating permanent tables. DBT defaults to transient tables, which have - // `TRANSIENT` as the value for the `kind` column. + // check that we're creating permanent tables. DBT defaults to transient tables, which have `TRANSIENT` as the value for the `kind` column. assertEquals("TABLE", tableInfo.getString("kind")); return connection.createStatement() .executeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)); From 0c57100b552c6677bbbcbf1af85dd8e137fd8aad Mon Sep 17 00:00:00 2001 From: Christophe Duong Date: Mon, 10 Jan 2022 10:36:01 +0100 Subject: [PATCH 077/215] source-googlesheet: Service account json should be secret (#9356) * Service account json should be secret * Bumpversion of connector * bumpversion --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 5 +++-- .../connectors/source-google-sheets/Dockerfile | 2 +- .../source-google-sheets/google_sheets_source/spec.json | 1 + 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index adf054c624ccc..b906143af6f16 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -259,7 +259,7 @@ - name: Google Sheets sourceDefinitionId: 71607ba1-c0ac-4799-8049-7f4b90dd50f7 dockerRepository: airbyte/source-google-sheets - dockerImageTag: 0.2.7 + dockerImageTag: 0.2.8 documentationUrl: https://docs.airbyte.io/integrations/sources/google-sheets icon: google-sheets.svg sourceType: file diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 6738f46447979..d1f6c73b32597 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2583,7 +2583,7 @@ oauthFlowOutputParameters: - - "access_token" - - "refresh_token" -- dockerImage: "airbyte/source-google-sheets:0.2.7" +- dockerImage: 
"airbyte/source-google-sheets:0.2.8" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-sheets" connectionSpecification: @@ -2639,6 +2639,7 @@ service_account_info: type: "string" description: "The JSON key of the service account to use for authorization" + airbyte_secret: true examples: - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" @@ -2650,7 +2651,7 @@ oauth2Specification: rootObject: - "credentials" - - 0 + - "0" oauthFlowInitParameters: - - "client_id" - - "client_secret" diff --git a/airbyte-integrations/connectors/source-google-sheets/Dockerfile b/airbyte-integrations/connectors/source-google-sheets/Dockerfile index 7e23836368ad1..052886ff30593 100644 --- a/airbyte-integrations/connectors/source-google-sheets/Dockerfile +++ b/airbyte-integrations/connectors/source-google-sheets/Dockerfile @@ -34,6 +34,6 @@ COPY google_sheets_source ./google_sheets_source ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.7 +LABEL io.airbyte.version=0.2.8 LABEL io.airbyte.name=airbyte/source-google-sheets diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/spec.json b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/spec.json index 68e2ca1ec302c..14a0b357cdb22 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/spec.json +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/spec.json @@ -60,6 +60,7 @@ "service_account_info": { "type": "string", "description": "The JSON key of the service account to use for authorization", + "airbyte_secret": true, "examples": [ "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID, \"private_key_id\": YOUR_PRIVATE_KEY, ... 
}" ] From e68c564e21fa6345797349d4208b4b6174a5b84f Mon Sep 17 00:00:00 2001 From: firmbase-tal <89132349+firmbase-tal@users.noreply.github.com> Date: Mon, 10 Jan 2022 13:15:30 +0200 Subject: [PATCH 078/215] Source Plaid: port to Python CDK (#7977) --- .../ed799e2b-2158-4c66-8da4-b40fe63bc72a.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 3 +- .../connectors/source-plaid/.dockerignore | 7 +- .../connectors/source-plaid/.gitignore | 1 - .../connectors/source-plaid/.prettierrc | 4 - .../connectors/source-plaid/Dockerfile | 44 ++- .../connectors/source-plaid/README.md | 128 ++++++--- .../source-plaid/acceptance-test-config.yml | 24 ++ .../source-plaid/acceptance-test-docker.sh | 16 ++ .../connectors/source-plaid/build.gradle | 40 +-- .../integration_tests/__init__.py | 3 + .../integration_tests/abnormal_state.json | 5 + .../integration_tests/acceptance.py | 14 + .../integration_tests/catalog.json | 104 ++++++++ .../integration_tests/configured_catalog.json | 107 ++++++++ .../integration_tests/invalid_config.json | 6 + .../integration_tests/sample_config.json | 6 + .../integration_tests/sample_state.json | 5 + .../connectors/source-plaid/main.py | 13 + .../connectors/source-plaid/package-lock.json | 31 --- .../connectors/source-plaid/package.json | 16 -- .../connectors/source-plaid/requirements.txt | 3 + .../fullrefresh_configured_catalog.json | 34 --- .../connectors/source-plaid/setup.py | 26 ++ .../connectors/source-plaid/source.js | 252 ------------------ .../source-plaid/source_plaid/__init__.py | 8 + .../source_plaid/schemas/balance.json | 13 + .../source_plaid/schemas/transaction.json | 63 +++++ .../source-plaid/source_plaid/source.py | 120 +++++++++ .../source-plaid/{ => source_plaid}/spec.json | 2 +- docs/integrations/sources/plaid.md | 3 + 32 files changed, 677 insertions(+), 428 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-plaid/.gitignore delete mode 100644 airbyte-integrations/connectors/source-plaid/.prettierrc create mode 100644 airbyte-integrations/connectors/source-plaid/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-plaid/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-plaid/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-plaid/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-plaid/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-plaid/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-plaid/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-plaid/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-plaid/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-plaid/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-plaid/main.py delete mode 100644 airbyte-integrations/connectors/source-plaid/package-lock.json delete mode 100644 airbyte-integrations/connectors/source-plaid/package.json create mode 100644 airbyte-integrations/connectors/source-plaid/requirements.txt delete mode 100644 airbyte-integrations/connectors/source-plaid/sample_files/fullrefresh_configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-plaid/setup.py delete mode 100644 
airbyte-integrations/connectors/source-plaid/source.js create mode 100644 airbyte-integrations/connectors/source-plaid/source_plaid/__init__.py create mode 100644 airbyte-integrations/connectors/source-plaid/source_plaid/schemas/balance.json create mode 100644 airbyte-integrations/connectors/source-plaid/source_plaid/schemas/transaction.json create mode 100644 airbyte-integrations/connectors/source-plaid/source_plaid/source.py rename airbyte-integrations/connectors/source-plaid/{ => source_plaid}/spec.json (92%) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ed799e2b-2158-4c66-8da4-b40fe63bc72a.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ed799e2b-2158-4c66-8da4-b40fe63bc72a.json index 39e9635b31fad..a711ecd13ffd4 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ed799e2b-2158-4c66-8da4-b40fe63bc72a.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ed799e2b-2158-4c66-8da4-b40fe63bc72a.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "ed799e2b-2158-4c66-8da4-b40fe63bc72a", "name": "Plaid", "dockerRepository": "airbyte/source-plaid", - "dockerImageTag": "0.2.1", + "dockerImageTag": "0.3.0", "documentationUrl": "https://docs.airbyte.io/integrations/sources/plaid", "icon": "plaid.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index b906143af6f16..0723bbb8a0cb2 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -516,7 +516,7 @@ - name: Plaid sourceDefinitionId: ed799e2b-2158-4c66-8da4-b40fe63bc72a dockerRepository: airbyte/source-plaid - dockerImageTag: 0.2.1 + dockerImageTag: 0.3.0 documentationUrl: https://docs.airbyte.io/integrations/sources/plaid icon: plaid.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index d1f6c73b32597..b660002ebac1f 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -5412,7 +5412,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-plaid:0.2.1" +- dockerImage: "airbyte/source-plaid:0.3.0" spec: documentationUrl: "https://plaid.com/docs/api/" connectionSpecification: @@ -5422,6 +5422,7 @@ - "access_token" - "api_key" - "client_id" + - "plaid_env" additionalProperties: false properties: access_token: diff --git a/airbyte-integrations/connectors/source-plaid/.dockerignore b/airbyte-integrations/connectors/source-plaid/.dockerignore index 961cb16d7caee..6b0a8bd30b5c3 100644 --- a/airbyte-integrations/connectors/source-plaid/.dockerignore +++ b/airbyte-integrations/connectors/source-plaid/.dockerignore @@ -1,8 +1,7 @@ * !Dockerfile !Dockerfile.test -!package.json -!spec.json -!source.js +!main.py +!source_plaid +!setup.py !secrets -!fullrefresh_configured_catalog.json diff --git a/airbyte-integrations/connectors/source-plaid/.gitignore b/airbyte-integrations/connectors/source-plaid/.gitignore deleted file mode 100644 index 3c3629e647f5d..0000000000000 --- a/airbyte-integrations/connectors/source-plaid/.gitignore +++ /dev/null @@ -1 +0,0 @@ -node_modules diff --git a/airbyte-integrations/connectors/source-plaid/.prettierrc 
b/airbyte-integrations/connectors/source-plaid/.prettierrc deleted file mode 100644 index 3f584f6079c37..0000000000000 --- a/airbyte-integrations/connectors/source-plaid/.prettierrc +++ /dev/null @@ -1,4 +0,0 @@ -{ - "printWidth": 120, - "singleQuote": true -} diff --git a/airbyte-integrations/connectors/source-plaid/Dockerfile b/airbyte-integrations/connectors/source-plaid/Dockerfile index d5c403a99f26f..3139175442924 100644 --- a/airbyte-integrations/connectors/source-plaid/Dockerfile +++ b/airbyte-integrations/connectors/source-plaid/Dockerfile @@ -1,16 +1,38 @@ -# node 14 -FROM node:alpine3.12 +FROM python:3.7.11-alpine3.14 as base +# build and load all requirements +FROM base as builder WORKDIR /airbyte/integration_code -# Copy source files -COPY package.json . -COPY source.js . -COPY spec.json . -# Install any needed dependencies -RUN npm install -ENV AIRBYTE_ENTRYPOINT "node /airbyte/integration_code/source.js" -ENTRYPOINT ["node", "/airbyte/integration_code/source.js"] +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base -LABEL io.airbyte.version=0.2.1 + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_plaid ./source_plaid + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.3.0 LABEL io.airbyte.name=airbyte/source-plaid diff --git a/airbyte-integrations/connectors/source-plaid/README.md b/airbyte-integrations/connectors/source-plaid/README.md index e91b7619a72fe..385825fb8ddb3 100644 --- a/airbyte-integrations/connectors/source-plaid/README.md +++ b/airbyte-integrations/connectors/source-plaid/README.md @@ -1,75 +1,129 @@ # Plaid Source -This is the repository for the JavaScript Template source connector, written in JavaScript. -For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/javascript-template). +This is the repository for the Plaid source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/plaid-new). ## Local development ### Prerequisites - **To iterate on this connector, make sure to complete this prerequisites section.** -#### Build & Activate Virtual Environment - -First, build the module by running the following from the `airbyte` project root directory: +#### Minimum Python version required `= 3.7.0` +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: ``` -./gradlew :airbyte-integrations:connectors:source-plaid:build +python -m venv .venv ``` -This will generate a virtualenv for this module in `source-plaid/.venv`. Make sure this venv is active in your -development environment of choice. To activate the venv from the terminal, run: - +This will generate a virtualenv for this module in `.venv/`. 
Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: ``` -cd airbyte-integrations/connectors/source-plaid # cd into the connector directory source .venv/bin/activate +pip install -r requirements.txt ``` - If you are in an IDE, follow your IDE's instructions to activate the virtualenv. -#### Create credentials +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-plaid-new:build +``` -**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/javascript-template) -to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_javascript_template/spec.json` file. -See `sample_files/sample_config.json` for a sample config file. +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/plaid) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_plaid/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. -**If you are an Airbyte core member**, copy the credentials in RPass under the secret name `source-plaid-integration-test-config` +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source plaid-new test creds` and place them into `secrets/config.json`. ### Locally running the connector - ``` -npm install -node source.js spec -node source.js check --config secrets/config.json -node source.js discover --config secrets/config.json -node source.js read --config secrets/config.json --catalog sample_files/configured_catalog.json +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json ``` -### Unit Tests (wip) - -To run unit tests locally, from the connector directory run: +### Locally running the connector docker image +#### Build +First, make sure you build the latest Docker image: ``` -npm test +docker build . -t airbyte/source-plaid:dev ``` -### Locally running the connector docker image - +You can also build the connector image via Gradle: ``` -# in airbyte root directory ./gradlew :airbyte-integrations:connectors:source-plaid:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. 
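For reference, the two labels the Gradle build reads are the ones at the bottom of this connector's Dockerfile (the values shown are the ones added for the Plaid connector in this patch):

```
# Bump io.airbyte.version here when publishing a new connector version.
LABEL io.airbyte.version=0.3.0
LABEL io.airbyte.name=airbyte/source-plaid
```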
+ +#### Run +Then run any of the connector commands as follows: +``` docker run --rm airbyte/source-plaid:dev spec -docker run --rm -v $(pwd)/airbyte-integrations/connectors/source-plaid/secrets:/secrets airbyte/source-plaid:dev check --config /secrets/config.json -docker run --rm -v $(pwd)/airbyte-integrations/connectors/source-plaid/secrets:/secrets airbyte/source-plaid:dev discover --config /secrets/config.json -docker run --rm -v $(pwd)/airbyte-integrations/connectors/source-plaid/secrets:/secrets -v $(pwd)/airbyte-integrations/connectors/source-plaid/sample_files:/sample_files airbyte/source-plaid:dev read --config /secrets/config.json --catalog /sample_files/fullrefresh_configured_catalog.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-plaid:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-plaid:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-plaid:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install '.[tests]' +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests ``` ### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker -1. From the airbyte project root, run `./gradlew :airbyte-integrations:connectors:source-plaid:integrationTest` to run the standard integration test suite. -1. To run additional integration tests, place your integration tests in a new directory `integration_tests` and run them with `node test (wip)`. +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-plaid-new:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-plaid-new:integrationTest +``` ## Dependency Management - -All of your dependencies should go in `package.json`. +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. 
+We split dependencies between two groups: +* dependencies required for your connector to work go in the `MAIN_REQUIREMENTS` list. +* dependencies required only for testing go in the `TEST_REQUIREMENTS` list. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-plaid/acceptance-test-config.yml b/airbyte-integrations/connectors/source-plaid/acceptance-test-config.yml new file mode 100644 index 0000000000000..c021ad8b8dd7f --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/acceptance-test-config.yml @@ -0,0 +1,24 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-plaid:dev +tests: + spec: + - spec_path: "source_plaid/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + incremental: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-plaid/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-plaid/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-plaid/build.gradle b/airbyte-integrations/connectors/source-plaid/build.gradle index 15e5b202c3aa1..2a6f97eeef89a 100644 --- a/airbyte-integrations/connectors/source-plaid/build.gradle +++ b/airbyte-integrations/connectors/source-plaid/build.gradle @@ -1,42 +1,14 @@ plugins { + id 'airbyte-python' id 'airbyte-docker' - id 'airbyte-standard-source-test-file' - id 'base' // ?
- id 'com.github.node-gradle.node' version '2.2.4' + id 'airbyte-source-acceptance-test' } -node { - download = true - version = "14.11.0" -} - -npm_run_build { - inputs.files fileTree('public') - inputs.files fileTree('src') - inputs.file 'package.json' - inputs.file 'package-lock.json' - - outputs.dir project.buildDir -} -assemble.dependsOn npm_run_build - -//task test(type: NpmTask) { -// dependsOn assemble -// -// args = ['run', 'test', '--', '--watchAll=false'] -// inputs.files fileTree('src') -// inputs.file 'package.json' -// inputs.file 'package-lock.json' -//} - -airbyteStandardSourceTestFile { - // All these input paths must live inside this connector's directory (or subdirectories) - configPath = "secrets/config.json" - configuredCatalogPath = "sample_files/fullrefresh_configured_catalog.json" - specPath = "spec.json" +airbytePython { + moduleDirectory 'source_plaid' } dependencies { - implementation files(project(':airbyte-integrations:bases:base-standard-source-test-file').airbyteDocker.outputs) - implementation files(project(':airbyte-integrations:bases:base').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) } diff --git a/airbyte-integrations/connectors/source-plaid/integration_tests/__init__.py b/airbyte-integrations/connectors/source-plaid/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-plaid/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-plaid/integration_tests/abnormal_state.json new file mode 100644 index 0000000000000..9b92a7ecd6d01 --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "transaction": { + "date": "2120-01-01" + } +} diff --git a/airbyte-integrations/connectors/source-plaid/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-plaid/integration_tests/acceptance.py new file mode 100644 index 0000000000000..0347f2a0b143d --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-plaid/integration_tests/catalog.json b/airbyte-integrations/connectors/source-plaid/integration_tests/catalog.json new file mode 100644 index 0000000000000..3de2a1bc484e3 --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/integration_tests/catalog.json @@ -0,0 +1,104 @@ +{ + "streams": [ + { + "stream": { + "name": "balance", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "json_schema": { + "type": "object", + "required": ["account_id", "current"], + "properties": { + "account_id": { + "type": "string" + }, + "available": { + "type": ["number", "null"] + }, + "current": { + "type": "number" + }, + "iso_currency_code": { + "type": ["string", "null"] + }, + "limit": { + "type": ["number", "null"] + }, + "unofficial_currency_code": { + "type": ["string", "null"] + } + } + } + } + }, + { + "stream": { + "name": "transaction", + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "json_schema": { + "type": "object", + "required": [ + "account_id", + "amount", + "iso_currency_code", + "name", + "transaction_id", + "category", + "date", + "transaction_type" + ], + "properties": { + "account_id": { "type": "string" }, + "amount": { "type": "number" }, + "category": { "type": "array", "items": { "type": "string" } }, + "category_id": { "type": ["string", "null"] }, + "date": { "type": "string" }, + "iso_currency_code": { "type": "string" }, + "name": { "type": "string" }, + "payment_channel": { "type": ["string", "null"] }, + "pending": { "type": ["boolean", "null"] }, + "transaction_id": { "type": "string" }, + "transaction_type": { "type": "string" }, + "location": { + "type": ["object", "null"], + "properties": { + "address": { "type": ["string", "null"] }, + "city": { "type": ["string", "null"] }, + "country": { "type": ["string", "null"] }, + "lat": { "type": ["string", "null"] }, + "lon": { "type": ["string", "null"] }, + "postal_code": { "type": ["string", "null"] }, + "region": { "type": ["string", "null"] }, + "store_number": { "type": ["string", "null"] } + } + }, + "payment_meta": { + "type": ["object", "null"], + "properties": { + "by_order_of": { "type": ["string", "null"] }, + "payee": { "type": ["string", "null"] }, + "payer": { "type": ["string", "null"] }, + "payment_method": { "type": ["string", "null"] }, + "payment_processor": { "type": ["string", "null"] }, + "ppd_id": { "type": ["string", "null"] }, + "reason": { "type": ["string", "null"] }, + "reference_number": { "type": ["string", "null"] } + } + }, + "account_owner": { "type": ["string", "null"] }, + "authorized_date": { "type": ["string", "null"] }, + "authorized_datetime": { "type": ["string", "null"] }, + "check_number": { "type": ["string", "null"] }, + "datetime": { "type": ["string", "null"] }, + "merchant_name": { "type": ["string", "null"] }, + "pending_transaction_id": { "type": ["string", "null"] }, + "personal_finance_category": { "type": ["string", "null"] }, + "transaction_code": { "type": ["string", "null"] }, + "unofficial_currency_code": { "type": ["string", "null"] } + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/source-plaid/integration_tests/configured_catalog.json 
b/airbyte-integrations/connectors/source-plaid/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..35bcb20c2edd0 --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/integration_tests/configured_catalog.json @@ -0,0 +1,107 @@ +{ + "streams": [ + { + "stream": { + "name": "balance", + "supported_sync_modes": ["full_refresh"], + "json_schema": { + "required": ["account_id", "current"], + "type": "object", + "properties": { + "account_id": { + "type": "string" + }, + "available": { + "type": ["number", "null"] + }, + "current": { + "type": "number" + }, + "iso_currency_code": { + "type": ["string", "null"] + }, + "limit": { + "type": ["number", "null"] + }, + "unofficial_currency_code": { + "type": ["string", "null"] + } + } + } + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "transaction", + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "json_schema": { + "type": "object", + "required": [ + "account_id", + "amount", + "iso_currency_code", + "name", + "transaction_id", + "category", + "date", + "transaction_type" + ], + "properties": { + "account_id": { "type": "string" }, + "amount": { "type": "number" }, + "category": { "type": "array", "items": { "type": "string" } }, + "category_id": { "type": ["string", "null"] }, + "date": { "type": "string" }, + "iso_currency_code": { "type": "string" }, + "name": { "type": "string" }, + "payment_channel": { "type": ["string", "null"] }, + "pending": { "type": ["boolean", "null"] }, + "transaction_id": { "type": "string" }, + "transaction_type": { "type": "string" }, + "location": { + "type": ["object", "null"], + "properties": { + "address": { "type": ["string", "null"] }, + "city": { "type": ["string", "null"] }, + "country": { "type": ["string", "null"] }, + "lat": { "type": ["string", "null"] }, + "lon": { "type": ["string", "null"] }, + "postal_code": { "type": ["string", "null"] }, + "region": { "type": ["string", "null"] }, + "store_number": { "type": ["string", "null"] } + } + }, + "payment_meta": { + "type": ["object", "null"], + "properties": { + "by_order_of": { "type": ["string", "null"] }, + "payee": { "type": ["string", "null"] }, + "payer": { "type": ["string", "null"] }, + "payment_method": { "type": ["string", "null"] }, + "payment_processor": { "type": ["string", "null"] }, + "ppd_id": { "type": ["string", "null"] }, + "reason": { "type": ["string", "null"] }, + "reference_number": { "type": ["string", "null"] } + } + }, + "account_owner": { "type": ["string", "null"] }, + "authorized_date": { "type": ["string", "null"] }, + "authorized_datetime": { "type": ["string", "null"] }, + "check_number": { "type": ["string", "null"] }, + "datetime": { "type": ["string", "null"] }, + "merchant_name": { "type": ["string", "null"] }, + "pending_transaction_id": { "type": ["string", "null"] }, + "personal_finance_category": { "type": ["string", "null"] }, + "transaction_code": { "type": ["string", "null"] }, + "unofficial_currency_code": { "type": ["string", "null"] } + } + } + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-plaid/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-plaid/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..bb88377031792 --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/integration_tests/invalid_config.json @@ 
-0,0 +1,6 @@ +{ + "api_key": "??", + "client_id": "??", + "plaid_env": "sandbox", + "access_token": "??" +} diff --git a/airbyte-integrations/connectors/source-plaid/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-plaid/integration_tests/sample_config.json new file mode 100644 index 0000000000000..3d61c0ab82d7d --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/integration_tests/sample_config.json @@ -0,0 +1,6 @@ +{ + "access_token": "??", + "api_key": "??", + "client_id": "??", + "plaid_env": "sandbox" +} diff --git a/airbyte-integrations/connectors/source-plaid/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-plaid/integration_tests/sample_state.json new file mode 100644 index 0000000000000..3d429a37c2a9f --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "transaction": { + "date": "2020-01-01" + } +} diff --git a/airbyte-integrations/connectors/source-plaid/main.py b/airbyte-integrations/connectors/source-plaid/main.py new file mode 100644 index 0000000000000..c3f9861f1ed6d --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_plaid import SourcePlaid + +if __name__ == "__main__": + source = SourcePlaid() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-plaid/package-lock.json b/airbyte-integrations/connectors/source-plaid/package-lock.json deleted file mode 100644 index 97db82f05404b..0000000000000 --- a/airbyte-integrations/connectors/source-plaid/package-lock.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "name": "source-plaid", - "version": "1.0.0", - "lockfileVersion": 1, - "requires": true, - "dependencies": { - "argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" - }, - "axios": { - "version": "0.21.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", - "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", - "requires": { - "follow-redirects": "^1.10.0" - } - }, - "date-fns": { - "version": "2.16.1", - "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.16.1.tgz", - "integrity": "sha512-sAJVKx/FqrLYHAQeN7VpJrPhagZc9R4ImZIWYRFZaaohR3KzmuK88touwsSwSVT8Qcbd4zoDsnGfX4GFB4imyQ==" - }, - "follow-redirects": { - "version": "1.13.1", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.13.1.tgz", - "integrity": "sha512-SSG5xmZh1mkPGyKzjZP8zLjltIfpW32Y5QpdNJyjcfGxK3qo3NDDkZOZSFiGn1A6SclQxY9GzEwAHQ3dmYRWpg==" - } - } -} diff --git a/airbyte-integrations/connectors/source-plaid/package.json b/airbyte-integrations/connectors/source-plaid/package.json deleted file mode 100644 index 74888dc84864c..0000000000000 --- a/airbyte-integrations/connectors/source-plaid/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": "source-plaid", - "version": "1.0.0", - "description": "Airbyte Plaid Source.", - "main": "source.js", - "scripts": { - "build": "" - }, - "author": "", - "license": "ISC", - "dependencies": { - "argparse": "^2.0.1", - "axios": "^0.21.1", - "date-fns": "^2.16.1" - } -} diff --git a/airbyte-integrations/connectors/source-plaid/requirements.txt 
b/airbyte-integrations/connectors/source-plaid/requirements.txt new file mode 100644 index 0000000000000..7be17a56d745d --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/requirements.txt @@ -0,0 +1,3 @@ +# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies. +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-plaid/sample_files/fullrefresh_configured_catalog.json b/airbyte-integrations/connectors/source-plaid/sample_files/fullrefresh_configured_catalog.json deleted file mode 100644 index 41272534f496a..0000000000000 --- a/airbyte-integrations/connectors/source-plaid/sample_files/fullrefresh_configured_catalog.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "balances", - "supported_sync_modes": ["full_refresh"], - "json_schema": { - "properties": { - "account_id": { - "type": "string" - }, - "available": { - "type": "number" - }, - "current": { - "type": "number" - }, - "iso_currency_code": { - "type": "string" - }, - "limit": { - "type": "number" - }, - "unofficial_currency_code": { - "type": "string" - } - } - } - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-plaid/setup.py b/airbyte-integrations/connectors/source-plaid/setup.py new file mode 100644 index 0000000000000..3f4c0eda81673 --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/setup.py @@ -0,0 +1,26 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk", "plaid-python"] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "source-acceptance-test", +] + +setup( + name="source_plaid", + description="Source implementation for Plaid.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-plaid/source.js b/airbyte-integrations/connectors/source-plaid/source.js deleted file mode 100644 index cfbd63043451e..0000000000000 --- a/airbyte-integrations/connectors/source-plaid/source.js +++ /dev/null @@ -1,252 +0,0 @@ -const fs = require('fs'); -const axios = require('axios'); -const path = require('path'); -const { ArgumentParser } = require('argparse'); -const dateFns = require('date-fns'); -const getMilliseconds = dateFns.getMilliseconds; - -async function read(config, catalog) { - let balancesStream = null; - for (const configuredStreamIndex in catalog.streams) { - const configuredStream = catalog.streams[configuredStreamIndex]; - if (configuredStream.stream.name === 'balances') { - balancesStream = configuredStream; - } - } - - if (balancesStream === null) { - log('No streams selected'); - return; - } - // We only support full_refresh at the moment, so verify the user didn't ask for another sync mode - if (balancesStream.sync_mode !== 'full_refresh') { - log('This connector only supports full refresh syncs! (for now)'); - process.exit(1); - } - - // If we've made it this far, all the configuration is good and we can pull the balance. 
- const now = new Date(); - const url = `${getBaseUrl(config.plaid_env)}/accounts/balance/get`; - const response = await axios.post( - url, - { - access_token: config.access_token, - client_id: config.client_id, - secret: config.api_key, - }, - { validateStatus: () => true } - ); - - if (response.status !== 200) { - log('Failure occurred when calling Plaid API'); - process.exit(1); - } else { - response.data.accounts - .map((account) => { - const data = { - account_id: account.account_id, - available: account.balances.available, - current: account.balances.current, - iso_currency_code: account.balances.iso_currency_code, - limit: account.balances.limit, - unofficial_currency_code: account.balances.unofficial_currency_code, - }; - const record = { - stream: 'balances', - data: data, - emitted_at: getMilliseconds(now), - }; - return { type: 'RECORD', record: record }; - }) - .forEach((record) => console.log(JSON.stringify(record))); - } -} - -function readJson(filePath) { - return JSON.parse(fs.readFileSync(filePath)); -} - -function getBaseUrl(plaidEnv) { - if (plaidEnv === 'sandbox') { - return 'https://sandbox.plaid.com'; - } else if (plaidEnv === 'development') { - return 'https://development.plaid.com'; - } else if (plaidEnv === 'production') { - return 'https://production.plaid.com'; - } else { - throw new Error('Invalid Plaid Environment'); - } -} - -async function check(config) { - // Validate input configuration by hitting the balance endpoint. - let result; - const url = `${getBaseUrl(config.plaid_env)}/accounts/balance/get`; - const response = await axios.post( - url, - { - access_token: config.access_token, - client_id: config.client_id, - secret: config.api_key, - }, - { validateStatus: () => true } - ); - if (response.status === 200) { - result = { status: 'SUCCEEDED' }; - } else if (response.data.code === 'INVALID_ACCESS_TOKEN') { - result = { status: 'FAILED', message: 'Access token is incorrect.' 
}; - } else { - result = { - status: 'FAILED', - message: response.data.error_message, - }; - } - // Format the result of the check operation according to the Airbyte Specification - const outputMessage = { type: 'CONNECTION_STATUS', connectionStatus: result }; - console.log(JSON.stringify(outputMessage)); -} - -function log(message) { - const logJson = { type: 'LOG', log: message }; - console.log(logJson); -} - -function discover() { - const catalog = { - streams: [ - { - name: 'balance', - supported_sync_modes: ['full_refresh'], - json_schema: { - properties: { - account_id: { - type: 'string', - }, - available: { - type: 'number', - }, - current: { - type: 'number', - }, - iso_currency_code: { - type: 'string', - }, - limit: { - type: 'number', - }, - unofficial_currency_code: { - type: 'string', - }, - }, - }, - }, - ], - }; - const airbyte_message = { type: 'CATALOG', catalog }; - console.log(JSON.stringify(airbyte_message)); -} - -function getInputFilePath(filePath) { - if (path.isAbsolute(filePath)) { - return filePath; - } else { - return path.join(process.cwd(), filePath); - } -} - -function spec() { - // Read the file named spec.json from the module directory as a JSON file - const specPath = path.join(path.dirname(__filename), 'spec.json'); - const specification = readJson(specPath); - - // form an Airbyte Message containing the spec and print it to stdout - const airbyteMessage = { type: 'SPEC', spec: specification }; - - console.log(JSON.stringify(airbyteMessage)); -} - -async function run(args) { - const parentParser = new ArgumentParser({ add_help: false }); - const mainParser = new ArgumentParser({ add_help: false }); - const subparsers = mainParser.add_subparsers({ title: 'commands', dest: 'command' }); - - // Accept the spec command - subparsers.add_parser('spec', { - help: 'outputs the json configuration specification', - parents: [parentParser], - }); - - // Accept the check command - const checkParser = subparsers.add_parser('check', { - help: 'checks the config used to connect', - parents: [parentParser], - }); - const requiredCheckParser = checkParser.add_argument_group('required named arguments'); - requiredCheckParser.add_argument('--config', { - type: 'str', - required: true, - help: 'path to the json configuration file', - }); - - // Accept the discover command - const discover_parser = subparsers.add_parser('discover', { - help: "outputs a catalog describing the source's schema", - parents: [parentParser], - }); - const requiredDiscoverParser = discover_parser.add_argument_group('required named arguments'); - requiredDiscoverParser.add_argument('--config', { - type: 'str', - required: true, - help: 'path to the json configuration file', - }); - - // Accept the read command - const readParser = subparsers.add_parser('read', { - help: 'reads the source and outputs messages to STDOUT', - parents: [parentParser], - }); - readParser.add_argument('--state', { - type: 'str', - required: false, - help: 'path to the json-encoded state file', - }); - const requiredReadParser = readParser.add_argument_group('required named arguments'); - requiredReadParser.add_argument('--config', { - type: 'str', - required: true, - help: 'path to the json configuration file', - }); - requiredReadParser.add_argument('--catalog', { - type: 'str', - required: true, - help: 'path to the catalog used to determine which data to read', - }); - - const parsedArgs = mainParser.parse_args(args); - const command = parsedArgs.command; - - if (command === 'spec') { - spec(); - } else if (command === 
'check') { - const config = readJson(getInputFilePath(parsedArgs.config)); - await check(config); - } else if (command === 'discover') { - discover(); - } else if (command === 'read') { - const config = readJson(getInputFilePath(parsedArgs.config)); - const configuredCatalog = readJson(getInputFilePath(parsedArgs.catalog)); - await read(config, configuredCatalog); - } else { - // If we don't recognize the command log the problem and exit with an error code greater than 0 to indicate the process - // had a failure - log('Invalid command. Allowable commands: [spec, check, discover, read]'); - process.exit(1); - } - - // A zero exit code means the process successfully completed - process.exit(0); -} - -(async function () { - await run(process.argv.slice(2)).catch((reason) => console.log(reason)); -})(); diff --git a/airbyte-integrations/connectors/source-plaid/source_plaid/__init__.py b/airbyte-integrations/connectors/source-plaid/source_plaid/__init__.py new file mode 100644 index 0000000000000..8ff627c962f9b --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/source_plaid/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from .source import SourcePlaid + +__all__ = ["SourcePlaid"] diff --git a/airbyte-integrations/connectors/source-plaid/source_plaid/schemas/balance.json b/airbyte-integrations/connectors/source-plaid/source_plaid/schemas/balance.json new file mode 100644 index 0000000000000..e0b4885ca3bce --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/source_plaid/schemas/balance.json @@ -0,0 +1,13 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["account_id", "current"], + "properties": { + "account_id": { "type": "string" }, + "available": { "type": ["number", "null"] }, + "current": { "type": "number" }, + "iso_currency_code": { "type": ["string", "null"] }, + "limit": { "type": ["number", "null"] }, + "unofficial_currency_code": { "type": ["string", "null"] } + } +} diff --git a/airbyte-integrations/connectors/source-plaid/source_plaid/schemas/transaction.json b/airbyte-integrations/connectors/source-plaid/source_plaid/schemas/transaction.json new file mode 100644 index 0000000000000..dba0a472d568c --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/source_plaid/schemas/transaction.json @@ -0,0 +1,63 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "account_id", + "amount", + "iso_currency_code", + "name", + "transaction_id", + "category", + "date", + "transaction_type" + ], + "properties": { + "account_id": { "type": "string" }, + "amount": { "type": "number" }, + "category": { "type": "array", "items": { "type": "string" } }, + "category_id": { "type": ["string", "null"] }, + "date": { "type": "string" }, + "iso_currency_code": { "type": "string" }, + "name": { "type": "string" }, + "payment_channel": { "type": ["string", "null"] }, + "pending": { "type": ["boolean", "null"] }, + "transaction_id": { "type": "string" }, + "transaction_type": { "type": "string" }, + "location": { + "type": ["object", "null"], + "properties": { + "address": { "type": ["string", "null"] }, + "city": { "type": ["string", "null"] }, + "country": { "type": ["string", "null"] }, + "lat": { "type": ["string", "null"] }, + "lon": { "type": ["string", "null"] }, + "postal_code": { "type": ["string", "null"] }, + "region": { "type": ["string", "null"] }, + "store_number": { "type": ["string", "null"] } + } + }, + 
"payment_meta": { + "type": ["object", "null"], + "properties": { + "by_order_of": { "type": ["string", "null"] }, + "payee": { "type": ["string", "null"] }, + "payer": { "type": ["string", "null"] }, + "payment_method": { "type": ["string", "null"] }, + "payment_processor": { "type": ["string", "null"] }, + "ppd_id": { "type": ["string", "null"] }, + "reason": { "type": ["string", "null"] }, + "reference_number": { "type": ["string", "null"] } + } + }, + "account_owner": { "type": ["string", "null"] }, + "authorized_date": { "type": ["string", "null"] }, + "authorized_datetime": { "type": ["string", "null"] }, + "check_number": { "type": ["string", "null"] }, + "datetime": { "type": ["string", "null"] }, + "merchant_name": { "type": ["string", "null"] }, + "pending_transaction_id": { "type": ["string", "null"] }, + "personal_finance_category": { "type": ["string", "null"] }, + "transaction_code": { "type": ["string", "null"] }, + "unofficial_currency_code": { "type": ["string", "null"] } + } +} diff --git a/airbyte-integrations/connectors/source-plaid/source_plaid/source.py b/airbyte-integrations/connectors/source-plaid/source_plaid/source.py new file mode 100644 index 0000000000000..b7e4fae61f0aa --- /dev/null +++ b/airbyte-integrations/connectors/source-plaid/source_plaid/source.py @@ -0,0 +1,120 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import datetime +import json +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union + +import plaid +from airbyte_cdk.logger import AirbyteLogger +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from plaid.api import plaid_api +from plaid.model.accounts_balance_get_request import AccountsBalanceGetRequest +from plaid.model.transactions_get_request import TransactionsGetRequest + +SPEC_ENV_TO_PLAID_ENV = { + "production": plaid.Environment.Production, + "development": plaid.Environment.Development, + "sandbox": plaid.Environment.Sandbox, +} + + +class PlaidStream(Stream): + def __init__(self, config: Mapping[str, Any]): + plaid_config = plaid.Configuration( + host=SPEC_ENV_TO_PLAID_ENV[config["plaid_env"]], api_key={"clientId": config["client_id"], "secret": config["api_key"]} + ) + api_client = plaid.ApiClient(plaid_config) + self.client = plaid_api.PlaidApi(api_client) + self.access_token = config["access_token"] + + +class BalanceStream(PlaidStream): + @property + def name(self): + return "balance" + + @property + def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + return "account_id" + + def read_records( + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + ) -> Iterable[Mapping[str, Any]]: + balance_response = self.client.accounts_balance_get(AccountsBalanceGetRequest(access_token=self.access_token)) + for balance in balance_response["accounts"]: + message_dict = balance["balances"].to_dict() + message_dict["account_id"] = balance["account_id"] + yield message_dict + + +class IncrementalTransactionStream(PlaidStream): + @property + def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + return "transaction_id" + + @property + def name(self): + return "transaction" + + @property + def source_defined_cursor(self) -> bool: + return True + + @property + def cursor_field(self) -> Union[str, List[str]]: + return "date" + + def get_updated_state(self, 
current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]): + return {"date": latest_record.get("date")} + + def read_records( + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + ) -> Iterable[Mapping[str, Any]]: + stream_state = stream_state or {} + date = stream_state.get("date") + if not date: + date = datetime.date.fromtimestamp(0) + else: + date = datetime.date.fromisoformat(date) + if date >= datetime.datetime.utcnow().date(): + return + + transaction_response = self.client.transactions_get( + TransactionsGetRequest(access_token=self.access_token, start_date=date, end_date=datetime.datetime.utcnow().date()) + ) + + yield from map(lambda x: x.to_dict(), sorted(transaction_response["transactions"], key=lambda t: t["date"])) + + +class SourcePlaid(AbstractSource): + def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: + try: + plaid_config = plaid.Configuration( + host=SPEC_ENV_TO_PLAID_ENV[config["plaid_env"]], api_key={"clientId": config["client_id"], "secret": config["api_key"]} + ) + api_client = plaid.ApiClient(plaid_config) + client = plaid_api.PlaidApi(api_client) + try: + request = AccountsBalanceGetRequest(access_token=config["access_token"]) + client.accounts_balance_get(request) + return True, None + except plaid.ApiException as e: + response = json.loads(e.body) + return False, response + except Exception as error: + return False, error + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + return [BalanceStream(config), IncrementalTransactionStream(config)] diff --git a/airbyte-integrations/connectors/source-plaid/spec.json b/airbyte-integrations/connectors/source-plaid/source_plaid/spec.json similarity index 92% rename from airbyte-integrations/connectors/source-plaid/spec.json rename to airbyte-integrations/connectors/source-plaid/source_plaid/spec.json index 83717bf528a13..e8d1289aa0af6 100644 --- a/airbyte-integrations/connectors/source-plaid/spec.json +++ b/airbyte-integrations/connectors/source-plaid/source_plaid/spec.json @@ -3,7 +3,7 @@ "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", - "required": ["access_token", "api_key", "client_id"], + "required": ["access_token", "api_key", "client_id", "plaid_env"], "additionalProperties": false, "properties": { "access_token": { diff --git a/docs/integrations/sources/plaid.md b/docs/integrations/sources/plaid.md index 6731b4887ec81..b22a5752734a6 100644 --- a/docs/integrations/sources/plaid.md +++ b/docs/integrations/sources/plaid.md @@ -64,3 +64,6 @@ This guide will walk through how to create the credentials you need to run this ``` * We should now have everything we need to configure this source in the UI. 
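+For local development, the resulting connector configuration (e.g. `secrets/config.json`) has the following shape -- placeholder values shown; `plaid_env` is one of `sandbox`, `development`, or `production`:
+```
+{
+  "access_token": "<your-plaid-access-token>",
+  "api_key": "<your-plaid-api-key>",
+  "client_id": "<your-plaid-client-id>",
+  "plaid_env": "sandbox"
+}
+```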
+| Version | Date | Pull Request | Subject | +| :--- | :--- | :--- | :--- | +| 0.3.0 | 2022-01-05 | [7977](https://github.com/airbytehq/airbyte/pull/7977) | Migrate to Python CDK + add transaction stream | From ceaa1a478e8dda108f157360464450025db89cc1 Mon Sep 17 00:00:00 2001 From: oneshcheret <33333155+sashaNeshcheret@users.noreply.github.com> Date: Mon, 10 Jan 2022 14:30:52 +0200 Subject: [PATCH 079/215] =?UTF-8?q?=F0=9F=90=9B=20Snowflake=20destination:?= =?UTF-8?q?=20do=20not=20create=20schema=20if=20it=20exists=20(#9311)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Snowflake destination: do not create schema if it exists * Snowflake destination: move common constants to parent class * Snowflake destination: bump version * Snowflake destination: bump version --- .../424892c4-daac-4491-b35d-c6688ba547ba.json | 2 +- .../seed/destination_definitions.yaml | 2 +- .../destination/jdbc/JdbcSqlOperations.java | 6 ++++- .../destination/jdbc/SqlOperations.java | 12 +++++++++ .../destination-snowflake/Dockerfile | 2 +- .../snowflake/SnowflakeSqlOperations.java | 4 +++ .../SnowflakeStagingSqlOperations.java | 5 ++++ docs/integrations/destinations/snowflake.md | 27 ++++++++++--------- 8 files changed, 43 insertions(+), 17 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json index 0c5a32eba6cf7..2a2d650814261 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "424892c4-daac-4491-b35d-c6688ba547ba", "name": "Snowflake", "dockerRepository": "airbyte/destination-snowflake", - "dockerImageTag": "0.3.23", + "dockerImageTag": "0.4.1", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/snowflake", "icon": "snowflake.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 694449982dea6..78eb2c4e61621 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -179,7 +179,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: airbyte/destination-snowflake - dockerImageTag: 0.4.0 + dockerImageTag: 0.4.1 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg - name: MariaDB ColumnStore diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcSqlOperations.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcSqlOperations.java index 4dd3e2638e7a6..5cb27816d30d4 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcSqlOperations.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcSqlOperations.java @@ -25,10 +25,14 @@ public abstract class JdbcSqlOperations implements SqlOperations { private static final Logger LOGGER = 
LoggerFactory.getLogger(JdbcSqlOperations.class); + protected static final String SHOW_SCHEMAS = "show schemas;"; + protected static final String NAME = "name"; @Override public void createSchemaIfNotExists(final JdbcDatabase database, final String schemaName) throws Exception { - database.execute(createSchemaQuery(schemaName)); + if (!isSchemaExists(database, schemaName)) { + database.execute(createSchemaQuery(schemaName));; + } } private String createSchemaQuery(final String schemaName) { diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/SqlOperations.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/SqlOperations.java index 3b6656f04b97b..81ed02d013722 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/SqlOperations.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/SqlOperations.java @@ -22,6 +22,18 @@ public interface SqlOperations { */ void createSchemaIfNotExists(JdbcDatabase database, String schemaName) throws Exception; + /** + * Denotes whether the schema exists in destination database + * + * @param database Database that the connector is syncing + * @param schemaName Name of schema. + * + * @return true if the schema exists in destination database, false if it doesn't + */ + default boolean isSchemaExists(final JdbcDatabase database, final String schemaName) throws Exception { + return false; + } + /** * Create a table with provided name in provided schema if it does not already exist. * diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index 18e82989befd7..e2709a5ab124b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -18,5 +18,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.4.0 +LABEL io.airbyte.version=0.4.1 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperations.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperations.java index 97a807a444c4f..ba4f068b41f3d 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperations.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperations.java @@ -31,6 +31,10 @@ public void createTableIfNotExists(final JdbcDatabase database, final String sch database.execute(createTableQuery); } + public boolean isSchemaExists(JdbcDatabase database, String outputSchema) throws Exception { + return database.query(SHOW_SCHEMAS).map(schemas -> schemas.get(NAME).asText()).anyMatch(outputSchema::equalsIgnoreCase); + } + @Override public void insertRecordsInternal(final JdbcDatabase database, final List records, diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java 
b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java index 50ca8c57d8e8f..07b811b1d7c99 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java @@ -77,4 +77,9 @@ public String createTableQuery(final JdbcDatabase database, final String schemaN schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); } + @Override + public boolean isSchemaExists(JdbcDatabase database, String outputSchema) throws Exception { + return database.query(SHOW_SCHEMAS).map(schemas -> schemas.get(NAME).asText()).anyMatch(outputSchema::equalsIgnoreCase); + } + } diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index d2435ccc8e5e9..2c78d003fc1a1 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -196,21 +196,22 @@ The final query should show a `STORAGE_GCP_SERVICE_ACCOUNT` property with an ema Finally, you need to add read/write permissions to your bucket with that email. -| Version | Date | Pull Request | Subject | |:--------| :-------- | :----- | :------ | -| 0.4.0 | 2021-12-27 | [#9063](https://github.com/airbytehq/airbyte/pull/9063) | Updated normalization to produce permanent tables | +| Version | Date | Pull Request | Subject | +|:--------|:-----------| :----- | :------ | +| 0.4.1 | 2022-01-06 | [#9311](https://github.com/airbytehq/airbyte/pull/9311) | Update creating schema during check | +| 0.4.0 | 2021-12-27 | [#9063](https://github.com/airbytehq/airbyte/pull/9063) | Updated normalization to produce permanent tables | | 0.3.24 | 2021-12-23 | [#8869](https://github.com/airbytehq/airbyte/pull/8869) | Changed staging approach to Byte-Buffered | | 0.3.23 | 2021-12-22 | [#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration in UI for S3 loading method | | 0.3.22 | 2021-12-21 | [#9006](https://github.com/airbytehq/airbyte/pull/9006) | Updated jdbc schema naming to follow Snowflake Naming Conventions | | 0.3.21 | 2021-12-15 | [#8781](https://github.com/airbytehq/airbyte/pull/8781) | Updated check method to verify permissions to create/drop stage for internal staging; compatibility fix for Java 17 | -| 0.3.20 | 2021-12-10 | [#8562](https://github.com/airbytehq/airbyte/pull/8562) | Moving classes around for better dependency management; compatibility fix for Java 17 | -| 0.3.19 | 2021-12-06 | [#8528](https://github.com/airbytehq/airbyte/pull/8528) | Set Internal Staging as default choice | -| 0.3.18 | 2021-11-26 | [#8253](https://github.com/airbytehq/airbyte/pull/8253) | Snowflake Internal Staging Support | -| 0.3.17 | 2021-11-08 | [#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | -| 0.3.15 | 2021-10-11 | [#6949](https://github.com/airbytehq/airbyte/pull/6949) | Each stream was split into files of 10,000 records each for copying using S3 or GCS | -| 0.3.14 | 2021-09-08 | [#5924](https://github.com/airbytehq/airbyte/pull/5924) | Fixed AWS S3 Staging COPY is writing records from different table in
the same raw table | -| 0.3.13 | 2021-09-01 | [#5784](https://github.com/airbytehq/airbyte/pull/5784) | Updated query timeout from 30 minutes to 3 hours | -| 0.3.12 | 2021-07-30 | [#5125](https://github.com/airbytehq/airbyte/pull/5125) | Enable `additionalPropertities` in spec.json | -| 0.3.11 | 2021-07-21 | [#3555](https://github.com/airbytehq/airbyte/pull/3555) | Partial Success in BufferedStreamConsumer | -| 0.3.10 | 2021-07-12 | [#4713](https://github.com/airbytehq/airbyte/pull/4713)| Tag traffic with `airbyte` label to enable optimization opportunities from Snowflake | +| 0.3.20 | 2021-12-10 | [#8562](https://github.com/airbytehq/airbyte/pull/8562) | Moving classes around for better dependency management; compatibility fix for Java 17 | +| 0.3.19 | 2021-12-06 | [#8528](https://github.com/airbytehq/airbyte/pull/8528) | Set Internal Staging as default choice | +| 0.3.18 | 2021-11-26 | [#8253](https://github.com/airbytehq/airbyte/pull/8253) | Snowflake Internal Staging Support | +| 0.3.17 | 2021-11-08 | [#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | +| 0.3.15 | 2021-10-11 | [#6949](https://github.com/airbytehq/airbyte/pull/6949) | Each stream was split into files of 10,000 records each for copying using S3 or GCS | +| 0.3.14 | 2021-09-08 | [#5924](https://github.com/airbytehq/airbyte/pull/5924) | Fixed AWS S3 Staging COPY is writing records from different table in the same raw table | +| 0.3.13 | 2021-09-01 | [#5784](https://github.com/airbytehq/airbyte/pull/5784) | Updated query timeout from 30 minutes to 3 hours | +| 0.3.12 | 2021-07-30 | [#5125](https://github.com/airbytehq/airbyte/pull/5125) | Enable `additionalPropertities` in spec.json | +| 0.3.11 | 2021-07-21 | [#3555](https://github.com/airbytehq/airbyte/pull/3555) | Partial Success in BufferedStreamConsumer | +| 0.3.10 | 2021-07-12 | [#4713](https://github.com/airbytehq/airbyte/pull/4713)| Tag traffic with `airbyte` label to enable optimization opportunities from Snowflake | From 4137a3427fa684f6d60da7729f7092c2a07dc552 Mon Sep 17 00:00:00 2001 From: Charles Date: Mon, 10 Jan 2022 07:47:48 -0800 Subject: [PATCH 080/215] on upgrade of airbyte, explain logic for upgrading connectors (#9291) --- docs/operator-guides/upgrading-airbyte.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 027a90826be38..52d978da25808 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -4,6 +4,8 @@ This tutorial will describe how to determine if you need to run this upgrade process, and if you do, how to do so. This process does require temporarily turning off Airbyte. +When Airbyte is upgraded, it will attempt to upgrade some connector versions. It follows the following rules: 1. if a connector is not used, it will be upgraded to the latest version 2. if a connector is used, it will NOT be upgraded to avoid disrupting working workflows. If you want to upgrade a connector, do so in the settings page in the webapp. + ## Determining if you need to Upgrade Airbyte intelligently performs upgrades automatically based off of your version defined in your `.env` file and will handle data migration for you. 
From 58253cdedb85680fe83040858ee145ed0db2317a Mon Sep 17 00:00:00 2001 From: Augustin Date: Mon, 10 Jan 2022 17:11:44 +0100 Subject: [PATCH 081/215] octavia-cli: replace 3.10 occurences with 3.8 (#9376) --- octavia-cli/README.md | 2 +- octavia-cli/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/octavia-cli/README.md b/octavia-cli/README.md index a7a61d02a23a3..4bcc11dac808c 100644 --- a/octavia-cli/README.md +++ b/octavia-cli/README.md @@ -41,7 +41,7 @@ Summary of achievements: | 2021-12-22 | Bootstrapping the project's code base | # Developing locally -1. Install Python 3.10.0. We suggest doing it through `pyenv` +1. Install Python 3.8.12. We suggest doing it through `pyenv` 2. Create a virtualenv: `python -m venv .venv` 3. Activate the virtualenv: `source .venv/bin/activate` 4. Install dev dependencies: `pip install -e .\[dev\]` diff --git a/octavia-cli/setup.py b/octavia-cli/setup.py index 93534a6e851a6..e5ad552eb3d9d 100644 --- a/octavia-cli/setup.py +++ b/octavia-cli/setup.py @@ -31,7 +31,7 @@ "Topic :: Software Development :: Libraries :: Python Modules", "License :: OSI Approved :: MIT License", # Python Version Support - "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.8", ], keywords="airbyte cli command-line-interface configuration", project_urls={ From a3557fc43ef2c461b48f472abc50affc5ce744fd Mon Sep 17 00:00:00 2001 From: oneshcheret <33333155+sashaNeshcheret@users.noreply.github.com> Date: Mon, 10 Jan 2022 18:17:03 +0200 Subject: [PATCH 082/215] Snowflake destination: update destination_specs (#9388) --- .../init/src/main/resources/seed/destination_specs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index a99171e40b825..76f1641de17ef 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3767,7 +3767,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.4.0" +- dockerImage: "airbyte/destination-snowflake:0.4.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: From 3eb64732ab5beb18f26563972c55b2bedbe0411e Mon Sep 17 00:00:00 2001 From: Harsha Teja Kanna Date: Mon, 10 Jan 2022 11:42:46 -0600 Subject: [PATCH 083/215] Fix JDK version to 17 (#9390) --- airbyte-bootloader/Dockerfile | 2 +- airbyte-container-orchestrator/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index 4bb6ba12ebe05..05c9912913f43 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -1,4 +1,4 @@ -ARG JDK_VERSION=14.0.2 +ARG JDK_VERSION=17.0.1 FROM openjdk:${JDK_VERSION}-slim ENV APPLICATION airbyte-bootloader diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index 303ccb61a1c64..434cf1cc8e2ca 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -1,4 +1,4 @@ -ARG JDK_VERSION=14.0.2 +ARG JDK_VERSION=17.0.1 FROM openjdk:${JDK_VERSION}-slim AS sync-attempt ARG DOCKER_BUILD_ARCH=amd64 From 1054e7e91604350d95b9ddb7b2383ccaac830fb7 Mon Sep 17 00:00:00 2001 From: VitaliiMaltsev <39538064+VitaliiMaltsev@users.noreply.github.com> Date: Mon, 10 Jan 2022 21:26:38 
+0200 Subject: [PATCH 084/215] Destination Snowflake : fixed duplicate rows on retries (#9141) * fix for jdk 17 * Destination Snowflake: duplicate rows on retries * added changelog * fix checkstyle * replace concat with + * replaced static fields and methods with non-static * bump version Co-authored-by: vmaltsev --- .../424892c4-daac-4491-b35d-c6688ba547ba.json | 2 +- .../seed/destination_definitions.yaml | 2 +- .../destination-snowflake/Dockerfile | 2 +- ...owflakeInternalStagingConsumerFactory.java | 29 ++++++++++++------- .../SnowflakeInternalStagingDestination.java | 2 +- .../SnowflakeSQLNameTransformer.java | 3 ++ .../SnowflakeStagingSqlOperations.java | 5 +++- docs/integrations/destinations/snowflake.md | 1 + 8 files changed, 31 insertions(+), 15 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json index 2a2d650814261..b90ab00a62c1c 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "424892c4-daac-4491-b35d-c6688ba547ba", "name": "Snowflake", "dockerRepository": "airbyte/destination-snowflake", - "dockerImageTag": "0.4.1", + "dockerImageTag": "0.4.2", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/snowflake", "icon": "snowflake.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 78eb2c4e61621..de378c18a3b4b 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -179,7 +179,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: airbyte/destination-snowflake - dockerImageTag: 0.4.1 + dockerImageTag: 0.4.2 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg - name: MariaDB ColumnStore diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index e2709a5ab124b..f854615fb1c72 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -18,5 +18,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.4.1 +LABEL io.airbyte.version=0.4.2 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java index f6e1ba0f3b3a0..74f1e2358c1c8 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java +++ 
b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; @@ -45,8 +46,9 @@ public class SnowflakeInternalStagingConsumerFactory { private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeInternalStagingConsumerFactory.class); private static final long MAX_BATCH_SIZE_BYTES = 1024 * 1024 * 1024 / 4; // 256mb + private final String CURRENT_SYNC_PATH = UUID.randomUUID().toString(); - public static AirbyteMessageConsumer create(final Consumer outputRecordCollector, + public AirbyteMessageConsumer create(final Consumer outputRecordCollector, final JdbcDatabase database, final SnowflakeStagingSqlOperations sqlOperations, final SnowflakeSQLNameTransformer namingResolver, @@ -130,7 +132,7 @@ private static AirbyteStreamNameNamespacePair toNameNamespacePair(final WriteCon return new AirbyteStreamNameNamespacePair(config.getStreamName(), config.getNamespace()); } - private static RecordWriter recordWriterFunction(final JdbcDatabase database, + private RecordWriter recordWriterFunction(final JdbcDatabase database, final SqlOperations snowflakeSqlOperations, final List writeConfigs, final ConfiguredAirbyteCatalog catalog, @@ -149,13 +151,13 @@ private static RecordWriter recordWriterFunction(final JdbcDatabase database, final WriteConfig writeConfig = pairToWriteConfig.get(pair); final String schemaName = writeConfig.getOutputSchemaName(); final String tableName = writeConfig.getOutputTableName(); - final String stageName = namingResolver.getStageName(schemaName, tableName); + final String path = namingResolver.getStagingPath(schemaName, tableName, CURRENT_SYNC_PATH); - snowflakeSqlOperations.insertRecords(database, records, schemaName, stageName); + snowflakeSqlOperations.insertRecords(database, records, schemaName, path); }; } - private static OnCloseFunction onCloseFunction(final JdbcDatabase database, + private OnCloseFunction onCloseFunction(final JdbcDatabase database, final SnowflakeStagingSqlOperations sqlOperations, final List writeConfigs, final SnowflakeSQLNameTransformer namingResolver) { @@ -170,11 +172,18 @@ private static OnCloseFunction onCloseFunction(final JdbcDatabase database, LOGGER.info("Finalizing stream {}. schema {}, tmp table {}, final table {}", writeConfig.getStreamName(), schemaName, srcTableName, dstTableName); - final String stageName = namingResolver.getStageName(schemaName, dstTableName); - sqlOperations.copyIntoTmpTableFromStage(database, stageName, srcTableName, schemaName); - LOGGER.info("Uploading data from stage: stream {}. schema {}, tmp table {}, stage {}", writeConfig.getStreamName(), schemaName, - srcTableName, - stageName); + final String path = namingResolver.getStagingPath(schemaName, dstTableName, CURRENT_SYNC_PATH); + LOGGER.info("Uploading data from stage: stream {}. 
schema {}, tmp table {}, stage path {}", writeConfig.getStreamName(), schemaName, + srcTableName, + path); + try { + sqlOperations.copyIntoTmpTableFromStage(database, path, srcTableName, schemaName); + } catch (Exception e){ + sqlOperations.cleanUpStage(database, path); + LOGGER.info("Cleaning stage path {}", path); + throw new RuntimeException("Failed to upload data from stage "+ path, e); + } + sqlOperations.createTableIfNotExists(database, schemaName, dstTableName); switch (writeConfig.getSyncMode()) { case OVERWRITE -> queryList.add(sqlOperations.truncateTableQuery(database, schemaName, dstTableName)); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java index c20a6905b2941..e1db16e984a31 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java @@ -71,7 +71,7 @@ public JsonNode toJdbcConfig(final JsonNode config) { public AirbyteMessageConsumer getConsumer(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final Consumer outputRecordCollector) { - return SnowflakeInternalStagingConsumerFactory.create(outputRecordCollector, getDatabase(config), + return new SnowflakeInternalStagingConsumerFactory().create(outputRecordCollector, getDatabase(config), new SnowflakeStagingSqlOperations(), new SnowflakeSQLNameTransformer(), config, catalog); } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java index 420d03e709412..18c97338f39a6 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java @@ -17,4 +17,7 @@ public String getStageName(String schemaName, String outputTableName) { return schemaName.concat(outputTableName).replaceAll("-", "_").toUpperCase(); } + public String getStagingPath(String schemaName, String tableName, String currentSyncPath) { + return (getStageName(schemaName,tableName)+"/staged/"+currentSyncPath).toUpperCase(); + } } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java index 07b811b1d7c99..6fa6a7c65df55 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java @@ -77,9 +77,12 @@ public String createTableQuery(final 
JdbcDatabase database, final String schemaN schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); } + public void cleanUpStage(JdbcDatabase database, String path) throws SQLException { + database.execute(String.format("REMOVE @%s;", path)); + } + @Override public boolean isSchemaExists(JdbcDatabase database, String outputSchema) throws Exception { return database.query(SHOW_SCHEMAS).map(schemas -> schemas.get(NAME).asText()).anyMatch(outputSchema::equalsIgnoreCase); } - } diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index 2c78d003fc1a1..8c9885b71a862 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -198,6 +198,7 @@ Finally, you need to add read/write permissions to your bucket with that email. | Version | Date | Pull Request | Subject | |:--------|:-----------| :----- | :------ | +| 0.4.2 | 2022-01-10 | [#9141](https://github.com/airbytehq/airbyte/pull/9141) | Fixed duplicate rows on retries | | 0.4.1 | 2021-01-06 | [#9311](https://github.com/airbytehq/airbyte/pull/9311) | Update сreating schema during check | | 0.4.0 | 2021-12-27 | [#9063](https://github.com/airbytehq/airbyte/pull/9063) | Updated normalization to produce permanent tables | | 0.3.24 | 2021-12-23 | [#8869](https://github.com/airbytehq/airbyte/pull/8869) | Changed staging approach to Byte-Buffered | From 44cb30aca5a3532dc2688200cf28d04fe543336b Mon Sep 17 00:00:00 2001 From: Eugene Date: Mon, 10 Jan 2022 22:00:16 +0200 Subject: [PATCH 085/215] =?UTF-8?q?=F0=9F=90=9BDestination-gcs\destination?= =?UTF-8?q?-bigquery(gcs)=20-=20updated=20check()=20method=20to=20handle?= =?UTF-8?q?=20that=20user=20has=20both=20storage.objects.create=20and=20st?= =?UTF-8?q?orage.multipartUploads.create=20roles=20(#9121)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [9044] Destination-gcs\destination-bigquery(gcs) - updated check() method to handle that user has both storage.objects.create and storage.multipartUploads.create roles --- .../airbyte_cdk/models/airbyte_protocol.py | 5 +- .../22f6c74f-5699-40ff-833c-4a879ea40133.json | 2 +- .../ca8f6566-e555-4b40-943a-545bf123117a.json | 2 +- .../seed/destination_definitions.yaml | 4 +- .../resources/seed/destination_specs.yaml | 12 +-- .../models/airbyte_protocol.py | 2 +- .../DestinationAcceptanceTest.java | 2 +- .../destination-bigquery/Dockerfile | 2 +- .../connectors/destination-gcs/Dockerfile | 2 +- .../connectors/destination-gcs/README.md | 3 +- .../connectors/destination-gcs/build.gradle | 1 + .../insufficient_roles_config.json | 10 +++ .../destination/gcs/GcsDestination.java | 74 ++++++++++++++++++- .../gcs/GcsDestinationAcceptanceTest.java | 32 +++++++- ...wflakeInsertDestinationAcceptanceTest.java | 3 +- docs/integrations/destinations/bigquery.md | 5 +- docs/integrations/destinations/gcs.md | 1 + 17 files changed, 135 insertions(+), 27 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-gcs/sample_secrets/insufficient_roles_config.json diff --git a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py index ed89e1d7b4416..39680a330c04a 100644 --- a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py @@ -240,10 +240,7 @@ class Config: ) spec: Optional[ConnectorSpecification] = 
None connectionStatus: Optional[AirbyteConnectionStatus] = None - catalog: Optional[AirbyteCatalog] = Field( - None, - description="log message: any kind of logging you want the platform to know about.", - ) + catalog: Optional[AirbyteCatalog] = Field(None, description="catalog message: the calalog") record: Optional[AirbyteRecordMessage] = Field(None, description="record message: the record") state: Optional[AirbyteStateMessage] = Field( None, diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json index 1f67b317fd096..d69347400d6f7 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "22f6c74f-5699-40ff-833c-4a879ea40133", "name": "BigQuery", "dockerRepository": "airbyte/destination-bigquery", - "dockerImageTag": "0.6.1", + "dockerImageTag": "0.6.2", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json index 32253be7ffb91..848186a281e75 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "ca8f6566-e555-4b40-943a-545bf123117a", "name": "Google Cloud Storage (GCS)", "dockerRepository": "airbyte/destination-gcs", - "dockerImageTag": "0.1.17", + "dockerImageTag": "0.1.19", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/gcs", "icon": "googlecloudstorage.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index de378c18a3b4b..7f0fbadec12d4 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -13,7 +13,7 @@ - name: BigQuery destinationDefinitionId: 22f6c74f-5699-40ff-833c-4a879ea40133 dockerRepository: airbyte/destination-bigquery - dockerImageTag: 0.6.1 + dockerImageTag: 0.6.2 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: BigQuery (denormalized typed struct) @@ -60,7 +60,7 @@ - name: Google Cloud Storage (GCS) destinationDefinitionId: ca8f6566-e555-4b40-943a-545bf123117a dockerRepository: airbyte/destination-gcs - dockerImageTag: 0.1.18 + dockerImageTag: 0.1.19 documentationUrl: https://docs.airbyte.io/integrations/destinations/gcs icon: googlecloudstorage.svg - name: Google PubSub diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 76f1641de17ef..cf247c52ed702 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ 
b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -176,7 +176,7 @@ supportsDBT: false supported_destination_sync_modes: - "append" -- dockerImage: "airbyte/destination-bigquery:0.6.1" +- dockerImage: "airbyte/destination-bigquery:0.6.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: @@ -261,7 +261,7 @@ type: "string" description: "When running custom transformations or Basic normalization,\ \ running queries on interactive mode can hit BQ limits, choosing batch\ - \ will solve those limitss." + \ will solve those limits." title: "Transformation Query Run Type" default: "interactive" enum: @@ -311,10 +311,12 @@ title: "Block Size (MB) for GCS multipart upload" description: "This is the size of a \"Part\" being buffered in memory.\ \ It limits the memory usage when writing. Larger values will allow\ - \ to upload a bigger files and improve the speed, but consumes9\ - \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + \ to upload a bigger files and improve the speed, but consumes more\ + \ memory. Allowed values: min=5MB, max=525MB Default: 5MB." type: "integer" default: 5 + minimum: 5 + maximum: 525 examples: - 5 keep_files_in_gcs-bucket: @@ -1141,7 +1143,7 @@ - "overwrite" - "append" supportsNamespaces: true -- dockerImage: "airbyte/destination-gcs:0.1.18" +- dockerImage: "airbyte/destination-gcs:0.1.19" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs" connectionSpecification: diff --git a/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py b/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py index 0d94c33737a69..25c507c4d82ac 100644 --- a/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py +++ b/airbyte-integrations/bases/airbyte-protocol/airbyte_protocol/models/airbyte_protocol.py @@ -226,7 +226,7 @@ class Config: log: Optional[AirbyteLogMessage] = Field(None, description="log message: any kind of logging you want the platform to know about.") spec: Optional[ConnectorSpecification] = None connectionStatus: Optional[AirbyteConnectionStatus] = None - catalog: Optional[AirbyteCatalog] = Field(None, description="log message: any kind of logging you want the platform to know about.") + catalog: Optional[AirbyteCatalog] = Field(None, description="catalog message: the calalog") record: Optional[AirbyteRecordMessage] = Field(None, description="record message: the record") state: Optional[AirbyteStateMessage] = Field( None, description="schema message: the state. Must be the last message produced. 
The platform uses this information" diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java index 7d84d1b4ac197..52f4633168f91 100644 --- a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java @@ -930,7 +930,7 @@ workerConfigs, new AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, getImageName( .run(new JobGetSpecConfig().withDockerImage(getImageName()), jobRoot); } - private StandardCheckConnectionOutput runCheck(final JsonNode config) throws WorkerException { + protected StandardCheckConnectionOutput runCheck(final JsonNode config) throws WorkerException { return new DefaultCheckConnectionWorker( workerConfigs, new AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, getImageName(), processFactory, null)) .run(new StandardCheckConnectionInput().withConnectionConfiguration(config), jobRoot); diff --git a/airbyte-integrations/connectors/destination-bigquery/Dockerfile b/airbyte-integrations/connectors/destination-bigquery/Dockerfile index b174182a586fc..e4bb7588c7ca2 100644 --- a/airbyte-integrations/connectors/destination-bigquery/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-bigquery COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.6.1 +LABEL io.airbyte.version=0.6.2 LABEL io.airbyte.name=airbyte/destination-bigquery diff --git a/airbyte-integrations/connectors/destination-gcs/Dockerfile b/airbyte-integrations/connectors/destination-gcs/Dockerfile index f92f19ab11915..793b2e0cead17 100644 --- a/airbyte-integrations/connectors/destination-gcs/Dockerfile +++ b/airbyte-integrations/connectors/destination-gcs/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-gcs COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.18 +LABEL io.airbyte.version=0.1.19 LABEL io.airbyte.name=airbyte/destination-gcs diff --git a/airbyte-integrations/connectors/destination-gcs/README.md b/airbyte-integrations/connectors/destination-gcs/README.md index fbd7eaec1ccd4..62ae22ab7d598 100644 --- a/airbyte-integrations/connectors/destination-gcs/README.md +++ b/airbyte-integrations/connectors/destination-gcs/README.md @@ -14,7 +14,8 @@ As a community contributor, you can follow these steps to run integration tests. ## Airbyte Employee -- Access the `destination gcs creds` secrets on Last Pass, and put it in `sample_secrets/config.json`. +- Access the `SECRET_DESTINATION-GCS__CREDS` secrets on SecretManager, and put it in `sample_secrets/config.json`. +_ Access the `SECRET_DESTINATION-GCS_NO_MULTIPART_ROLE_CREDS` secrets on SecretManager, and put it in `sample_secrets/insufficient_roles_config.json`. - Rename the directory from `sample_secrets` to `secrets`. 
### GCP Service Account for Testing diff --git a/airbyte-integrations/connectors/destination-gcs/build.gradle b/airbyte-integrations/connectors/destination-gcs/build.gradle index 36c46d80ffc5b..291042e64051a 100644 --- a/airbyte-integrations/connectors/destination-gcs/build.gradle +++ b/airbyte-integrations/connectors/destination-gcs/build.gradle @@ -40,4 +40,5 @@ dependencies { integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-gcs') + integrationTestJavaImplementation project(':airbyte-workers') } diff --git a/airbyte-integrations/connectors/destination-gcs/sample_secrets/insufficient_roles_config.json b/airbyte-integrations/connectors/destination-gcs/sample_secrets/insufficient_roles_config.json new file mode 100644 index 0000000000000..778287b9b3159 --- /dev/null +++ b/airbyte-integrations/connectors/destination-gcs/sample_secrets/insufficient_roles_config.json @@ -0,0 +1,10 @@ +{ + "gcs_bucket_name": "", + "gcs_bucket_path": "integration-test", + "gcs_bucket_region": "", + "credential": { + "credential_type": "HMAC_KEY", + "hmac_key_access_id": "", + "hmac_key_secret": "" + } +} diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/GcsDestination.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/GcsDestination.java index 0a09a17f9eb92..d2d960fac8a01 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/GcsDestination.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/GcsDestination.java @@ -5,6 +5,9 @@ package io.airbyte.integrations.destination.gcs; import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.transfer.TransferManager; +import com.amazonaws.services.s3.transfer.TransferManagerBuilder; +import com.amazonaws.services.s3.transfer.Upload; import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.integrations.BaseConnector; import io.airbyte.integrations.base.AirbyteMessageConsumer; @@ -16,13 +19,22 @@ import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; import java.util.function.Consumer; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class GcsDestination extends BaseConnector implements Destination { private static final Logger LOGGER = LoggerFactory.getLogger(GcsDestination.class); + public static final String EXPECTED_ROLES = "storage.multipartUploads.abort, storage.multipartUploads.create, " + + "storage.objects.create, storage.objects.delete, storage.objects.get, storage.objects.list"; + + public static final String CHECK_ACTIONS_TMP_FILE_NAME = "test"; + public static final String DUMMY_TEXT = "This is just a dummy text to write to test file"; public static void main(final String[] args) throws Exception { new IntegrationRunner(new GcsDestination()).run(args); @@ -31,13 +43,21 @@ public static void main(final String[] args) throws Exception { @Override public AirbyteConnectionStatus check(final JsonNode config) { try { - final GcsDestinationConfig destinationConfig = 
GcsDestinationConfig.getGcsDestinationConfig(config); + final GcsDestinationConfig destinationConfig = GcsDestinationConfig + .getGcsDestinationConfig(config); final AmazonS3 s3Client = GcsS3Helper.getGcsS3Client(destinationConfig); - s3Client.putObject(destinationConfig.getBucketName(), "test", "check-content"); - s3Client.deleteObject(destinationConfig.getBucketName(), "test"); + + // Test single Upload (for small files) permissions + testSingleUpload(s3Client, destinationConfig); + + // Test Multipart Upload permissions + testMultipartUpload(s3Client, destinationConfig); + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); } catch (final Exception e) { LOGGER.error("Exception attempting to access the Gcs bucket: {}", e.getMessage()); + LOGGER.error("Please make sure you account has all of these roles: " + EXPECTED_ROLES); + return new AirbyteConnectionStatus() .withStatus(AirbyteConnectionStatus.Status.FAILED) .withMessage("Could not connect to the Gcs bucket with the provided configuration. \n" + e @@ -45,12 +65,58 @@ public AirbyteConnectionStatus check(final JsonNode config) { } } + private void testSingleUpload(final AmazonS3 s3Client, final GcsDestinationConfig destinationConfig) { + LOGGER.info("Started testing if all required credentials assigned to user for single file uploading"); + s3Client.putObject(destinationConfig.getBucketName(), CHECK_ACTIONS_TMP_FILE_NAME, DUMMY_TEXT); + s3Client.deleteObject(destinationConfig.getBucketName(), CHECK_ACTIONS_TMP_FILE_NAME); + LOGGER.info("Finished checking for normal upload mode"); + } + + private void testMultipartUpload(final AmazonS3 s3Client, final GcsDestinationConfig destinationConfig) + throws Exception { + + LOGGER.info("Started testing if all required credentials assigned to user for Multipart upload"); + final TransferManager tm = TransferManagerBuilder.standard() + .withS3Client(s3Client) + // Sets the size threshold, in bytes, for when to use multipart uploads. Uploads over this size will + // automatically use a multipart upload strategy, while uploads smaller than this threshold will use + // a single connection to upload the whole object. So we need to set it as small for testing + // connection. See javadoc for more details. + .withMultipartUploadThreshold(1024L) // set 1KB as part size + .build(); + + try { + // TransferManager processes all transfers asynchronously, + // so this call returns immediately. + final Upload upload = tm.upload(destinationConfig.getBucketName(), CHECK_ACTIONS_TMP_FILE_NAME, getTmpFileToUpload()); + upload.waitForCompletion(); + s3Client.deleteObject(destinationConfig.getBucketName(), CHECK_ACTIONS_TMP_FILE_NAME); + } finally { + tm.shutdownNow(true); + } + LOGGER.info("Finished verification for multipart upload mode"); + } + + private File getTmpFileToUpload() throws IOException { + final File tmpFile = File.createTempFile(CHECK_ACTIONS_TMP_FILE_NAME, ".tmp"); + try (final FileWriter writer = new FileWriter(tmpFile)) { + // Text should be bigger than Threshold's size to make client use a multipart upload strategy, + // smaller than threshold will use a single connection to upload the whole object even if multipart + // upload option is ON. See {@link TransferManagerBuilder#withMultipartUploadThreshold} + // javadoc for more information. 
+ + writer.write(StringUtils.repeat(DUMMY_TEXT, 1000)); + } + return tmpFile; + } + @Override public AirbyteMessageConsumer getConsumer(final JsonNode config, final ConfiguredAirbyteCatalog configuredCatalog, final Consumer outputRecordCollector) { final GcsWriterFactory formatterFactory = new ProductionWriterFactory(); - return new GcsConsumer(GcsDestinationConfig.getGcsDestinationConfig(config), configuredCatalog, formatterFactory, outputRecordCollector); + return new GcsConsumer(GcsDestinationConfig.getGcsDestinationConfig(config), configuredCatalog, + formatterFactory, outputRecordCollector); } } diff --git a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java index 13bc2ec2c3791..097eb97a2c566 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java @@ -4,6 +4,8 @@ package io.airbyte.integrations.destination.gcs; +import static org.junit.jupiter.api.Assertions.assertEquals; + import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; import com.amazonaws.services.s3.model.S3ObjectSummary; @@ -13,6 +15,7 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; +import io.airbyte.config.StandardCheckConnectionOutput.Status; import io.airbyte.integrations.destination.s3.S3DestinationConstants; import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.S3FormatConfig; @@ -25,6 +28,7 @@ import java.util.Locale; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,7 +48,8 @@ public abstract class GcsDestinationAcceptanceTest extends DestinationAcceptance protected static final Logger LOGGER = LoggerFactory.getLogger(GcsDestinationAcceptanceTest.class); protected static final ObjectMapper MAPPER = MoreMappers.initMapper(); - protected final String secretFilePath = "secrets/config.json"; + protected static final String SECRET_FILE_PATH = "secrets/config.json"; + protected static final String SECRET_FILE_PATH_INSUFFICIENT_ROLES = "secrets/insufficient_roles_config.json"; protected final S3Format outputFormat; protected JsonNode configJson; protected GcsDestinationConfig config; @@ -55,7 +60,7 @@ protected GcsDestinationAcceptanceTest(final S3Format outputFormat) { } protected JsonNode getBaseConfigJson() { - return Jsons.deserialize(IOs.readFile(Path.of(secretFilePath))); + return Jsons.deserialize(IOs.readFile(Path.of(SECRET_FILE_PATH))); } @Override @@ -147,4 +152,27 @@ protected void tearDown(final TestDestinationEnv testEnv) { } } + /** + * Verify that when given user with no Multipart Upload Roles, that check connection returns a + * failed response. Assume that the #getInsufficientRolesFailCheckConfig() returns the service + * account has storage.objects.create permission but not storage.multipartUploads.create. 
+ */ + @Test + public void testCheckConnectionInsufficientRoles() throws Exception { + final JsonNode baseConfigJson = Jsons.deserialize(IOs.readFile(Path.of( + SECRET_FILE_PATH_INSUFFICIENT_ROLES))); + + // Set a random GCS bucket path for each integration test + final JsonNode configJson = Jsons.clone(baseConfigJson); + final String testBucketPath = String.format( + "%s_test_%s", + outputFormat.name().toLowerCase(Locale.ROOT), + RandomStringUtils.randomAlphanumeric(5)); + ((ObjectNode) configJson) + .put("gcs_bucket_path", testBucketPath) + .set("format", getFormatConfig()); + + assertEquals(Status.FAILED, runCheck(configJson).getStatus()); + } + } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java index 6cf51a5102ffa..5aeac996f1431 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java @@ -127,7 +127,8 @@ private List retrieveRecordsFromTable(final String tableName, final St final ResultSet tableInfo = connection.createStatement() .executeQuery(String.format("SHOW TABLES LIKE '%s' IN SCHEMA %s;", tableName, schema)); assertTrue(tableInfo.next()); - // check that we're creating permanent tables. DBT defaults to transient tables, which have `TRANSIENT` as the value for the `kind` column. + // check that we're creating permanent tables. DBT defaults to transient tables, which have + // `TRANSIENT` as the value for the `kind` column. 
assertEquals("TABLE", tableInfo.getString("kind")); return connection.createStatement() .executeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)); diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index f1eadc5d968d2..67b13f5e79d57 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -153,6 +153,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.6.2 | 2022-01-10 | [\#9121](https://github.com/airbytehq/airbyte/pull/9121) | Fixed check method for GCS mode to verify if all roles assigned to user | | 0.6.1 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | | 0.6.0 | 2021-12-17 | [\#8788](https://github.com/airbytehq/airbyte/issues/8788) | BigQuery/BiqQuery denorm Destinations : Add possibility to use different types of GCS files | | 0.5.1 | 2021-12-16 | [\#8816](https://github.com/airbytehq/airbyte/issues/8816) | Update dataset locations | @@ -170,8 +171,8 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | -| 0.2.2 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | -| 0.2.1 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | +| 0.2.2 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | +| 0.2.1 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | | 0.2.0 | 2021-12-17 | [\#8788](https://github.com/airbytehq/airbyte/pull/8788) | BigQuery/BiqQuery denorm Destinations : Add possibility to use different types of GCS files | | 0.1.11 | 2021-12-16 | [\#8816](https://github.com/airbytehq/airbyte/issues/8816) | Update dataset locations | | 0.1.10 | 2021-11-09 | [\#7804](https://github.com/airbytehq/airbyte/pull/7804) | handle null values in fields described by a $ref definition | diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index b0141d0b91566..2bb45eee300a1 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -229,6 +229,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.19 | 2022-01-10 | [\#9121](https://github.com/airbytehq/airbyte/pull/9121) | Fixed check method for GCS mode to verify if all roles assigned to user | | 0.1.18 | 2021-12-30 | [\#8809](https://github.com/airbytehq/airbyte/pull/8809) | Update connector fields title/description | | 0.1.17 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | | 0.1.16 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. 
| From 7c4ae134734de1409e30f08efe52fee4c7c5b07e Mon Sep 17 00:00:00 2001 From: vitaliizazmic <75620293+vitaliizazmic@users.noreply.github.com> Date: Mon, 10 Jan 2022 22:26:25 +0200 Subject: [PATCH 086/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20HubSpot:=20supp?= =?UTF-8?q?ort=20list=20memberships=20in=20Contacts=20stream=20(#9129)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Source Hubspot #8477 - separate stream for contacts list memberships * Source Hubspot #8477 - reformat * Source Hubspot #8477 - fix basic read tests: add contacts_list_memberships to empty streams * Source Hubspot #8477 - bump version and update documentation --- .../36c891d9-4bd9-43ac-bad2-10e12756272c.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-hubspot/Dockerfile | 2 +- .../source-hubspot/acceptance-test-config.yml | 2 +- .../configured_catalog_for_oauth_config.json | 9 ++++ .../sample_files/full_refresh_catalog.json | 9 ++++ .../source-hubspot/source_hubspot/api.py | 34 ++++++++++++ .../source-hubspot/source_hubspot/client.py | 2 + .../schemas/contacts_list_memberships.json | 24 +++++++++ docs/integrations/sources/hubspot.md | 53 ++++++++++--------- 11 files changed, 111 insertions(+), 30 deletions(-) create mode 100644 airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/contacts_list_memberships.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json index 913968015e749..5e76551413b44 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "36c891d9-4bd9-43ac-bad2-10e12756272c", "name": "HubSpot", "dockerRepository": "airbyte/source-hubspot", - "dockerImageTag": "0.1.29", + "dockerImageTag": "0.1.30", "documentationUrl": "https://docs.airbyte.io/integrations/sources/hubspot", "icon": "hubspot.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 0723bbb8a0cb2..973c37515c38c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -287,7 +287,7 @@ - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.29 + dockerImageTag: 0.1.30 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index b660002ebac1f..f16799115d5d5 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2846,7 +2846,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-hubspot:0.1.29" +- dockerImage: "airbyte/source-hubspot:0.1.30" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" connectionSpecification: diff --git 
a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index 7c35101c19b4f..c8ddb3ac162e7 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.29 +LABEL io.airbyte.version=0.1.30 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml b/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml index 02faf9dc55f9d..d10181d818662 100644 --- a/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml @@ -25,7 +25,7 @@ tests: # incremental streams: subscription_changes and email_events (it takes a long time to read) # and therefore the start date is set at 2021-10-10 for `config_oauth.json`, # but the campaign was created on 2021-01-11 - empty_streams: ["campaigns", "workflows"] + empty_streams: ["campaigns", "workflows", "contacts_list_memberships"] incremental: - config_path: "secrets/config.json" configured_catalog_path: "sample_files/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog_for_oauth_config.json b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog_for_oauth_config.json index ee85be89faf26..3bc43efac7647 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog_for_oauth_config.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog_for_oauth_config.json @@ -36,6 +36,15 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "contacts_list_memberships", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": "deal_pipelines", diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json b/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json index d7dd7dbc7ca51..19f4049c4ac17 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json @@ -36,6 +36,15 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "contacts_list_memberships", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": "deal_pipelines", diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index 01d353f4920bb..7d02a46eacb52 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -598,6 +598,40 @@ class ContactListStream(IncrementalStream): need_chunk = False +class ContactsListMembershipsStream(Stream): + """Contacts list Memberships, API v1 + The Stream was created due to issue #8477, where supporting List Memberships 
in Contacts stream was requested. + According to the issue this feature is supported in API v1 by setting parameter showListMemberships=true + in get all contacts endpoint. API will return list memberships for each contact record. + But for syncing Contacts API v3 is used, where list memberships for contacts isn't supported. + Therefore, new stream was created based on get all contacts endpoint of API V1. + Docs: https://legacydocs.hubspot.com/docs/methods/contacts/get_contacts + """ + + url = "/contacts/v1/lists/all/contacts/all" + updated_at_field = "timestamp" + more_key = "has-more" + data_field = "contacts" + page_filter = "vidOffset" + page_field = "vid-offset" + + def _transform(self, records: Iterable) -> Iterable: + """Extracting list membership records from contacts + According to documentation Contacts may have multiple vids, + but the canonical-vid will be the primary ID for a record. + Docs: https://legacydocs.hubspot.com/docs/methods/contacts/contacts-overview + """ + for record in super()._transform(records): + canonical_vid = record.get("canonical-vid") + for item in record.get("list-memberships", []): + yield {"canonical-vid": canonical_vid, **item} + + def list(self, fields) -> Iterable: + """Receiving all contacts with list memberships""" + params = {"showListMemberships": True} + yield from self.read(partial(self._api.get, url=self.url), params) + + class DealStageHistoryStream(Stream): """Deal stage history, API v1 Deal stage history is exposed by the v1 API, but not the v3 API. diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py index b47bdaeaa4d13..ca17ea698f025 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py @@ -12,6 +12,7 @@ API, CampaignStream, ContactListStream, + ContactsListMembershipsStream, CRMObjectIncrementalStream, DealPipelineStream, DealStream, @@ -38,6 +39,7 @@ def __init__(self, start_date, credentials, **kwargs): "companies": CRMObjectIncrementalStream(entity="company", associations=["contacts"], **common_params), "contact_lists": ContactListStream(**common_params), "contacts": CRMObjectIncrementalStream(entity="contact", **common_params), + "contacts_list_memberships": ContactsListMembershipsStream(**common_params), "deal_pipelines": DealPipelineStream(**common_params), "deals": DealStream(associations=["contacts"], **common_params), "email_events": EmailEventStream(**common_params), diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/contacts_list_memberships.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/contacts_list_memberships.json new file mode 100644 index 0000000000000..4252bc9efadd6 --- /dev/null +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/contacts_list_memberships.json @@ -0,0 +1,24 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "canonical-vid": { + "type": ["null", "integer"] + }, + "static-list-id": { + "type": ["null", "integer"] + }, + "internal-list-id": { + "type": ["null", "integer"] + }, + "timestamp": { + "type": ["null", "integer"] + }, + "vid": { + "type": ["null", "integer"] + }, + "is-member": { + "type": ["null", "boolean"] + } + } +} diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index c85e7fdafcc79..81c77c50f345f 100644 
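For context on the documentation entries that follow: each record emitted by the new stream is one entry of a contact's list-memberships array, stamped with that contact's canonical-vid, matching the schema added above. The connector performs this flattening in Python (_transform in api.py); the Java rendering below is only an illustration with made-up sample values.

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.util.ArrayList;
import java.util.List;

/** Illustration of the contacts_list_memberships flattening done by the new stream. */
public class ListMembershipFlattenSketch {

  private static final ObjectMapper MAPPER = new ObjectMapper();

  /** One output record per list membership, keyed by the contact's canonical-vid. */
  public static List<JsonNode> flatten(final JsonNode contact) {
    final List<JsonNode> records = new ArrayList<>();
    final JsonNode canonicalVid = contact.get("canonical-vid");
    for (final JsonNode membership : contact.withArray("list-memberships")) {
      final ObjectNode flattened = ((ObjectNode) membership).deepCopy();
      flattened.set("canonical-vid", canonicalVid);
      records.add(flattened);
    }
    return records;
  }

  public static void main(final String[] args) throws Exception {
    // Field names follow the schema added in this patch (static-list-id, timestamp, is-member, ...).
    final JsonNode contact = MAPPER.readTree(
        "{\"canonical-vid\": 101, \"list-memberships\": ["
            + "{\"static-list-id\": 5, \"internal-list-id\": 5, \"timestamp\": 1640995200000, \"vid\": 101, \"is-member\": true}]}");
    flatten(contact).forEach(System.out::println);
  }
}
```
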
--- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -21,6 +21,7 @@ This source is capable of syncing the following tables and their data: * [Companies](https://developers.hubspot.com/docs/api/crm/companies) \(Incremental\) * [Contact Lists](http://developers.hubspot.com/docs/methods/lists/get_lists) \(Incremental\) * [Contacts](https://developers.hubspot.com/docs/methods/contacts/get_contacts) \(Incremental\) +* [Contacts list memberships](https://legacydocs.hubspot.com/docs/methods/contacts/get_contacts) * [Deal Pipelines](https://developers.hubspot.com/docs/methods/pipelines/get_pipelines_for_object_type) * [Deals](https://developers.hubspot.com/docs/api/crm/deals) \(including Contact associations\) \(Incremental\) * [Email Events](https://developers.hubspot.com/docs/methods/email/get_events) \(Incremental\) @@ -91,6 +92,7 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | `companies` | `contacts` | | `contact_lists` | `contacts` | | `contacts` | `contacts` | +| `contacts_list_memberships` | `contacts` | | `deal_pipelines` | either the `contacts` scope \(to fetch deals pipelines\) or the `tickets` scope. | | `deals` | `contacts` | | `email_events` | `content` | @@ -106,29 +108,30 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | -| 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update fields and descriptions | -| 0.1.27 | 2021-12-09 | [8658](https://github.com/airbytehq/airbyte/pull/8658) | Fixed config backward compatibility issue by allowing additional properties in the spec | -| 0.1.26 | 2021-11-30 | [8329](https://github.com/airbytehq/airbyte/pull/8329) | Removed 'skip_dynamic_fields' config param | -| 0.1.25 | 2021-11-23 | [8216](https://github.com/airbytehq/airbyte/pull/8216) | Add skip dynamic fields for testing only | -| 0.1.24 | 2021-11-09 | [7683](https://github.com/airbytehq/airbyte/pull/7683) | Fix name issue 'Hubspot' -> 'HubSpot' | -| 0.1.23 | 2021-11-08 | [7730](https://github.com/airbytehq/airbyte/pull/7730) | Fix oAuth flow schema| -| 0.1.22 | 2021-11-03 | [7562](https://github.com/airbytehq/airbyte/pull/7562) | Migrate Hubspot source to CDK structure | -| 0.1.21 | 2021-10-27 | [7405](https://github.com/airbytehq/airbyte/pull/7405) | Change of package `import` from `urllib` to `urllib.parse` | -| 0.1.20 | 2021-10-26 | [7393](https://github.com/airbytehq/airbyte/pull/7393) | Hotfix for `split_properties` function, add the length of separator symbol `,`(`%2C` in HTTP format) to the checking of the summary URL length | -| 0.1.19 | 2021-10-26 | [6954](https://github.com/airbytehq/airbyte/pull/6954) | Fix issue with getting `414` HTTP error for streams | -| 0.1.18 | 2021-10-18 | [5840](https://github.com/airbytehq/airbyte/pull/5840) | Add new marketing emails (with statistics) stream | -| 0.1.17 | 2021-10-14 | [6995](https://github.com/airbytehq/airbyte/pull/6995) | Update `discover` method: disable `quotes` stream when using OAuth config | -| 0.1.16 | 2021-09-27 | [6465](https://github.com/airbytehq/airbyte/pull/6465) | Implement OAuth support. 
Use CDK authenticator instead of connector specific authenticator | -| 0.1.15 | 2021-09-23 | [6374](https://github.com/airbytehq/airbyte/pull/6374) | Use correct schema for `owners` stream | -| 0.1.14 | 2021-09-08 | [5693](https://github.com/airbytehq/airbyte/pull/5693) | Include deal\_to\_contact association when pulling deal stream and include contact ID in contact stream | -| 0.1.13 | 2021-09-08 | [5834](https://github.com/airbytehq/airbyte/pull/5834) | Fixed array fields without items property in schema | -| 0.1.12 | 2021-09-02 | [5798](https://github.com/airbytehq/airbyte/pull/5798) | Treat empty string values as None for field with format to fix normalization errors | -| 0.1.11 | 2021-08-26 | [5685](https://github.com/airbytehq/airbyte/pull/5685) | Remove all date-time format from schemas | -| 0.1.10 | 2021-08-17 | [5463](https://github.com/airbytehq/airbyte/pull/5463) | Fix fail on reading stream using `API Key` without required permissions | -| 0.1.9 | 2021-08-11 | [5334](https://github.com/airbytehq/airbyte/pull/5334) | Fix empty strings inside float datatype | -| 0.1.8 | 2021-08-06 | [5250](https://github.com/airbytehq/airbyte/pull/5250) | Fix issue with printing exceptions | -| 0.1.7 | 2021-07-27 | [4913](https://github.com/airbytehq/airbyte/pull/4913) | Update fields schema | +| Version | Date | Pull Request | Subject | +|:--------|:-----------| :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------| +| 0.1.30 | 2021-01-10 | [9129](https://github.com/airbytehq/airbyte/pull/9129) | Created Contacts list memberships streams | +| 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | +| 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update fields and descriptions | +| 0.1.27 | 2021-12-09 | [8658](https://github.com/airbytehq/airbyte/pull/8658) | Fixed config backward compatibility issue by allowing additional properties in the spec | +| 0.1.26 | 2021-11-30 | [8329](https://github.com/airbytehq/airbyte/pull/8329) | Removed 'skip_dynamic_fields' config param | +| 0.1.25 | 2021-11-23 | [8216](https://github.com/airbytehq/airbyte/pull/8216) | Add skip dynamic fields for testing only | +| 0.1.24 | 2021-11-09 | [7683](https://github.com/airbytehq/airbyte/pull/7683) | Fix name issue 'Hubspot' -> 'HubSpot' | +| 0.1.23 | 2021-11-08 | [7730](https://github.com/airbytehq/airbyte/pull/7730) | Fix oAuth flow schema | +| 0.1.22 | 2021-11-03 | [7562](https://github.com/airbytehq/airbyte/pull/7562) | Migrate Hubspot source to CDK structure | +| 0.1.21 | 2021-10-27 | [7405](https://github.com/airbytehq/airbyte/pull/7405) | Change of package `import` from `urllib` to `urllib.parse` | +| 0.1.20 | 2021-10-26 | [7393](https://github.com/airbytehq/airbyte/pull/7393) | Hotfix for `split_properties` function, add the length of separator symbol `,`(`%2C` in HTTP format) to the checking of the summary URL length | +| 0.1.19 | 2021-10-26 | [6954](https://github.com/airbytehq/airbyte/pull/6954) | Fix issue with getting `414` HTTP error for streams | +| 0.1.18 | 2021-10-18 | [5840](https://github.com/airbytehq/airbyte/pull/5840) | Add new marketing emails (with statistics) stream | +| 0.1.17 | 2021-10-14 | 
[6995](https://github.com/airbytehq/airbyte/pull/6995) | Update `discover` method: disable `quotes` stream when using OAuth config | +| 0.1.16 | 2021-09-27 | [6465](https://github.com/airbytehq/airbyte/pull/6465) | Implement OAuth support. Use CDK authenticator instead of connector specific authenticator | +| 0.1.15 | 2021-09-23 | [6374](https://github.com/airbytehq/airbyte/pull/6374) | Use correct schema for `owners` stream | +| 0.1.14 | 2021-09-08 | [5693](https://github.com/airbytehq/airbyte/pull/5693) | Include deal\_to\_contact association when pulling deal stream and include contact ID in contact stream | +| 0.1.13 | 2021-09-08 | [5834](https://github.com/airbytehq/airbyte/pull/5834) | Fixed array fields without items property in schema | +| 0.1.12 | 2021-09-02 | [5798](https://github.com/airbytehq/airbyte/pull/5798) | Treat empty string values as None for field with format to fix normalization errors | +| 0.1.11 | 2021-08-26 | [5685](https://github.com/airbytehq/airbyte/pull/5685) | Remove all date-time format from schemas | +| 0.1.10 | 2021-08-17 | [5463](https://github.com/airbytehq/airbyte/pull/5463) | Fix fail on reading stream using `API Key` without required permissions | +| 0.1.9 | 2021-08-11 | [5334](https://github.com/airbytehq/airbyte/pull/5334) | Fix empty strings inside float datatype | +| 0.1.8 | 2021-08-06 | [5250](https://github.com/airbytehq/airbyte/pull/5250) | Fix issue with printing exceptions | +| 0.1.7 | 2021-07-27 | [4913](https://github.com/airbytehq/airbyte/pull/4913) | Update fields schema | From 484169eed149c95f6d595b30c3efb1d1a326bd1a Mon Sep 17 00:00:00 2001 From: Mohamed Magdy Date: Mon, 10 Jan 2022 23:55:02 +0100 Subject: [PATCH 087/215] =?UTF-8?q?=F0=9F=90=9B=20=20Source=20Recurly:=20a?= =?UTF-8?q?void=20loading=20all=20accounts=20when=20importing=20account=20?= =?UTF-8?q?coupon=20redemptions=20(#9382)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Avoid loading all accounts when importing account coupon redemptions * Bump Recurly connector version to `0.3.1` * merge contributor code * update seed file * update docs Co-authored-by: Marcos Marx --- .../cd42861b-01fc-4658-a8ab-5d11d0510f01.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/connectors/source-recurly/.python-version | 1 + airbyte-integrations/connectors/source-recurly/Dockerfile | 2 +- .../connectors/source-recurly/source_recurly/streams.py | 2 +- .../cd42861b-01fc-4658-a8ab-5d11d0510f01.json | 2 +- docs/integrations/sources/recurly.md | 2 ++ 8 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 airbyte-integrations/connectors/source-recurly/.python-version diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cd42861b-01fc-4658-a8ab-5d11d0510f01.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cd42861b-01fc-4658-a8ab-5d11d0510f01.json index 0fe2a8a80ffaa..40f1ecd0fd373 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cd42861b-01fc-4658-a8ab-5d11d0510f01.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cd42861b-01fc-4658-a8ab-5d11d0510f01.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "cd42861b-01fc-4658-a8ab-5d11d0510f01", "name": "Recurly", "dockerRepository": "airbyte/source-recurly", - "dockerImageTag": "0.3.0", + "dockerImageTag": "0.3.1", 
"documentationUrl": "https://docs.airbyte.io/integrations/sources/recurly", "icon": "recurly.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 973c37515c38c..dd33d87d8eb13 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -572,7 +572,7 @@ - name: Recurly sourceDefinitionId: cd42861b-01fc-4658-a8ab-5d11d0510f01 dockerRepository: airbyte/source-recurly - dockerImageTag: 0.3.0 + dockerImageTag: 0.3.1 documentationUrl: https://docs.airbyte.io/integrations/sources/recurly icon: recurly.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index f16799115d5d5..f6b00243de253 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -5885,7 +5885,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-recurly:0.3.0" +- dockerImage: "airbyte/source-recurly:0.3.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/recurly" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-recurly/.python-version b/airbyte-integrations/connectors/source-recurly/.python-version new file mode 100644 index 0000000000000..b13d4f55c6812 --- /dev/null +++ b/airbyte-integrations/connectors/source-recurly/.python-version @@ -0,0 +1 @@ +3.9.7/envs/airbyte-recurly diff --git a/airbyte-integrations/connectors/source-recurly/Dockerfile b/airbyte-integrations/connectors/source-recurly/Dockerfile index da5c8fa04476b..9c79c6d84b4a7 100644 --- a/airbyte-integrations/connectors/source-recurly/Dockerfile +++ b/airbyte-integrations/connectors/source-recurly/Dockerfile @@ -34,5 +34,5 @@ COPY source_recurly ./source_recurly ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.3.0 +LABEL io.airbyte.version=0.3.1 LABEL io.airbyte.name=airbyte/source-recurly diff --git a/airbyte-integrations/connectors/source-recurly/source_recurly/streams.py b/airbyte-integrations/connectors/source-recurly/source_recurly/streams.py index e10b00e9e2fd8..e2f489282e2e3 100644 --- a/airbyte-integrations/connectors/source-recurly/source_recurly/streams.py +++ b/airbyte-integrations/connectors/source-recurly/source_recurly/streams.py @@ -169,7 +169,7 @@ def read_records( params.update({BEGIN_TIME_PARAM: self.begin_time}) # Call the Recurly client methods - accounts = self._client.list_accounts().items() + accounts = self._client.list_accounts(params=params).items() for account in accounts: coupons = self._client.list_account_coupon_redemptions(account_id=account.id, params=params).items() for coupon in coupons: diff --git a/airbyte-server/src/test/resources/migration/dummy_data/config/STANDARD_SOURCE_DEFINITION/cd42861b-01fc-4658-a8ab-5d11d0510f01.json b/airbyte-server/src/test/resources/migration/dummy_data/config/STANDARD_SOURCE_DEFINITION/cd42861b-01fc-4658-a8ab-5d11d0510f01.json index 9c96bcbb52944..4b6ac4817bf1d 100644 --- a/airbyte-server/src/test/resources/migration/dummy_data/config/STANDARD_SOURCE_DEFINITION/cd42861b-01fc-4658-a8ab-5d11d0510f01.json +++ 
b/airbyte-server/src/test/resources/migration/dummy_data/config/STANDARD_SOURCE_DEFINITION/cd42861b-01fc-4658-a8ab-5d11d0510f01.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "cd42861b-01fc-4658-a8ab-5d11d0510f01", "name": "Recurly", "dockerRepository": "airbyte/source-recurly", - "dockerImageTag": "0.2.0", + "dockerImageTag": "0.3.1", "documentationUrl": "https://hub.docker.com/r/airbyte/source-recurly", "spec": { "documentationUrl": "https://docs.airbyte.io/integrations/sources/recurly", diff --git a/docs/integrations/sources/recurly.md b/docs/integrations/sources/recurly.md index b64901a318041..d1a49a4a65b7b 100644 --- a/docs/integrations/sources/recurly.md +++ b/docs/integrations/sources/recurly.md @@ -54,5 +54,7 @@ We recommend creating a restricted, read-only key specifically for Airbyte acces | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- | :--- | +| 0.3.1 | 2022-01-10 | [9382](https://github.com/airbytehq/airbyte/pull/9382) | Source Recurly: avoid loading all accounts when importing account coupon redemptions | | 0.3.0 | 2021-12-08 | [8468](https://github.com/airbytehq/airbyte/pull/8468) | Support Incremental Sync Mode | + From 746102ac20742442e52342b43b82257cfb123242 Mon Sep 17 00:00:00 2001 From: Charles Date: Mon, 10 Jan 2022 17:15:08 -0800 Subject: [PATCH 088/215] update spec (#9401) --- .../init/src/main/resources/seed/destination_specs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index cf247c52ed702..535ea3c6abbfd 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3769,7 +3769,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.4.1" +- dockerImage: "airbyte/destination-snowflake:0.4.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: From a5d5eb7d597926b71097967e46a403aa7ffe98e9 Mon Sep 17 00:00:00 2001 From: Augustin Date: Tue, 11 Jan 2022 10:26:20 +0100 Subject: [PATCH 089/215] Source notion: fix documentation url (#9084) --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 4 ++-- airbyte-integrations/connectors/source-notion/Dockerfile | 2 +- .../connectors/source-notion/source_notion/spec.json | 2 +- docs/integrations/sources/notion.md | 1 + 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index dd33d87d8eb13..4a0a889fe2b88 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -447,7 +447,7 @@ - name: Notion sourceDefinitionId: 6e00b415-b02e-4160-bf02-58176a0ae687 dockerRepository: airbyte/source-notion - dockerImageTag: 0.1.1 + dockerImageTag: 0.1.2 documentationUrl: https://docs.airbyte.io/integrations/sources/notion icon: notion.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index f6b00243de253..6a9d6e0178a0b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ 
b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4722,9 +4722,9 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-notion:0.1.1" +- dockerImage: "airbyte/source-notion:0.1.2" spec: - documentationUrl: "https://docsurl.com" + documentationUrl: "https://docs.airbyte.io/integrations/sources/notion" connectionSpecification: $schema: "http://json-schema.org/draft-07/schema#" title: "Notion Source Spec" diff --git a/airbyte-integrations/connectors/source-notion/Dockerfile b/airbyte-integrations/connectors/source-notion/Dockerfile index 9e121a38fd34d..c43238645f350 100644 --- a/airbyte-integrations/connectors/source-notion/Dockerfile +++ b/airbyte-integrations/connectors/source-notion/Dockerfile @@ -34,5 +34,5 @@ COPY source_notion ./source_notion ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/source-notion diff --git a/airbyte-integrations/connectors/source-notion/source_notion/spec.json b/airbyte-integrations/connectors/source-notion/source_notion/spec.json index b3029b1d5fbfd..2552368c0ebcd 100644 --- a/airbyte-integrations/connectors/source-notion/source_notion/spec.json +++ b/airbyte-integrations/connectors/source-notion/source_notion/spec.json @@ -1,5 +1,5 @@ { - "documentationUrl": "https://docsurl.com", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/notion", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Notion Source Spec", diff --git a/docs/integrations/sources/notion.md b/docs/integrations/sources/notion.md index 474396e1a6211..98c9de9bd4457 100644 --- a/docs/integrations/sources/notion.md +++ b/docs/integrations/sources/notion.md @@ -57,6 +57,7 @@ Please register on Notion and follow this [docs](https://developers.notion.com/d | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.2 | 2022-01-11 | [9084](https://github.com/airbytehq/airbyte/pull/9084) | Fix documentation URL | | 0.1.1 | 2021-12-30 | [9207](https://github.com/airbytehq/airbyte/pull/9207) | Update connector fields title/description | | 0.1.0 | 2021-10-17 | [7092](https://github.com/airbytehq/airbyte/pull/7092) | Initial Release | From f466986f34ce0792280452261c8b20f00675a631 Mon Sep 17 00:00:00 2001 From: vitaliizazmic <75620293+vitaliizazmic@users.noreply.github.com> Date: Tue, 11 Jan 2022 11:36:19 +0200 Subject: [PATCH 090/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20Salesforce:=20S?= =?UTF-8?q?upport=20oauth'ing=20into=20sandbox=20environment,=20switching?= =?UTF-8?q?=20to=20advanced=5Fauth=20(#8797)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Source Salesforce #7722 - change spec to use advanced_auth * Source Salesforce #7722 - update oauth flow * Source Salesforce #7722 - reformat * Source Salesforce #7722 - fixing get environment in oauth flow * Source Salesforce #7722 - improve spec * Source Salesforce #7722 - reformat * Source Salesforce #7722 - bump version and update docs --- .../b117307c-14b6-41aa-9422-947e34922962.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-salesforce/Dockerfile | 2 +- .../source_salesforce/spec.json | 61 +++++++++++++++++-- .../oauth/flows/SalesforceOAuthFlow.java | 16 +++-- 
docs/integrations/sources/salesforce.md | 31 +++++----- 7 files changed, 87 insertions(+), 29 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json index 3373ef919e663..e0eb9d47f2ea3 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "b117307c-14b6-41aa-9422-947e34922962", "name": "Salesforce", "dockerRepository": "airbyte/source-salesforce", - "dockerImageTag": "0.1.10", + "dockerImageTag": "0.1.13", "documentationUrl": "https://docs.airbyte.io/integrations/sources/salesforce", "icon": "salesforce.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 4a0a889fe2b88..4a411eef5008f 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -607,7 +607,7 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 0.1.12 + dockerImageTag: 0.1.13 documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 6a9d6e0178a0b..709cf13606595 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6310,7 +6310,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:0.1.12" +- dockerImage: "airbyte/source-salesforce:0.1.13" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile index 400be3e7289e3..5f654c8695e20 100644 --- a/airbyte-integrations/connectors/source-salesforce/Dockerfile +++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile @@ -25,5 +25,5 @@ COPY source_salesforce ./source_salesforce ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.12 +LABEL io.airbyte.version=0.1.13 LABEL io.airbyte.name=airbyte/source-salesforce diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json b/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json index e10ef126745e1..1435f17cf04e9 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json @@ -7,6 +7,10 @@ "required": ["client_id", "client_secret", "refresh_token", "api_type"], "additionalProperties": false, "properties": { + "auth_type": { + "type": "string", + "const": "Client" + }, "client_id": { "title": "Client ID", "description": "The Consumer Key that can be found when viewing your app in Salesforce", @@ -84,12 
+88,57 @@ } } }, - "authSpecification": { - "auth_type": "oauth2.0", - "oauth2Specification": { - "rootObject": [], - "oauthFlowInitParameters": [["client_id"], ["client_secret"]], - "oauthFlowOutputParameters": [["refresh_token"]] + "advanced_auth": { + "auth_flow_type": "oauth2.0", + "predicate_key": ["auth_type"], + "predicate_value": "Client", + "oauth_config_specification": { + "oauth_user_input_from_connector_config_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "is_sandbox": { + "type": "boolean", + "path_in_connector_config": ["is_sandbox"] + } + } + }, + "complete_oauth_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "refresh_token": { + "type": "string", + "path_in_connector_config": ["refresh_token"] + } + } + }, + "complete_oauth_server_input_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string" + }, + "client_secret": { + "type": "string" + } + } + }, + "complete_oauth_server_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string", + "path_in_connector_config": ["client_id"] + }, + "client_secret": { + "type": "string", + "path_in_connector_config": ["client_secret"] + } + } + } } } } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java index 1d4d91cc19e20..c44413914122c 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java @@ -26,8 +26,8 @@ public class SalesforceOAuthFlow extends BaseOAuth2Flow { // Clickable link for IDE // https://help.salesforce.com/s/articleView?language=en_US&id=sf.remoteaccess_oauth_web_server_flow.htm - private static final String AUTHORIZE_URL = "https://login.salesforce.com/services/oauth2/authorize"; - private static final String ACCESS_TOKEN_URL = "https://login.salesforce.com/services/oauth2/token"; + private static final String AUTHORIZE_URL = "https://%s.salesforce.com/services/oauth2/authorize"; + private static final String ACCESS_TOKEN_URL = "https://%s.salesforce.com/services/oauth2/token"; public SalesforceOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient) { super(configRepository, httpClient); @@ -45,7 +45,7 @@ protected String formatConsentUrl(final UUID definitionId, final JsonNode inputOAuthConfiguration) throws IOException { try { - return new URIBuilder(AUTHORIZE_URL) + return new URIBuilder(String.format(AUTHORIZE_URL, getEnvironment(inputOAuthConfiguration))) .addParameter("client_id", clientId) .addParameter("redirect_uri", redirectUrl) .addParameter("response_type", "code") @@ -58,7 +58,7 @@ protected String formatConsentUrl(final UUID definitionId, @Override protected String getAccessTokenUrl(final JsonNode inputOAuthConfiguration) { - return ACCESS_TOKEN_URL; + return String.format(ACCESS_TOKEN_URL, getEnvironment(inputOAuthConfiguration)); } @Override @@ -77,4 +77,12 @@ public List getDefaultOAuthOutputPath() { return List.of(); } + private String getEnvironment(JsonNode inputOAuthConfiguration) { + var isSandbox = inputOAuthConfiguration.get("is_sandbox"); + if (isSandbox == null) { + return "login"; + } + return (isSandbox.asBoolean() == true) ? 
"test" : "login"; + } + } diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index 05851bd7f9642..7a104b382b7ff 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -735,19 +735,20 @@ List of available streams: ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.12 | 2021-12-23 | [8871](https://github.com/airbytehq/airbyte/pull/8871) | Fix `examples` for new field in specification | -| 0.1.11 | 2021-12-23 | [8871](https://github.com/airbytehq/airbyte/pull/8871) | Add the ability to filter streams by user | -| 0.1.10 | 2021-12-23 | [9005](https://github.com/airbytehq/airbyte/pull/9005) | Handling 400 error when a stream is not queryable | -| 0.1.9 | 2021-12-07 | [8405](https://github.com/airbytehq/airbyte/pull/8405) | Filter 'null' byte(s) in HTTP responses | -| 0.1.8 | 2021-11-30 | [8191](https://github.com/airbytehq/airbyte/pull/8191) | Make `start_date` optional and change its format to `YYYY-MM-DD` | -| 0.1.7 | 2021-11-24 | [8206](https://github.com/airbytehq/airbyte/pull/8206) | Handling 400 error when trying to create a job for sync using Bulk API. | -| 0.1.6 | 2021-11-16 | [8009](https://github.com/airbytehq/airbyte/pull/8009) | Fix retring of BULK jobs | -| 0.1.5 | 2021-11-15 | [7885](https://github.com/airbytehq/airbyte/pull/7885) | Add `Transform` for output records | -| 0.1.4 | 2021-11-09 | [7778](https://github.com/airbytehq/airbyte/pull/7778) | Fix types for `anyType` fields | -| 0.1.3 | 2021-11-06 | [7592](https://github.com/airbytehq/airbyte/pull/7592) | Fix getting `anyType` fields using BULK API | -| 0.1.2 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | -| 0.1.1 | 2021-09-21 | [6209](https://github.com/airbytehq/airbyte/pull/6209) | Fix bug with pagination for BULK API | -| 0.1.0 | 2021-09-08 | [5619](https://github.com/airbytehq/airbyte/pull/5619) | Salesforce Aitbyte-Native Connector | +| Version | Date | Pull Request | Subject | +|:--------|:-----------| :--- |:--------------------------------------------------------------------------| +| 0.1.13 | 2022-01-11 | [8797](https://github.com/airbytehq/airbyte/pull/8797) | Switched from authSpecification to advanced_auth in specefication | +| 0.1.12 | 2021-12-23 | [8871](https://github.com/airbytehq/airbyte/pull/8871) | Fix `examples` for new field in specification | +| 0.1.11 | 2021-12-23 | [8871](https://github.com/airbytehq/airbyte/pull/8871) | Add the ability to filter streams by user | +| 0.1.10 | 2021-12-23 | [9005](https://github.com/airbytehq/airbyte/pull/9005) | Handling 400 error when a stream is not queryable | +| 0.1.9 | 2021-12-07 | [8405](https://github.com/airbytehq/airbyte/pull/8405) | Filter 'null' byte(s) in HTTP responses | +| 0.1.8 | 2021-11-30 | [8191](https://github.com/airbytehq/airbyte/pull/8191) | Make `start_date` optional and change its format to `YYYY-MM-DD` | +| 0.1.7 | 2021-11-24 | [8206](https://github.com/airbytehq/airbyte/pull/8206) | Handling 400 error when trying to create a job for sync using Bulk API. 
| +| 0.1.6 | 2021-11-16 | [8009](https://github.com/airbytehq/airbyte/pull/8009) | Fix retring of BULK jobs | +| 0.1.5 | 2021-11-15 | [7885](https://github.com/airbytehq/airbyte/pull/7885) | Add `Transform` for output records | +| 0.1.4 | 2021-11-09 | [7778](https://github.com/airbytehq/airbyte/pull/7778) | Fix types for `anyType` fields | +| 0.1.3 | 2021-11-06 | [7592](https://github.com/airbytehq/airbyte/pull/7592) | Fix getting `anyType` fields using BULK API | +| 0.1.2 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | +| 0.1.1 | 2021-09-21 | [6209](https://github.com/airbytehq/airbyte/pull/6209) | Fix bug with pagination for BULK API | +| 0.1.0 | 2021-09-08 | [5619](https://github.com/airbytehq/airbyte/pull/5619) | Salesforce Aitbyte-Native Connector | From b5d24106a3232eeedc626cb467313d54b85dc510 Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Tue, 11 Jan 2022 12:08:54 +0200 Subject: [PATCH 091/215] =?UTF-8?q?=F0=9F=90=9B=20Salesforce=20Connector:?= =?UTF-8?q?=20handling=20400=20error,=20while=20sobject=20doesn't=20suppor?= =?UTF-8?q?t=20query=20or=20queryAll=20requests=20(#9386)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Salesforce Connector: handling 400 error, while sobject doesn't support query or queryAll requests --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 56 +++++++++++++++---- .../connectors/source-salesforce/Dockerfile | 2 +- .../source_salesforce/streams.py | 15 ++++- docs/integrations/sources/salesforce.md | 1 + 5 files changed, 62 insertions(+), 14 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 4a411eef5008f..5341dbb20d046 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -607,7 +607,7 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 0.1.13 + dockerImageTag: 0.1.14 documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 709cf13606595..000e4cd03cdbc 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6310,7 +6310,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:0.1.13" +- dockerImage: "airbyte/source-salesforce:0.1.14" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" connectionSpecification: @@ -6324,6 +6324,9 @@ - "api_type" additionalProperties: false properties: + auth_type: + type: "string" + const: "Client" client_id: title: "Client ID" description: "The Consumer Key that can be found when viewing your app in\ @@ -6409,15 +6412,48 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: [] - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - 
oauthFlowOutputParameters: - - - "refresh_token" + advanced_auth: + auth_flow_type: "oauth2.0" + predicate_key: + - "auth_type" + predicate_value: "Client" + oauth_config_specification: + oauth_user_input_from_connector_config_specification: + type: "object" + additionalProperties: false + properties: + is_sandbox: + type: "boolean" + path_in_connector_config: + - "is_sandbox" + complete_oauth_output_specification: + type: "object" + additionalProperties: false + properties: + refresh_token: + type: "string" + path_in_connector_config: + - "refresh_token" + complete_oauth_server_input_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + client_secret: + type: "string" + complete_oauth_server_output_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + path_in_connector_config: + - "client_id" + client_secret: + type: "string" + path_in_connector_config: + - "client_secret" - dockerImage: "airbyte/source-search-metrics:0.1.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/seacrh-metrics" diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile index 5f654c8695e20..2186b8306e577 100644 --- a/airbyte-integrations/connectors/source-salesforce/Dockerfile +++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile @@ -25,5 +25,5 @@ COPY source_salesforce ./source_salesforce ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.13 +LABEL io.airbyte.version=0.1.14 LABEL io.airbyte.name=airbyte/source-salesforce diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py index 4e4a028481a31..43e55a573fb1e 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py @@ -90,9 +90,20 @@ def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]: try: yield from super().read_records(**kwargs) except exceptions.HTTPError as error: + """ + There are several types of Salesforce sobjects that require additional processing: + 1. Sobjects for which the user, after setting up the data using Airbyte, restricted access, + and we will receive 403 HTTP errors. + 2. There are streams that do not allow you to make a sample using Salesforce `query` or `queryAll`. + And since we use a dynamic method of generating streams for Salesforce connector - at the stage of discover, + we cannot filter out these streams, so we catch them at the stage of reading data. 
+ """ error_data = error.response.json()[0] - if error.response.status_code == codes.FORBIDDEN and not error_data.get("errorCode", "") == "REQUEST_LIMIT_EXCEEDED": - self.logger.error(f"Cannot receive data for stream '{self.name}', error message: '{error_data.get('message')}'") + if error.response.status_code in [codes.FORBIDDEN, codes.BAD_REQUEST]: + error_code = error_data.get("errorCode", "") + if error_code != "REQUEST_LIMIT_EXCEEDED" or error_code == "INVALID_TYPE_FOR_OPERATION": + self.logger.error(f"Cannot receive data for stream '{self.name}', error message: '{error_data.get('message')}'") + return else: raise error diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index 7a104b382b7ff..e11cc79c4efc5 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -737,6 +737,7 @@ List of available streams: | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:--------------------------------------------------------------------------| +| 0.1.14 | 2022-01-11 | [9386](https://github.com/airbytehq/airbyte/pull/9386) | Handling 400 error, while `sobject` doesn't support `query` or `queryAll` requests | | 0.1.13 | 2022-01-11 | [8797](https://github.com/airbytehq/airbyte/pull/8797) | Switched from authSpecification to advanced_auth in specefication | | 0.1.12 | 2021-12-23 | [8871](https://github.com/airbytehq/airbyte/pull/8871) | Fix `examples` for new field in specification | | 0.1.11 | 2021-12-23 | [8871](https://github.com/airbytehq/airbyte/pull/8871) | Add the ability to filter streams by user | From 791f39aee9cea66ebd93646c7fe815a8703e6f46 Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Tue, 11 Jan 2022 12:45:22 +0200 Subject: [PATCH 092/215] =?UTF-8?q?=F0=9F=90=9B=20Salesforce=20Connector:?= =?UTF-8?q?=20Correcting=20the=20presence=20of=20an=20extra=20'else'=20han?= =?UTF-8?q?dler=20in=20the=20error=20handling=20(#9409)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Correcting the presence of an extra 'else' handler in the error handling --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/connectors/source-salesforce/Dockerfile | 2 +- .../connectors/source-salesforce/source_salesforce/streams.py | 3 +-- docs/integrations/sources/salesforce.md | 1 + 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 5341dbb20d046..374869a1e9196 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -607,7 +607,7 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 0.1.14 + dockerImageTag: 0.1.15 documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 000e4cd03cdbc..fba0efb4d2360 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6310,7 +6310,7 @@ 
supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:0.1.14" +- dockerImage: "airbyte/source-salesforce:0.1.15" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile index 2186b8306e577..6c59b3d8b1d38 100644 --- a/airbyte-integrations/connectors/source-salesforce/Dockerfile +++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile @@ -25,5 +25,5 @@ COPY source_salesforce ./source_salesforce ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.14 +LABEL io.airbyte.version=0.1.15 LABEL io.airbyte.name=airbyte/source-salesforce diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py index 43e55a573fb1e..4ea39cde10149 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py @@ -104,8 +104,7 @@ def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]: if error_code != "REQUEST_LIMIT_EXCEEDED" or error_code == "INVALID_TYPE_FOR_OPERATION": self.logger.error(f"Cannot receive data for stream '{self.name}', error message: '{error_data.get('message')}'") return - else: - raise error + raise error class BulkSalesforceStream(SalesforceStream): diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index e11cc79c4efc5..26c44a8ea0f6d 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -737,6 +737,7 @@ List of available streams: | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:--------------------------------------------------------------------------| +| 0.1.15 | 2022-01-11 | [9409](https://github.com/airbytehq/airbyte/pull/9409) | Correcting the presence of an extra `else` handler in the error handling | | 0.1.14 | 2022-01-11 | [9386](https://github.com/airbytehq/airbyte/pull/9386) | Handling 400 error, while `sobject` doesn't support `query` or `queryAll` requests | | 0.1.13 | 2022-01-11 | [8797](https://github.com/airbytehq/airbyte/pull/8797) | Switched from authSpecification to advanced_auth in specefication | | 0.1.12 | 2021-12-23 | [8871](https://github.com/airbytehq/airbyte/pull/8871) | Fix `examples` for new field in specification | From 986eb588b43d08a04b3386fce48959b296c1aece Mon Sep 17 00:00:00 2001 From: Yurii Bidiuk <35812734+yurii-bidiuk@users.noreply.github.com> Date: Tue, 11 Jan 2022 15:58:26 +0200 Subject: [PATCH 093/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20Snowflake:=20fi?= =?UTF-8?q?x=20tests=20(#9304)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fixed tests, upgrade jdbc driver version * bump vesion --- .../e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-snowflake/Dockerfile | 2 +- .../connectors/source-snowflake/build.gradle | 2 +- .../SnowflakeSource.java | 7 +++++-- .../sources/SnowflakeSourceDatatypeTest.java | 2 +- docs/integrations/sources/snowflake.md | 1 
+ 8 files changed, 12 insertions(+), 8 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json index 17fe87203f363..9f7883eea777c 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2", "name": "Snowflake", "dockerRepository": "airbyte/source-snowflake", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.3", "documentationUrl": "https://docs.airbyte.io/integrations/sources/snowflake", "icon": "snowflake.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 374869a1e9196..02823954fd323 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -663,7 +663,7 @@ - name: Snowflake sourceDefinitionId: e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2 dockerRepository: airbyte/source-snowflake - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/sources/snowflake icon: snowflake.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index fba0efb4d2360..98672ad81f648 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6865,7 +6865,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-snowflake:0.1.2" +- dockerImage: "airbyte/source-snowflake:0.1.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/snowflake" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-snowflake/Dockerfile b/airbyte-integrations/connectors/source-snowflake/Dockerfile index 9277b0c3cb8a2..80f8a2f88c631 100644 --- a/airbyte-integrations/connectors/source-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/source-snowflake/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-snowflake COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-snowflake diff --git a/airbyte-integrations/connectors/source-snowflake/build.gradle b/airbyte-integrations/connectors/source-snowflake/build.gradle index 7648c5319a73d..846cb671ac12c 100644 --- a/airbyte-integrations/connectors/source-snowflake/build.gradle +++ b/airbyte-integrations/connectors/source-snowflake/build.gradle @@ -16,7 +16,7 @@ dependencies { implementation project(':airbyte-integrations:connectors:source-relational-db') implementation project(':airbyte-protocol:models') implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) - implementation group: 'net.snowflake', name: 'snowflake-jdbc', version: '3.12.14' + implementation group: 'net.snowflake', name: 'snowflake-jdbc', version: '3.13.9' testImplementation testFixtures(project(':airbyte-integrations:connectors:source-jdbc')) testImplementation project(':airbyte-test-utils') diff --git 
a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java index 5056381b7161d..9d6d55db7efe2 100644 --- a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java +++ b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java @@ -39,11 +39,14 @@ public JsonNode toDatabaseConfig(final JsonNode config) { .put("host", config.get("host").asText()) .put("username", config.get("username").asText()) .put("password", config.get("password").asText()) - .put("connection_properties", String.format("role=%s;warehouse=%s;database=%s;schema=%s", + .put("connection_properties", String.format("role=%s;warehouse=%s;database=%s;schema=%s;JDBC_QUERY_RESULT_FORMAT=%s;", config.get("role").asText(), config.get("warehouse").asText(), config.get("database").asText(), - config.get("schema").asText())) + config.get("schema").asText(), + // Needed for JDK17 - see + // https://stackoverflow.com/questions/67409650/snowflake-jdbc-driver-internal-error-fail-to-retrieve-row-count-for-first-arrow + "JSON")) .build()); } diff --git a/airbyte-integrations/connectors/source-snowflake/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SnowflakeSourceDatatypeTest.java b/airbyte-integrations/connectors/source-snowflake/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SnowflakeSourceDatatypeTest.java index 1f732de4acd8a..c06c2afe13823 100644 --- a/airbyte-integrations/connectors/source-snowflake/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SnowflakeSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-snowflake/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SnowflakeSourceDatatypeTest.java @@ -166,7 +166,7 @@ protected void initTests() { .sourceType("DOUBLE") .airbyteType(JsonSchemaPrimitive.NUMBER) .addInsertValues("null", "-9007199254740991", "9007199254740991") - .addExpectedValues(null, "-9.007199254740991E15", "9.007199254740991E15") + .addExpectedValues(null, "-9.00719925474099E15", "9.00719925474099E15") .build()); addDataTypeTestData( TestDataHolder.builder() diff --git a/docs/integrations/sources/snowflake.md b/docs/integrations/sources/snowflake.md index 79097f57beb3b..a6dc33eefde3e 100644 --- a/docs/integrations/sources/snowflake.md +++ b/docs/integrations/sources/snowflake.md @@ -75,6 +75,7 @@ Your database user should now be ready for use with Airbyte. 
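For context on the `JDBC_QUERY_RESULT_FORMAT` change above: the Snowflake JDBC driver's default Arrow result format fails on newer JDKs, so the connector appends `JDBC_QUERY_RESULT_FORMAT=JSON` to the connection properties. The sketch below reproduces that string assembly in isolation; the class and method names are illustrative only, not part of the connector.

```java
import java.util.Map;

// Illustrative sketch (not connector code): assemble the Snowflake JDBC connection
// properties the same way the patch above does, forcing JSON result sets so the
// driver does not use the Arrow format that breaks under JDK 17.
public class SnowflakeConnectionProps {

  static String buildConnectionProperties(final Map<String, String> config) {
    return String.format(
        "role=%s;warehouse=%s;database=%s;schema=%s;JDBC_QUERY_RESULT_FORMAT=%s;",
        config.get("role"),
        config.get("warehouse"),
        config.get("database"),
        config.get("schema"),
        "JSON");
  }

  public static void main(final String[] args) {
    System.out.println(buildConnectionProperties(Map.of(
        "role", "AIRBYTE_ROLE",
        "warehouse", "AIRBYTE_WAREHOUSE",
        "database", "AIRBYTE_DATABASE",
        "schema", "AIRBYTE_SCHEMA")));
    // prints: role=AIRBYTE_ROLE;warehouse=AIRBYTE_WAREHOUSE;database=AIRBYTE_DATABASE;schema=AIRBYTE_SCHEMA;JDBC_QUERY_RESULT_FORMAT=JSON;
  }
}
```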
| Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2021-01-11 | [9304](https://github.com/airbytehq/airbyte/pull/9304) | Upgrade version of JDBC driver | | 0.1.2 | 2021-10-21 | [7257](https://github.com/airbytehq/airbyte/pull/7257) | Fixed parsing of extreme values for FLOAT and NUMBER data types | | 0.1.1 | 2021-08-13 | [4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | From 858e185ff05e20ca209ee1573040e2377a5d462d Mon Sep 17 00:00:00 2001 From: Iryna Grankova <87977540+igrankova@users.noreply.github.com> Date: Tue, 11 Jan 2022 17:12:14 +0200 Subject: [PATCH 094/215] Update fields in source-connectors specifications: snowflake (#9203) --- .../e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json | 2 +- .../main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 14 +++++++------- .../connectors/source-snowflake/Dockerfile | 2 +- .../source-snowflake/src/main/resources/spec.json | 10 +++++----- docs/integrations/sources/snowflake.md | 1 + 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json index 9f7883eea777c..9ebdba29dde73 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2", "name": "Snowflake", "dockerRepository": "airbyte/source-snowflake", - "dockerImageTag": "0.1.3", + "dockerImageTag": "0.1.4", "documentationUrl": "https://docs.airbyte.io/integrations/sources/snowflake", "icon": "snowflake.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 02823954fd323..e834552141572 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -663,7 +663,7 @@ - name: Snowflake sourceDefinitionId: e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2 dockerRepository: airbyte/source-snowflake - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/snowflake icon: snowflake.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 98672ad81f648..78f81c1aed04f 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6865,7 +6865,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-snowflake:0.1.3" +- dockerImage: "airbyte/source-snowflake:0.1.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/snowflake" connectionSpecification: @@ -6883,12 +6883,12 @@ additionalProperties: false properties: host: - description: "Host domain of the snowflake instance (must include the account,\ - \ region, cloud environment, and end with snowflakecomputing.com)." 
+ description: "The host domain of the snowflake instance (must include the\ + \ account, region, cloud environment, and end with snowflakecomputing.com)." examples: - "accountname.us-east-2.aws.snowflakecomputing.com" type: "string" - title: "Account name" + title: "Account Name" order: 0 role: description: "The role you created for Airbyte to access Snowflake." @@ -6898,14 +6898,14 @@ title: "Role" order: 1 warehouse: - description: "The warehouse you created for Airbyte to access data into." + description: "The warehouse you created for Airbyte to access data." examples: - "AIRBYTE_WAREHOUSE" type: "string" title: "Warehouse" order: 2 database: - description: "The database you created for Airbyte to access data into." + description: "The database you created for Airbyte to access data." examples: - "AIRBYTE_DATABASE" type: "string" @@ -6926,7 +6926,7 @@ title: "Username" order: 5 password: - description: "Password associated with the username." + description: "The password associated with the username." type: "string" airbyte_secret: true title: "Password" diff --git a/airbyte-integrations/connectors/source-snowflake/Dockerfile b/airbyte-integrations/connectors/source-snowflake/Dockerfile index 80f8a2f88c631..873b0b2d5902e 100644 --- a/airbyte-integrations/connectors/source-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/source-snowflake/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-snowflake COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-snowflake diff --git a/airbyte-integrations/connectors/source-snowflake/src/main/resources/spec.json b/airbyte-integrations/connectors/source-snowflake/src/main/resources/spec.json index 8b8e24da03e03..afc06d871af63 100644 --- a/airbyte-integrations/connectors/source-snowflake/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-snowflake/src/main/resources/spec.json @@ -16,10 +16,10 @@ "additionalProperties": false, "properties": { "host": { - "description": "Host domain of the snowflake instance (must include the account, region, cloud environment, and end with snowflakecomputing.com).", + "description": "The host domain of the snowflake instance (must include the account, region, cloud environment, and end with snowflakecomputing.com).", "examples": ["accountname.us-east-2.aws.snowflakecomputing.com"], "type": "string", - "title": "Account name", + "title": "Account Name", "order": 0 }, "role": { @@ -30,14 +30,14 @@ "order": 1 }, "warehouse": { - "description": "The warehouse you created for Airbyte to access data into.", + "description": "The warehouse you created for Airbyte to access data.", "examples": ["AIRBYTE_WAREHOUSE"], "type": "string", "title": "Warehouse", "order": 2 }, "database": { - "description": "The database you created for Airbyte to access data into.", + "description": "The database you created for Airbyte to access data.", "examples": ["AIRBYTE_DATABASE"], "type": "string", "title": "Database", @@ -58,7 +58,7 @@ "order": 5 }, "password": { - "description": "Password associated with the username.", + "description": "The password associated with the username.", "type": "string", "airbyte_secret": true, "title": "Password", diff --git a/docs/integrations/sources/snowflake.md b/docs/integrations/sources/snowflake.md index a6dc33eefde3e..6e03b35ac86df 100644 --- a/docs/integrations/sources/snowflake.md +++ b/docs/integrations/sources/snowflake.md @@ -75,6 +75,7 @@ Your database user should now be ready for 
use with Airbyte. | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.4 | 2021-12-30 | [9203](https://github.com/airbytehq/airbyte/pull/9203) | Update connector fields title/description | | 0.1.3 | 2021-01-11 | [9304](https://github.com/airbytehq/airbyte/pull/9304) | Upgrade version of JDBC driver | | 0.1.2 | 2021-10-21 | [7257](https://github.com/airbytehq/airbyte/pull/7257) | Fixed parsing of extreme values for FLOAT and NUMBER data types | | 0.1.1 | 2021-08-13 | [4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | From dad52edcc4ef764a285a21b0c11e8c50d9e33e74 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 11 Jan 2022 11:33:54 -0800 Subject: [PATCH 095/215] make airbyteDocker build cache functional (#9362) --- .../src/main/groovy/airbyte-docker.gradle | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/buildSrc/src/main/groovy/airbyte-docker.gradle b/buildSrc/src/main/groovy/airbyte-docker.gradle index d5d76ac88e312..37e0f7c07cae4 100644 --- a/buildSrc/src/main/groovy/airbyte-docker.gradle +++ b/buildSrc/src/main/groovy/airbyte-docker.gradle @@ -64,6 +64,15 @@ class AirbyteDockerPlugin implements Plugin { static def getBaseTaggedImages(File dockerfile) { def result = [] as Set + // Look for "FROM foo AS bar" directives, and add them to the map with .put("bar", "foo") + Map imageAliases = [:] + dockerfile.eachLine { line -> + def parts = line.split() + if (parts.length >= 4 && parts[0].equals("FROM") && parts[parts.length - 2].equals("AS")) { + imageAliases.put(parts[parts.length - 1], parts[1]) + } + } + dockerfile.eachLine { line -> if (line.startsWith("FROM ")) { def image = line.split()[1] @@ -72,7 +81,11 @@ class AirbyteDockerPlugin implements Plugin { } else if (line.startsWith("COPY --from=")) { def image = line.substring("COPY --from=".length()).split()[0] assert !image.isEmpty() - result.add(image) + if (imageAliases[image] != null) { + result.add(imageAliases[image]) + } else { + result.add(image) + } } } @@ -93,7 +106,20 @@ class AirbyteDockerPlugin implements Plugin { def stdout = new ByteArrayOutputStream() project.exec { - commandLine "docker", "images", "--no-trunc", "-f", "dangling=false", "--format", "{{.ID}}", taggedImage + commandLine "docker", "images", "--no-trunc", "-f", "dangling=false", "--format", "{{.ID}}", resolveEnvironmentVariables(project, taggedImage) + standardOutput = stdout; + } + + return "$stdout".toString().trim() + } + + // Some image tags rely on environment variables (e.g. "FROM openjdk:${JDK_VERSION}-slim"). + // dump those into a "sh -c 'echo ...'" command to resolve them (e.g. 
"openjdk:17-slim") + static String resolveEnvironmentVariables(Project project, String str) { + def stdout = new ByteArrayOutputStream() + + project.exec { + commandLine "sh", "-c", "echo " + str standardOutput = stdout; } @@ -112,7 +138,7 @@ class AirbyteDockerPlugin implements Plugin { def notUpToDate = new ArrayList(getBaseTaggedImages(dockerPath.toFile())).any { baseImage -> logger.debug "checking base image " + baseImage - def storedBase = (String) project.rootProject.imageToHash.get(baseImage) + def storedBase = (String) project.rootProject.imageToHash.get(resolveEnvironmentVariables(project, baseImage)) def currentBase = getImageHash(project, baseImage) logger.debug "storedBase " + storedBase From 9cff110510d729cc9870d3ffae60b05d28f3eeb2 Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Tue, 11 Jan 2022 13:48:58 -0800 Subject: [PATCH 096/215] Run the 'Release Airbyte' workflow on EC2 runners (#9361) * use ec2 runner for release-airbyte-os github workflow * use setup-python to install pip, which is needed for release_version.sh --- .github/workflows/release-airbyte-os.yml | 52 +++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release-airbyte-os.yml b/.github/workflows/release-airbyte-os.yml index 8f45f8411d386..3c1f4e58024d6 100644 --- a/.github/workflows/release-airbyte-os.yml +++ b/.github/workflows/release-airbyte-os.yml @@ -7,8 +7,29 @@ on: required: true default: "patch" jobs: - releaseAirbyte: + # In case of self-hosted EC2 errors, remove this block. + start-release-airbyte-runner: + name: "Release Airbyte: Start EC2 Runner" + timeout-minutes: 10 runs-on: ubuntu-latest + outputs: + label: ${{ steps.start-ec2-runner.outputs.label }} + ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} + steps: + - name: Checkout Airbyte + uses: actions/checkout@v2 + - name: Start AWS Runner + id: start-ec2-runner + uses: ./.github/actions/start-aws-runner + with: + aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} + github-token: ${{ secrets.SELF_RUNNER_GITHUB_ACCESS_TOKEN }} + + releaseAirbyte: + # In case of self-hosted EC2 errors, removed the `needs` line and switch back to running on ubuntu-latest. + needs: start-release-airbyte-runner # required to start the main job when the runner is ready + runs-on: ${{ needs.start-release-airbyte-runner.outputs.label }} # run the job on the newly created runner environment: more-secrets steps: - name: Checkout @@ -22,6 +43,11 @@ jobs: - uses: actions/setup-node@v1 with: node-version: "16.13.0" + + # necessary to install pip + - uses: actions/setup-python@v2 + with: + python-version: "3.7" - name: Save Old Version id: old_version run: | @@ -63,3 +89,27 @@ jobs: run: | echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}" echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}" + + # In case of self-hosted EC2 errors, remove this block. 
+ stop-release-airbyte-runner: + name: "Release Airbyte: Stop EC2 Runner" + timeout-minutes: 10 + needs: + - start-release-airbyte-runner # required to get output from the start-runner job + - releaseAirbyte # required to wait when the main job is done + runs-on: ubuntu-latest + if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-2 + - name: Stop EC2 runner + uses: machulav/ec2-github-runner@v2.3.0 + with: + mode: stop + github-token: ${{ secrets.SELF_RUNNER_GITHUB_ACCESS_TOKEN }} + label: ${{ needs.start-release-airbyte-runner.outputs.label }} + ec2-instance-id: ${{ needs.start-release-airbyte-runner.outputs.ec2-instance-id }} From bf9e9cae383cc42755592aa2efb32652613fca8f Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Tue, 11 Jan 2022 13:49:25 -0800 Subject: [PATCH 097/215] Track per-stream record counts and records committed, and other sync summary metadata (#9327) * StateDeltaTracker class and tests * working prototype implementation of per-stream record tracking * misc stuff to get build working * add new fields to replicationAttemptSummary * update AirbyteMessageTracker to use StateDeltaTracker, and new interface methods * finish implementation and tests for stateDeltaTracker and all new ReplicationAttemptSummary fields * undo temporary changes to files that I accidentally committed * simplify interactions with byte buffers (#9331) * define a map instead of generic object for counts by stream * follow convention of keyToValue instead of valueByKey for maps * use synchronized blocks instead of synchronized methods * add totalBytesEmitted field to eventually replace bytesSynced * misc PR feedback nits * additionalProperties probably should still be false * javadoc formatting * define syncStats and use it for total and per-stream stats * change per-stream stats map to a list, and set stats in standardSyncSummary * wrap entire method bodies in synchronized block * use a long instead of a Long for required fields * remove extranneous 'this' * set committed records to emitted records if sync has success status * throw checked exception if commit state before add state, simplify exception handling throughout * set delta tracker memory limit to 20MiB * log error message that was thrown instead of assumed cause * StreamSyncStats wrapper, add test case for populating stats on failure, misc formatting Co-authored-by: Charles --- .../types/ReplicationAttemptSummary.yaml | 12 +- .../resources/types/StandardSyncSummary.yaml | 12 +- .../main/resources/types/StreamSyncStats.yaml | 15 + .../src/main/resources/types/SyncStats.yaml | 19 ++ .../ReplicationJobOrchestrator.java | 1 - .../workers/DefaultReplicationWorker.java | 71 +++-- .../airbyte/AirbyteMessageTracker.java | 219 ++++++++++++-- .../protocols/airbyte/MessageTracker.java | 86 +++++- .../protocols/airbyte/StateDeltaTracker.java | 158 ++++++++++ .../sync/ReplicationActivityImpl.java | 5 +- .../workers/DefaultReplicationWorkerTest.java | 111 +++++-- .../airbyte/AirbyteMessageTrackerTest.java | 277 ++++++++++++++++-- .../airbyte/StateDeltaTrackerTest.java | 121 ++++++++ 13 files changed, 987 insertions(+), 120 deletions(-) create mode 100644 airbyte-config/models/src/main/resources/types/StreamSyncStats.yaml create mode 100644 
airbyte-config/models/src/main/resources/types/SyncStats.yaml create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/StateDeltaTracker.java create mode 100644 airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/StateDeltaTrackerTest.java diff --git a/airbyte-config/models/src/main/resources/types/ReplicationAttemptSummary.yaml b/airbyte-config/models/src/main/resources/types/ReplicationAttemptSummary.yaml index 90356db63cbe9..fc3eee68b54b9 100644 --- a/airbyte-config/models/src/main/resources/types/ReplicationAttemptSummary.yaml +++ b/airbyte-config/models/src/main/resources/types/ReplicationAttemptSummary.yaml @@ -9,17 +9,25 @@ required: - bytesSynced - startTime - endTime + - totalStats + - streamStats additionalProperties: false properties: status: "$ref": ReplicationStatus.yaml - recordsSynced: + recordsSynced: # TODO (parker) remove in favor of totalRecordsEmitted type: integer minValue: 0 - bytesSynced: + bytesSynced: # TODO (parker) remove in favor of totalBytesEmitted type: integer minValue: 0 startTime: type: integer endTime: type: integer + totalStats: + "$ref": SyncStats.yaml + streamStats: + type: array + items: + "$ref": StreamSyncStats.yaml diff --git a/airbyte-config/models/src/main/resources/types/StandardSyncSummary.yaml b/airbyte-config/models/src/main/resources/types/StandardSyncSummary.yaml index 49d84d49e0a1f..a305f7fc44104 100644 --- a/airbyte-config/models/src/main/resources/types/StandardSyncSummary.yaml +++ b/airbyte-config/models/src/main/resources/types/StandardSyncSummary.yaml @@ -12,17 +12,25 @@ required: - bytesSynced - startTime - endTime + - totalStats + - streamStats additionalProperties: false properties: status: "$ref": ReplicationStatus.yaml - recordsSynced: + recordsSynced: # TODO (parker) remove in favor of totalRecordsEmitted type: integer minValue: 0 - bytesSynced: + bytesSynced: # TODO (parker) remove in favor of totalBytesEmitted type: integer minValue: 0 startTime: type: integer endTime: type: integer + totalStats: + "$ref": SyncStats.yaml + streamStats: + type: array + items: + "$ref": StreamSyncStats.yaml diff --git a/airbyte-config/models/src/main/resources/types/StreamSyncStats.yaml b/airbyte-config/models/src/main/resources/types/StreamSyncStats.yaml new file mode 100644 index 0000000000000..c20003f72c5dc --- /dev/null +++ b/airbyte-config/models/src/main/resources/types/StreamSyncStats.yaml @@ -0,0 +1,15 @@ +--- +"$schema": http://json-schema.org/draft-07/schema# +"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/StreamSyncStats.yaml +title: StreamSyncStats +description: Sync stats for a particular stream. +type: object +required: + - streamName + - stats +additionalProperties: false +properties: + streamName: + type: string + stats: + "$ref": SyncStats.yaml diff --git a/airbyte-config/models/src/main/resources/types/SyncStats.yaml b/airbyte-config/models/src/main/resources/types/SyncStats.yaml new file mode 100644 index 0000000000000..5c38885e6dc2f --- /dev/null +++ b/airbyte-config/models/src/main/resources/types/SyncStats.yaml @@ -0,0 +1,19 @@ +--- +"$schema": http://json-schema.org/draft-07/schema# +"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/SyncStats.yaml +title: SyncStats +description: sync stats. 
+type: object +required: + - recordsEmitted + - bytesEmitted +additionalProperties: false +properties: + recordsEmitted: + type: integer + bytesEmitted: + type: integer + stateMessagesEmitted: # TODO make required once per-stream state messages are supported in V2 + type: integer + recordsCommitted: + type: integer # if unset, committed records could not be computed diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java index ea8953e74976a..cbcc4338c0562 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java @@ -91,7 +91,6 @@ public void runJob() throws Exception { airbyteSource, new NamespacingMapper(syncInput.getNamespaceDefinition(), syncInput.getNamespaceFormat(), syncInput.getPrefix()), new DefaultAirbyteDestination(workerConfigs, destinationLauncher), - new AirbyteMessageTracker(), new AirbyteMessageTracker()); log.info("Running replication worker..."); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/DefaultReplicationWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/DefaultReplicationWorker.java index 5956816498852..752526699bf27 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/DefaultReplicationWorker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/DefaultReplicationWorker.java @@ -9,6 +9,8 @@ import io.airbyte.config.StandardSyncInput; import io.airbyte.config.StandardSyncSummary.ReplicationStatus; import io.airbyte.config.State; +import io.airbyte.config.StreamSyncStats; +import io.airbyte.config.SyncStats; import io.airbyte.config.WorkerDestinationConfig; import io.airbyte.config.WorkerSourceConfig; import io.airbyte.protocol.models.AirbyteMessage; @@ -17,6 +19,7 @@ import io.airbyte.workers.protocols.airbyte.AirbyteSource; import io.airbyte.workers.protocols.airbyte.MessageTracker; import java.nio.file.Path; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.CompletableFuture; @@ -55,8 +58,7 @@ public class DefaultReplicationWorker implements ReplicationWorker { private final AirbyteSource source; private final AirbyteMapper mapper; private final AirbyteDestination destination; - private final MessageTracker sourceMessageTracker; - private final MessageTracker destinationMessageTracker; + private final MessageTracker messageTracker; private final ExecutorService executors; private final AtomicBoolean cancelled; @@ -67,15 +69,13 @@ public DefaultReplicationWorker(final String jobId, final AirbyteSource source, final AirbyteMapper mapper, final AirbyteDestination destination, - final MessageTracker sourceMessageTracker, - final MessageTracker destinationMessageTracker) { + final MessageTracker messageTracker) { this.jobId = jobId; this.attempt = attempt; this.source = source; this.mapper = mapper; this.destination = destination; - this.sourceMessageTracker = sourceMessageTracker; - this.destinationMessageTracker = destinationMessageTracker; + this.messageTracker = messageTracker; this.executors = Executors.newFixedThreadPool(2); this.cancelled = new AtomicBoolean(false); @@ -120,11 +120,11 @@ public ReplicationOutput run(final StandardSyncInput syncInput, final Path jobRo source.start(sourceConfig, jobRoot); final 
CompletableFuture destinationOutputThreadFuture = CompletableFuture.runAsync( - getDestinationOutputRunnable(destination, cancelled, destinationMessageTracker, mdc), + getDestinationOutputRunnable(destination, cancelled, messageTracker, mdc), executors); final CompletableFuture replicationThreadFuture = CompletableFuture.runAsync( - getReplicationRunnable(source, destination, cancelled, mapper, sourceMessageTracker, mdc), + getReplicationRunnable(source, destination, cancelled, mapper, messageTracker, mdc), executors); LOGGER.info("Waiting for source and destination threads to complete."); @@ -155,10 +155,45 @@ else if (hasFailed.get()) { outputStatus = ReplicationStatus.COMPLETED; } + final SyncStats totalSyncStats = new SyncStats() + .withRecordsEmitted(messageTracker.getTotalRecordsEmitted()) + .withBytesEmitted(messageTracker.getTotalBytesEmitted()) + .withStateMessagesEmitted(messageTracker.getTotalStateMessagesEmitted()); + + if (outputStatus == ReplicationStatus.COMPLETED) { + totalSyncStats.setRecordsCommitted(totalSyncStats.getRecordsEmitted()); + } else if (messageTracker.getTotalRecordsCommitted().isPresent()) { + totalSyncStats.setRecordsCommitted(messageTracker.getTotalRecordsCommitted().get()); + } else { + LOGGER.warn("Could not reliably determine committed record counts, committed record stats will be set to null"); + totalSyncStats.setRecordsCommitted(null); + } + + // assume every stream with stats is in streamToEmittedRecords map + final List streamSyncStats = messageTracker.getStreamToEmittedRecords().keySet().stream().map(stream -> { + final SyncStats syncStats = new SyncStats() + .withRecordsEmitted(messageTracker.getStreamToEmittedRecords().get(stream)) + .withBytesEmitted(messageTracker.getStreamToEmittedBytes().get(stream)) + .withStateMessagesEmitted(null); // TODO (parker) populate per-stream state messages emitted once supported in V2 + + if (outputStatus == ReplicationStatus.COMPLETED) { + syncStats.setRecordsCommitted(messageTracker.getStreamToEmittedRecords().get(stream)); + } else if (messageTracker.getStreamToCommittedRecords().isPresent()) { + syncStats.setRecordsCommitted(messageTracker.getStreamToCommittedRecords().get().get(stream)); + } else { + syncStats.setRecordsCommitted(null); + } + return new StreamSyncStats() + .withStreamName(stream) + .withStats(syncStats); + }).collect(Collectors.toList()); + final ReplicationAttemptSummary summary = new ReplicationAttemptSummary() .withStatus(outputStatus) - .withRecordsSynced(sourceMessageTracker.getRecordCount()) - .withBytesSynced(sourceMessageTracker.getBytesCount()) + .withRecordsSynced(messageTracker.getTotalRecordsEmitted()) // TODO (parker) remove in favor of totalRecordsEmitted + .withBytesSynced(messageTracker.getTotalBytesEmitted()) // TODO (parker) remove in favor of totalBytesEmitted + .withTotalStats(totalSyncStats) + .withStreamStats(streamSyncStats) .withStartTime(startTime) .withEndTime(System.currentTimeMillis()); @@ -168,15 +203,15 @@ else if (hasFailed.get()) { .withReplicationAttemptSummary(summary) .withOutputCatalog(destinationConfig.getCatalog()); - if (sourceMessageTracker.getOutputState().isPresent()) { + if (messageTracker.getSourceOutputState().isPresent()) { LOGGER.info("Source output at least one state message"); } else { LOGGER.info("Source did not output any state messages"); } - if (destinationMessageTracker.getOutputState().isPresent()) { - LOGGER.info("State capture: Updated state to: {}", destinationMessageTracker.getOutputState()); - final State state = 
destinationMessageTracker.getOutputState().get(); + if (messageTracker.getDestinationOutputState().isPresent()) { + LOGGER.info("State capture: Updated state to: {}", messageTracker.getDestinationOutputState()); + final State state = messageTracker.getDestinationOutputState().get(); output.withState(state); } else if (syncInput.getState() != null) { LOGGER.warn("State capture: No new state, falling back on input state: {}", syncInput.getState()); @@ -196,7 +231,7 @@ private static Runnable getReplicationRunnable(final AirbyteSource source, final AirbyteDestination destination, final AtomicBoolean cancelled, final AirbyteMapper mapper, - final MessageTracker sourceMessageTracker, + final MessageTracker messageTracker, final Map mdc) { return () -> { MDC.setContextMap(mdc); @@ -208,7 +243,7 @@ private static Runnable getReplicationRunnable(final AirbyteSource source, if (messageOptional.isPresent()) { final AirbyteMessage message = mapper.mapMessage(messageOptional.get()); - sourceMessageTracker.accept(message); + messageTracker.acceptFromSource(message); destination.accept(message); recordsRead += 1; @@ -235,7 +270,7 @@ private static Runnable getReplicationRunnable(final AirbyteSource source, private static Runnable getDestinationOutputRunnable(final AirbyteDestination destination, final AtomicBoolean cancelled, - final MessageTracker destinationMessageTracker, + final MessageTracker messageTracker, final Map mdc) { return () -> { MDC.setContextMap(mdc); @@ -245,7 +280,7 @@ private static Runnable getDestinationOutputRunnable(final AirbyteDestination de final Optional messageOptional = destination.attemptRead(); if (messageOptional.isPresent()) { LOGGER.info("state in DefaultReplicationWorker from Destination: {}", messageOptional.get()); - destinationMessageTracker.accept(messageOptional.get()); + messageTracker.acceptFromDestination(messageOptional.get()); } } if (!cancelled.get() && destination.getExitValue() != 0) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTracker.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTracker.java index 8cbf4be5308ba..4d4f93ed6f0af 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTracker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTracker.java @@ -4,51 +4,232 @@ package io.airbyte.workers.protocols.airbyte; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Charsets; +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; import io.airbyte.commons.json.Jsons; import io.airbyte.config.State; import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.workers.protocols.airbyte.StateDeltaTracker.StateDeltaTrackerException; +import java.util.HashMap; +import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class AirbyteMessageTracker implements MessageTracker { - private final AtomicLong recordCount; - private final AtomicLong numBytes; - private final AtomicReference outputState; + private static final long 
STATE_DELTA_TRACKER_MEMORY_LIMIT_BYTES = 20L * 1024L * 1024L; // 20 MiB, ~10% of default cloud worker memory + + private final AtomicReference sourceOutputState; + private final AtomicReference destinationOutputState; + private final AtomicLong totalEmittedStateMessages; + private final Map streamToRunningCount; + private final HashFunction hashFunction; + private final BiMap streamNameToIndex; + private final Map streamToTotalBytesEmitted; + private final Map streamToTotalRecordsEmitted; + private final StateDeltaTracker stateDeltaTracker; + + private short nextStreamIndex; + + /** + * If the StateDeltaTracker throws an exception, this flag is set to true and committed counts are + * not returned. + */ + private boolean unreliableCommittedCounts; public AirbyteMessageTracker() { - this.recordCount = new AtomicLong(); - this.numBytes = new AtomicLong(); - this.outputState = new AtomicReference<>(); + this(new StateDeltaTracker(STATE_DELTA_TRACKER_MEMORY_LIMIT_BYTES)); + } + + @VisibleForTesting + protected AirbyteMessageTracker(final StateDeltaTracker stateDeltaTracker) { + this.sourceOutputState = new AtomicReference<>(); + this.destinationOutputState = new AtomicReference<>(); + this.totalEmittedStateMessages = new AtomicLong(0L); + this.streamToRunningCount = new HashMap<>(); + this.streamNameToIndex = HashBiMap.create(); + this.hashFunction = Hashing.murmur3_32_fixed(); + this.streamToTotalBytesEmitted = new HashMap<>(); + this.streamToTotalRecordsEmitted = new HashMap<>(); + this.stateDeltaTracker = stateDeltaTracker; + this.nextStreamIndex = 0; + this.unreliableCommittedCounts = false; } @Override - public void accept(final AirbyteMessage message) { - if (message.getType() == AirbyteMessage.Type.RECORD) { - recordCount.incrementAndGet(); - // todo (cgardens) - pretty wasteful to do an extra serialization just to get size. - numBytes.addAndGet(Jsons.serialize(message.getRecord().getData()).getBytes(Charsets.UTF_8).length); + public void acceptFromSource(final AirbyteMessage message) { + switch (message.getType()) { + case RECORD -> handleSourceEmittedRecord(message.getRecord()); + case STATE -> handleSourceEmittedState(message.getState()); + default -> log.warn("Invalid message type for message: {}", message); + } + } + + @Override + public void acceptFromDestination(final AirbyteMessage message) { + switch (message.getType()) { + case STATE -> handleDestinationEmittedState(message.getState()); + default -> log.warn("Invalid message type for message: {}", message); + } + } + + /** + * When a source emits a record, increment the running record count, the total record count, and the + * total byte count for the record's stream. + */ + private void handleSourceEmittedRecord(final AirbyteRecordMessage recordMessage) { + final short streamIndex = getStreamIndex(recordMessage.getStream()); + + final long currentRunningCount = streamToRunningCount.getOrDefault(streamIndex, 0L); + streamToRunningCount.put(streamIndex, currentRunningCount + 1); + + final long currentTotalCount = streamToTotalRecordsEmitted.getOrDefault(streamIndex, 0L); + streamToTotalRecordsEmitted.put(streamIndex, currentTotalCount + 1); + + // todo (cgardens) - pretty wasteful to do an extra serialization just to get size. 
+ final int numBytes = Jsons.serialize(recordMessage.getData()).getBytes(Charsets.UTF_8).length; + final long currentTotalStreamBytes = streamToTotalBytesEmitted.getOrDefault(streamIndex, 0L); + streamToTotalBytesEmitted.put(streamIndex, currentTotalStreamBytes + numBytes); + } + + /** + * When a source emits a state, persist the current running count per stream to the + * {@link StateDeltaTracker}. Then, reset the running count per stream so that new counts can start + * recording for the next state. Also add the state to list so that state order is tracked + * correctly. + */ + private void handleSourceEmittedState(final AirbyteStateMessage stateMessage) { + sourceOutputState.set(new State().withState(stateMessage.getData())); + totalEmittedStateMessages.incrementAndGet(); + final int stateHash = getStateHashCode(stateMessage); + try { + if (!unreliableCommittedCounts) { + stateDeltaTracker.addState(stateHash, streamToRunningCount); + } + } catch (final StateDeltaTrackerException e) { + log.error(e.getMessage(), e); + unreliableCommittedCounts = true; + } + streamToRunningCount.clear(); + } + + /** + * When a destination emits a state, mark all uncommitted states up to and including this state as + * committed in the {@link StateDeltaTracker}. Also record this state as the last committed state. + */ + private void handleDestinationEmittedState(final AirbyteStateMessage stateMessage) { + destinationOutputState.set(new State().withState(stateMessage.getData())); + try { + if (!unreliableCommittedCounts) { + stateDeltaTracker.commitStateHash(getStateHashCode(stateMessage)); + } + } catch (final StateDeltaTrackerException e) { + log.error(e.getMessage(), e); + unreliableCommittedCounts = true; + } + } + + private short getStreamIndex(final String streamName) { + if (!streamNameToIndex.containsKey(streamName)) { + streamNameToIndex.put(streamName, nextStreamIndex); + nextStreamIndex++; } - if (message.getType() == AirbyteMessage.Type.STATE) { - outputState.set(new State().withState(message.getState().getData())); + return streamNameToIndex.get(streamName); + } + + private int getStateHashCode(final AirbyteStateMessage stateMessage) { + return hashFunction.hashBytes(Jsons.serialize(stateMessage.getData()).getBytes(Charsets.UTF_8)).hashCode(); + } + + @Override + public Optional getSourceOutputState() { + return Optional.ofNullable(sourceOutputState.get()); + } + + @Override + public Optional getDestinationOutputState() { + return Optional.ofNullable(destinationOutputState.get()); + } + + /** + * Fetch committed stream index to record count from the {@link StateDeltaTracker}. Then, swap out + * stream indices for stream names. If the delta tracker has exceeded its capacity, return empty + * because committed record counts cannot be reliably computed. + */ + @Override + public Optional> getStreamToCommittedRecords() { + if (unreliableCommittedCounts) { + return Optional.empty(); } + final Map streamIndexToCommittedRecordCount = stateDeltaTracker.getStreamToCommittedRecords(); + return Optional.of( + streamIndexToCommittedRecordCount.entrySet().stream().collect( + Collectors.toMap( + entry -> streamNameToIndex.inverse().get(entry.getKey()), + Map.Entry::getValue))); + } + + /** + * Swap out stream indices for stream names and return total records emitted by stream. 
+ */ + @Override + public Map getStreamToEmittedRecords() { + return streamToTotalRecordsEmitted.entrySet().stream().collect(Collectors.toMap( + entry -> streamNameToIndex.inverse().get(entry.getKey()), + Map.Entry::getValue)); } + /** + * Swap out stream indices for stream names and return total bytes emitted by stream. + */ @Override - public long getRecordCount() { - return recordCount.get(); + public Map getStreamToEmittedBytes() { + return streamToTotalBytesEmitted.entrySet().stream().collect(Collectors.toMap( + entry -> streamNameToIndex.inverse().get(entry.getKey()), + Map.Entry::getValue)); } + /** + * Compute sum of emitted record counts across all streams. + */ @Override - public long getBytesCount() { - return numBytes.get(); + public long getTotalRecordsEmitted() { + return streamToTotalRecordsEmitted.values().stream().reduce(0L, Long::sum); + } + + /** + * Compute sum of emitted bytes across all streams. + */ + @Override + public long getTotalBytesEmitted() { + return streamToTotalBytesEmitted.values().stream().reduce(0L, Long::sum); + } + + /** + * Compute sum of committed record counts across all streams. If the delta tracker has exceeded its + * capacity, return empty because committed record counts cannot be reliably computed. + */ + @Override + public Optional getTotalRecordsCommitted() { + if (unreliableCommittedCounts) { + return Optional.empty(); + } + return Optional.of(stateDeltaTracker.getStreamToCommittedRecords().values().stream().reduce(0L, Long::sum)); } @Override - public Optional getOutputState() { - return Optional.ofNullable(outputState.get()); + public Long getTotalStateMessagesEmitted() { + return totalEmittedStateMessages.get(); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/MessageTracker.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/MessageTracker.java index 5213abf76a990..9e0a770e60cf2 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/MessageTracker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/MessageTracker.java @@ -6,42 +6,100 @@ import io.airbyte.config.State; import io.airbyte.protocol.models.AirbyteMessage; +import java.util.Map; import java.util.Optional; -import java.util.function.Consumer; /** * Interface to handle extracting metadata from the stream of data flowing from a Source to a * Destination. */ -public interface MessageTracker extends Consumer { +public interface MessageTracker { /** - * Accepts an AirbyteMessage and tracks any metadata about it that is required by the Platform. + * Accepts an AirbyteMessage emitted from a source and tracks any metadata about it that is required + * by the Platform. * * @param message message to derive metadata from. */ - @Override - void accept(AirbyteMessage message); + void acceptFromSource(AirbyteMessage message); /** - * Gets the records replicated. + * Accepts an AirbyteMessage emitted from a destination and tracks any metadata about it that is + * required by the Platform. * - * @return total records that passed from Source to Destination. + * @param message message to derive metadata from. + */ + void acceptFromDestination(AirbyteMessage message); + + /** + * Get the current source state of the stream. + * + * @return returns the last StateMessage that was accepted from the source. If no StateMessage was + * accepted, empty. + */ + Optional getSourceOutputState(); + + /** + * Get the current destination state of the stream. 
+ * + * @return returns the last StateMessage that was accepted from the destination. If no StateMessage + * was accepted, empty. + */ + Optional getDestinationOutputState(); + + /** + * Get the per-stream committed record count. + * + * @return returns a map of committed record count by stream name. If committed record counts cannot + * be computed, empty. + */ + Optional> getStreamToCommittedRecords(); + + /** + * Get the per-stream emitted record count. This includes messages that were emitted by the source, + * but never committed by the destination. + * + * @return returns a map of emitted record count by stream name. + */ + Map getStreamToEmittedRecords(); + + /** + * Get the per-stream emitted byte count. This includes messages that were emitted by the source, + * but never committed by the destination. + * + * @return returns a map of emitted record count by stream name. + */ + Map getStreamToEmittedBytes(); + + /** + * Get the overall emitted record count. This includes messages that were emitted by the source, but + * never committed by the destination. + * + * @return returns the total count of emitted records across all streams. + */ + long getTotalRecordsEmitted(); + + /** + * Get the overall emitted bytes. This includes messages that were emitted by the source, but never + * committed by the destination. + * + * @return returns the total emitted bytes across all streams. */ - long getRecordCount(); + long getTotalBytesEmitted(); /** - * Gets the bytes replicated. + * Get the overall committed record count. * - * @return total bytes that passed from Source to Destination. + * @return returns the total count of committed records across all streams. If total committed + * record count cannot be computed, empty. */ - long getBytesCount(); + Optional getTotalRecordsCommitted(); /** - * Get the current state of the stream. + * Get the overall emitted state message count. * - * @return returns the last StateMessage that was accepted. If no StateMessage was accepted, empty. + * @return returns the total count of emitted state messages. */ - Optional getOutputState(); + Long getTotalStateMessagesEmitted(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/StateDeltaTracker.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/StateDeltaTracker.java new file mode 100644 index 0000000000000..93963fd38e01c --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/StateDeltaTracker.java @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.protocols.airbyte; + +import com.google.common.annotations.VisibleForTesting; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import lombok.extern.slf4j.Slf4j; + +/** + * This class tracks "deltas" between states in compact {@code byte[]}s with the following schema: + * + *
+ *  [(state hash),(stream index),(record count)...] with the last two elements repeating per stream in the delta.
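To make the layout concrete: with the 4-byte state hash, 2-byte stream index, and 8-byte record count sizes defined further down in this class, a delta covering two streams occupies 4 + 2 * (2 + 8) = 24 bytes. A minimal sketch of packing such a delta, mirroring the packing done in addState below (the class name, hash value, stream indices, and counts are illustrative only):

import java.nio.ByteBuffer;

public class DeltaLayoutSketch {

  public static void main(final String[] args) {
    final int stateHash = 12345;            // hypothetical hash of a source state message
    final short[] streamIndexes = {0, 1};   // two streams seen since the previous state
    final long[] recordCounts = {42L, 7L};  // records emitted per stream in this delta

    // 4-byte hash + one (2-byte index + 8-byte count) pair per stream = 24 bytes here
    final int size = Integer.BYTES + streamIndexes.length * (Short.BYTES + Long.BYTES);
    final ByteBuffer delta = ByteBuffer.allocate(size);
    delta.putInt(stateHash);
    for (int i = 0; i < streamIndexes.length; i++) {
      delta.putShort(streamIndexes[i]);
      delta.putLong(recordCounts[i]);
    }
    System.out.println("delta size in bytes: " + delta.array().length); // prints 24
  }

}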
+ *
+ * This class also maintains a {@code Set} of {@code committedStateHashes} so that it can accumulate
+ * both committed and total record counts per stream.
+ *
+ * The StateDeltaTracker is initialized with a memory limit. If this memory limit is exceeded, new + * states deltas will not be added and per-stream record counts will not be able to be computed. + * This is to prevent OutOfMemoryErrors from crashing the sync. + */ +@Slf4j +public class StateDeltaTracker { + + private static final int STATE_HASH_BYTES = Integer.BYTES; + private static final int STREAM_INDEX_BYTES = Short.BYTES; + private static final int RECORD_COUNT_BYTES = Long.BYTES; + private static final int BYTES_PER_STREAM = STREAM_INDEX_BYTES + RECORD_COUNT_BYTES; + + private final Set committedStateHashes; + private final Map streamToCommittedRecords; + + /** + * Every time a state is added, a new byte[] containing the state hash and per-stream delta will be + * added to this list. Every time a state is committed, state deltas up to the committed state are + * removed from the head of the list and aggregated into the committed count map. The source thread + * adds while the destination thread removes, so synchronization is necessary to provide + * thread-safety. + */ + @VisibleForTesting + protected final List stateDeltas; + + @VisibleForTesting + protected long remainingCapacity; + @VisibleForTesting + protected boolean capacityExceeded; + + public StateDeltaTracker(final long memoryLimitBytes) { + this.committedStateHashes = new HashSet<>(); + this.streamToCommittedRecords = new HashMap<>(); + this.stateDeltas = new ArrayList<>(); + this.remainingCapacity = memoryLimitBytes; + this.capacityExceeded = false; + } + + /** + * Converts the given state hash and per-stream record count map into a {@code byte[]} and stores + * it. + *

+ * This method leverages a synchronized block to provide thread safety between the source thread + * calling addState while the destination thread calls commitStateHash. + * + * @throws StateDeltaTrackerException thrown when the memory footprint of stateDeltas exceeds + * available capacity. + */ + public void addState(final int stateHash, final Map streamIndexToRecordCount) throws StateDeltaTrackerException { + synchronized (this) { + final int size = STATE_HASH_BYTES + (streamIndexToRecordCount.size() * BYTES_PER_STREAM); + + if (capacityExceeded || remainingCapacity < size) { + capacityExceeded = true; + throw new StateDeltaTrackerException("Memory capacity is exceeded for StateDeltaTracker."); + } + + final ByteBuffer delta = ByteBuffer.allocate(size); + + delta.putInt(stateHash); + + for (final Map.Entry entry : streamIndexToRecordCount.entrySet()) { + delta.putShort(entry.getKey()); + delta.putLong(entry.getValue()); + } + + stateDeltas.add(delta.array()); + remainingCapacity -= delta.array().length; + } + } + + /** + * Mark the given {@code stateHash} as committed. + *

+ * This method leverages a synchronized block to provide thread safety between the source thread + * calling addState while the destination thread calls commitStateHash. + * + * @throws StateDeltaTrackerException thrown when committed counts can no longer be reliably + * computed. + */ + public void commitStateHash(final int stateHash) throws StateDeltaTrackerException { + synchronized (this) { + if (capacityExceeded) { + throw new StateDeltaTrackerException("Memory capacity exceeded for StateDeltaTracker, so states cannot be reliably committed"); + } + if (committedStateHashes.contains(stateHash)) { + throw new StateDeltaTrackerException( + String.format("State hash %d was already committed, likely indicating a state hash collision", stateHash)); + } + + committedStateHashes.add(stateHash); + int currStateHash; + do { + if (stateDeltas.isEmpty()) { + throw new StateDeltaTrackerException(String.format("Delta was not stored for state hash %d", stateHash)); + } + // as deltas are removed and aggregated into committed count map, reclaim capacity + final ByteBuffer currDelta = ByteBuffer.wrap(stateDeltas.remove(0)); + remainingCapacity += currDelta.capacity(); + + currStateHash = currDelta.getInt(); + + final int numStreams = (currDelta.capacity() - STATE_HASH_BYTES) / BYTES_PER_STREAM; + for (int i = 0; i < numStreams; i++) { + final short streamIndex = currDelta.getShort(); + final long recordCount = currDelta.getLong(); + + // aggregate delta into committed count map + final long committedRecordCount = streamToCommittedRecords.getOrDefault(streamIndex, 0L); + streamToCommittedRecords.put(streamIndex, committedRecordCount + recordCount); + } + } while (currStateHash != stateHash); // repeat until each delta up to the committed state is aggregated + } + } + + public Map getStreamToCommittedRecords() { + return streamToCommittedRecords; + } + + /** + * Thrown when the StateDeltaTracker encounters an issue that prevents it from reliably computing + * committed record deltas. 
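Putting the two synchronized methods together, the intended call pattern is: the source-side thread calls addState each time a state message is emitted, the destination-side thread calls commitStateHash when that state is echoed back, and committed per-stream counts can then be read off. A rough single-threaded sketch of that lifecycle, assuming the StateDeltaTracker class above is on the classpath (the hash, stream indices, and counts are made up):

import io.airbyte.workers.protocols.airbyte.StateDeltaTracker;
import java.util.Map;

public class StateDeltaTrackerLifecycleSketch {

  public static void main(final String[] args) throws Exception {
    // 20 MiB limit, the same value AirbyteMessageTracker passes in
    final StateDeltaTracker tracker = new StateDeltaTracker(20L * 1024L * 1024L);

    // source thread: the state with hash 111 covers 5 records on stream index 0 and 3 on stream index 1
    tracker.addState(111, Map.of((short) 0, 5L, (short) 1, 3L));

    // destination thread: the same state comes back, so its delta becomes committed
    tracker.commitStateHash(111);

    // committed counts, keyed by stream index: 0 -> 5, 1 -> 3
    System.out.println(tracker.getStreamToCommittedRecords());
  }

}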
+ */ + public static class StateDeltaTrackerException extends Exception { + + public StateDeltaTrackerException(final String message) { + super(message); + } + + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java index bb4aae0e1c84e..8eb3533b4e1d8 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java @@ -117,7 +117,7 @@ public StandardSyncOutput replicate(final JobRunConfig jobRunConfig, return fullSyncInput; }; - CheckedSupplier, Exception> workerFactory; + final CheckedSupplier, Exception> workerFactory; if (containerOrchestratorEnabled) { workerFactory = getContainerLauncherWorkerFactory(sourceLauncherConfig, destinationLauncherConfig, jobRunConfig, syncInput); @@ -156,6 +156,8 @@ private static StandardSyncOutput reduceReplicationOutput(final ReplicationOutpu syncSummary.setStartTime(output.getReplicationAttemptSummary().getStartTime()); syncSummary.setEndTime(output.getReplicationAttemptSummary().getEndTime()); syncSummary.setStatus(output.getReplicationAttemptSummary().getStatus()); + syncSummary.setTotalStats(output.getReplicationAttemptSummary().getTotalStats()); + syncSummary.setStreamStats(output.getReplicationAttemptSummary().getStreamStats()); final StandardSyncOutput standardSyncOutput = new StandardSyncOutput(); standardSyncOutput.setState(output.getState()); @@ -195,7 +197,6 @@ private CheckedSupplier, Exception> airbyteSource, new NamespacingMapper(syncInput.getNamespaceDefinition(), syncInput.getNamespaceFormat(), syncInput.getPrefix()), new DefaultAirbyteDestination(workerConfigs, destinationLauncher), - new AirbyteMessageTracker(), new AirbyteMessageTracker()); }; } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java index a0f53ce0f005c..f62e7b4d848e5 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java @@ -29,6 +29,8 @@ import io.airbyte.config.StandardSyncInput; import io.airbyte.config.StandardSyncSummary.ReplicationStatus; import io.airbyte.config.State; +import io.airbyte.config.StreamSyncStats; +import io.airbyte.config.SyncStats; import io.airbyte.config.WorkerDestinationConfig; import io.airbyte.config.WorkerSourceConfig; import io.airbyte.config.helpers.LogClientSingleton; @@ -44,6 +46,8 @@ import java.nio.file.Files; import java.nio.file.Path; import java.time.Duration; +import java.util.Collections; +import java.util.List; import java.util.Optional; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; @@ -78,8 +82,7 @@ class DefaultReplicationWorkerTest { private StandardSyncInput syncInput; private WorkerSourceConfig sourceConfig; private WorkerDestinationConfig destinationConfig; - private AirbyteMessageTracker sourceMessageTracker; - private AirbyteMessageTracker destinationMessageTracker; + private AirbyteMessageTracker messageTracker; @SuppressWarnings("unchecked") @BeforeEach @@ -97,8 +100,7 @@ void setup() throws Exception { source = mock(AirbyteSource.class); mapper = mock(NamespacingMapper.class); destination = mock(AirbyteDestination.class); - sourceMessageTracker = 
mock(AirbyteMessageTracker.class); - destinationMessageTracker = mock(AirbyteMessageTracker.class); + messageTracker = mock(AirbyteMessageTracker.class); when(source.isFinished()).thenReturn(false, false, false, true); when(destination.isFinished()).thenReturn(false, false, false, true); @@ -121,8 +123,7 @@ void test() throws Exception { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); worker.run(syncInput, jobRoot); @@ -144,8 +145,7 @@ void testSourceNonZeroExitValue() throws Exception { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); final ReplicationOutput output = worker.run(syncInput, jobRoot); assertEquals(ReplicationStatus.FAILED, output.getReplicationAttemptSummary().getStatus()); @@ -161,8 +161,7 @@ void testDestinationNonZeroExitValue() throws Exception { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); final ReplicationOutput output = worker.run(syncInput, jobRoot); assertEquals(ReplicationStatus.FAILED, output.getReplicationAttemptSummary().getStatus()); @@ -181,8 +180,7 @@ void testLoggingInThreads() throws IOException, WorkerException { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); worker.run(syncInput, jobRoot); @@ -213,7 +211,7 @@ void testLogMaskRegex() throws IOException { void testCancellation() throws InterruptedException { final AtomicReference output = new AtomicReference<>(); when(source.isFinished()).thenReturn(false); - when(destinationMessageTracker.getOutputState()).thenReturn(Optional.of(new State().withState(STATE_MESSAGE.getState().getData()))); + when(messageTracker.getDestinationOutputState()).thenReturn(Optional.of(new State().withState(STATE_MESSAGE.getState().getData()))); final ReplicationWorker worker = new DefaultReplicationWorker( JOB_ID, @@ -221,8 +219,7 @@ void testCancellation() throws InterruptedException { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); final Thread workerThread = new Thread(() -> { try { @@ -235,7 +232,7 @@ void testCancellation() throws InterruptedException { workerThread.start(); // verify the worker is actually running before we kill it. 
- while (Mockito.mockingDetails(sourceMessageTracker).getInvocations().size() < 5) { + while (Mockito.mockingDetails(messageTracker).getInvocations().size() < 5) { LOGGER.info("waiting for worker to start running"); sleep(100); } @@ -249,9 +246,12 @@ void testCancellation() throws InterruptedException { @Test void testPopulatesOutputOnSuccess() throws WorkerException { final JsonNode expectedState = Jsons.jsonNode(ImmutableMap.of("updated_at", 10L)); - when(sourceMessageTracker.getRecordCount()).thenReturn(12L); - when(sourceMessageTracker.getBytesCount()).thenReturn(100L); - when(destinationMessageTracker.getOutputState()).thenReturn(Optional.of(new State().withState(expectedState))); + when(messageTracker.getDestinationOutputState()).thenReturn(Optional.of(new State().withState(expectedState))); + when(messageTracker.getTotalRecordsEmitted()).thenReturn(12L); + when(messageTracker.getTotalBytesEmitted()).thenReturn(100L); + when(messageTracker.getTotalStateMessagesEmitted()).thenReturn(3L); + when(messageTracker.getStreamToEmittedBytes()).thenReturn(Collections.singletonMap("stream1", 100L)); + when(messageTracker.getStreamToEmittedRecords()).thenReturn(Collections.singletonMap("stream1", 12L)); final ReplicationWorker worker = new DefaultReplicationWorker( JOB_ID, @@ -259,15 +259,27 @@ void testPopulatesOutputOnSuccess() throws WorkerException { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); final ReplicationOutput actual = worker.run(syncInput, jobRoot); final ReplicationOutput replicationOutput = new ReplicationOutput() .withReplicationAttemptSummary(new ReplicationAttemptSummary() .withRecordsSynced(12L) .withBytesSynced(100L) - .withStatus(ReplicationStatus.COMPLETED)) + .withStatus(ReplicationStatus.COMPLETED) + .withTotalStats(new SyncStats() + .withRecordsEmitted(12L) + .withBytesEmitted(100L) + .withStateMessagesEmitted(3L) + .withRecordsCommitted(12L)) // since success, should use emitted count + .withStreamStats(Collections.singletonList( + new StreamSyncStats() + .withStreamName("stream1") + .withStats(new SyncStats() + .withBytesEmitted(100L) + .withRecordsEmitted(12L) + .withRecordsCommitted(12L) // since success, should use emitted count + .withStateMessagesEmitted(null))))) .withOutputCatalog(syncInput.getCatalog()) .withState(new State().withState(expectedState)); @@ -291,7 +303,7 @@ void testPopulatesOutputOnSuccess() throws WorkerException { @Test void testPopulatesStateOnFailureIfAvailable() throws Exception { doThrow(new IllegalStateException("induced exception")).when(source).close(); - when(destinationMessageTracker.getOutputState()).thenReturn(Optional.of(new State().withState(STATE_MESSAGE.getState().getData()))); + when(messageTracker.getDestinationOutputState()).thenReturn(Optional.of(new State().withState(STATE_MESSAGE.getState().getData()))); final ReplicationWorker worker = new DefaultReplicationWorker( JOB_ID, @@ -299,8 +311,7 @@ void testPopulatesStateOnFailureIfAvailable() throws Exception { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); final ReplicationOutput actual = worker.run(syncInput, jobRoot); assertNotNull(actual); @@ -317,8 +328,7 @@ void testRetainsStateOnFailureIfNewStateNotAvailable() throws Exception { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); final ReplicationOutput actual = worker.run(syncInput, jobRoot); @@ -326,6 +336,45 @@ void 
testRetainsStateOnFailureIfNewStateNotAvailable() throws Exception { assertEquals(syncInput.getState().getState(), actual.getState().getState()); } + @Test + void testPopulatesStatsOnFailureIfAvailable() throws Exception { + doThrow(new IllegalStateException("induced exception")).when(source).close(); + when(messageTracker.getTotalRecordsEmitted()).thenReturn(12L); + when(messageTracker.getTotalBytesEmitted()).thenReturn(100L); + when(messageTracker.getTotalRecordsCommitted()).thenReturn(Optional.of(6L)); + when(messageTracker.getTotalStateMessagesEmitted()).thenReturn(3L); + when(messageTracker.getStreamToEmittedBytes()).thenReturn(Collections.singletonMap("stream1", 100L)); + when(messageTracker.getStreamToEmittedRecords()).thenReturn(Collections.singletonMap("stream1", 12L)); + when(messageTracker.getStreamToCommittedRecords()).thenReturn(Optional.of(Collections.singletonMap("stream1", 6L))); + + final ReplicationWorker worker = new DefaultReplicationWorker( + JOB_ID, + JOB_ATTEMPT, + source, + mapper, + destination, + messageTracker); + + final ReplicationOutput actual = worker.run(syncInput, jobRoot); + final SyncStats expectedTotalStats = new SyncStats() + .withRecordsEmitted(12L) + .withBytesEmitted(100L) + .withStateMessagesEmitted(3L) + .withRecordsCommitted(6L); + final List expectedStreamStats = Collections.singletonList( + new StreamSyncStats() + .withStreamName("stream1") + .withStats(new SyncStats() + .withBytesEmitted(100L) + .withRecordsEmitted(12L) + .withRecordsCommitted(6L) + .withStateMessagesEmitted(null))); + + assertNotNull(actual); + assertEquals(expectedTotalStats, actual.getReplicationAttemptSummary().getTotalStats()); + assertEquals(expectedStreamStats, actual.getReplicationAttemptSummary().getStreamStats()); + } + @Test void testDoesNotPopulatesStateOnFailureIfNotAvailable() throws Exception { final StandardSyncInput syncInputWithoutState = Jsons.clone(syncInput); @@ -339,8 +388,7 @@ void testDoesNotPopulatesStateOnFailureIfNotAvailable() throws Exception { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); final ReplicationOutput actual = worker.run(syncInputWithoutState, jobRoot); @@ -350,7 +398,7 @@ void testDoesNotPopulatesStateOnFailureIfNotAvailable() throws Exception { @Test void testDoesNotPopulateOnIrrecoverableFailure() { - doThrow(new IllegalStateException("induced exception")).when(sourceMessageTracker).getRecordCount(); + doThrow(new IllegalStateException("induced exception")).when(messageTracker).getTotalRecordsEmitted(); final ReplicationWorker worker = new DefaultReplicationWorker( JOB_ID, @@ -358,8 +406,7 @@ void testDoesNotPopulateOnIrrecoverableFailure() { source, mapper, destination, - sourceMessageTracker, - destinationMessageTracker); + messageTracker); assertThrows(WorkerException.class, () -> worker.run(syncInput, jobRoot)); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTrackerTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTrackerTest.java index 030e4c403e33b..8634c6eedbdb9 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTrackerTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTrackerTest.java @@ -7,58 +7,275 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import com.fasterxml.jackson.databind.JsonNode; -import 
com.google.common.collect.ImmutableMap; +import com.google.common.base.Charsets; import io.airbyte.commons.json.Jsons; import io.airbyte.config.State; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.workers.protocols.airbyte.StateDeltaTracker.StateDeltaTrackerException; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.testcontainers.shaded.com.google.common.base.Charsets; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +@ExtendWith(MockitoExtension.class) class AirbyteMessageTrackerTest { + private static final String STREAM_1 = "stream1"; + private static final String STREAM_2 = "stream2"; + private static final String STREAM_3 = "stream3"; + + private AirbyteMessageTracker messageTracker; + + @Mock + private StateDeltaTracker mStateDeltaTracker; + + @BeforeEach + public void setup() { + this.messageTracker = new AirbyteMessageTracker(mStateDeltaTracker); + } + @Test - public void testIncrementsWhenRecord() { - final AirbyteMessage message = new AirbyteMessage() - .withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withData(Jsons.jsonNode(ImmutableMap.of("name", "rudolph")))); + public void testGetTotalRecordsStatesAndBytesEmitted() { + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 123); + final AirbyteMessage s1 = createStateMessage(1); + final AirbyteMessage s2 = createStateMessage(2); - final AirbyteMessageTracker messageTracker = new AirbyteMessageTracker(); - messageTracker.accept(message); - messageTracker.accept(message); - messageTracker.accept(message); + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(s1); + messageTracker.acceptFromSource(s2); - assertEquals(3, messageTracker.getRecordCount()); - assertEquals(3 * Jsons.serialize(message.getRecord().getData()).getBytes(Charsets.UTF_8).length, messageTracker.getBytesCount()); + assertEquals(3, messageTracker.getTotalRecordsEmitted()); + assertEquals(3 * Jsons.serialize(r1.getRecord().getData()).getBytes(Charsets.UTF_8).length, messageTracker.getTotalBytesEmitted()); + assertEquals(2, messageTracker.getTotalStateMessagesEmitted()); } @Test - public void testRetainsLatestState() { - final JsonNode oldStateValue = Jsons.jsonNode(ImmutableMap.builder().put("lastSync", "1598900000").build()); - final AirbyteMessage oldStateMessage = new AirbyteMessage() - .withType(AirbyteMessage.Type.STATE) - .withState(new AirbyteStateMessage().withData(oldStateValue)); + public void testRetainsLatestSourceAndDestinationState() { + final int s1Value = 111; + final int s2Value = 222; + final int s3Value = 333; + final AirbyteMessage s1 = createStateMessage(s1Value); + final AirbyteMessage s2 = createStateMessage(s2Value); + final AirbyteMessage s3 = createStateMessage(s3Value); - final JsonNode newStateValue = Jsons.jsonNode(ImmutableMap.builder().put("lastSync", "1598993526").build()); - final AirbyteMessage newStateMessage = new AirbyteMessage() - .withType(AirbyteMessage.Type.STATE) - .withState(new AirbyteStateMessage().withData(newStateValue)); + messageTracker.acceptFromSource(s1); + messageTracker.acceptFromSource(s2); + messageTracker.acceptFromSource(s3); + 
messageTracker.acceptFromDestination(s1); + messageTracker.acceptFromDestination(s2); - final AirbyteMessageTracker messageTracker = new AirbyteMessageTracker(); - messageTracker.accept(oldStateMessage); - messageTracker.accept(oldStateMessage); - messageTracker.accept(newStateMessage); + assertTrue(messageTracker.getSourceOutputState().isPresent()); + assertEquals(new State().withState(Jsons.jsonNode(s3Value)), messageTracker.getSourceOutputState().get()); - assertTrue(messageTracker.getOutputState().isPresent()); - assertEquals(new State().withState(newStateValue), messageTracker.getOutputState().get()); + assertTrue(messageTracker.getDestinationOutputState().isPresent()); + assertEquals(new State().withState(Jsons.jsonNode(s2Value)), messageTracker.getDestinationOutputState().get()); } @Test public void testReturnEmptyStateIfNoneEverAccepted() { - final AirbyteMessageTracker MessageTracker = new AirbyteMessageTracker(); - assertTrue(MessageTracker.getOutputState().isEmpty()); + assertTrue(messageTracker.getSourceOutputState().isEmpty()); + assertTrue(messageTracker.getDestinationOutputState().isEmpty()); + } + + @Test + public void testEmittedRecordsByStream() { + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 1); + final AirbyteMessage r2 = createRecordMessage(STREAM_2, 2); + final AirbyteMessage r3 = createRecordMessage(STREAM_3, 3); + + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromSource(r3); + messageTracker.acceptFromSource(r3); + messageTracker.acceptFromSource(r3); + + final Map expected = new HashMap<>(); + expected.put(STREAM_1, 1L); + expected.put(STREAM_2, 2L); + expected.put(STREAM_3, 3L); + + assertEquals(expected, messageTracker.getStreamToEmittedRecords()); + } + + @Test + public void testEmittedBytesByStream() { + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 1); + final AirbyteMessage r2 = createRecordMessage(STREAM_2, 2); + final AirbyteMessage r3 = createRecordMessage(STREAM_3, 3); + + final long r1Bytes = Jsons.serialize(r1.getRecord().getData()).getBytes(Charsets.UTF_8).length; + final long r2Bytes = Jsons.serialize(r2.getRecord().getData()).getBytes(Charsets.UTF_8).length; + final long r3Bytes = Jsons.serialize(r3.getRecord().getData()).getBytes(Charsets.UTF_8).length; + + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromSource(r3); + messageTracker.acceptFromSource(r3); + messageTracker.acceptFromSource(r3); + + final Map expected = new HashMap<>(); + expected.put(STREAM_1, r1Bytes); + expected.put(STREAM_2, r2Bytes * 2); + expected.put(STREAM_3, r3Bytes * 3); + + assertEquals(expected, messageTracker.getStreamToEmittedBytes()); + } + + @Test + public void testGetCommittedRecordsByStream() { + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 1); + final AirbyteMessage r2 = createRecordMessage(STREAM_2, 2); + final AirbyteMessage r3 = createRecordMessage(STREAM_3, 3); + final AirbyteMessage s1 = createStateMessage(1); + final AirbyteMessage s2 = createStateMessage(2); + + messageTracker.acceptFromSource(r1); // should make stream 1 index 0 + messageTracker.acceptFromSource(r2); // should make stream 2 index 1 + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromSource(s1); // emit state 1 + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromDestination(s1); // commit state 1 + 
messageTracker.acceptFromSource(r3); // should make stream 3 index 2 + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(s2); // emit state 2 + + final Map countsByIndex = new HashMap<>(); + final Map expected = new HashMap<>(); + Mockito.when(mStateDeltaTracker.getStreamToCommittedRecords()).thenReturn(countsByIndex); + + countsByIndex.put((short) 0, 1L); + countsByIndex.put((short) 1, 2L); + // result only contains counts up to state 1 + expected.put(STREAM_1, 1L); + expected.put(STREAM_2, 2L); + assertEquals(expected, messageTracker.getStreamToCommittedRecords().get()); + + countsByIndex.clear(); + expected.clear(); + messageTracker.acceptFromDestination(s2); // now commit state 2 + countsByIndex.put((short) 0, 3L); + countsByIndex.put((short) 1, 3L); + countsByIndex.put((short) 2, 1L); + // result updated with counts between state 1 and state 2 + expected.put(STREAM_1, 3L); + expected.put(STREAM_2, 3L); + expected.put(STREAM_3, 1L); + assertEquals(expected, messageTracker.getStreamToCommittedRecords().get()); + } + + @Test + public void testGetCommittedRecordsByStream_emptyWhenAddStateThrowsException() throws Exception { + Mockito.doThrow(new StateDeltaTrackerException("induced exception")).when(mStateDeltaTracker).addState(Mockito.anyInt(), Mockito.anyMap()); + + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 1); + final AirbyteMessage s1 = createStateMessage(1); + + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(s1); + messageTracker.acceptFromDestination(s1); + + assertTrue(messageTracker.getStreamToCommittedRecords().isEmpty()); + } + + @Test + public void testGetCommittedRecordsByStream_emptyWhenCommitStateHashThrowsException() throws Exception { + Mockito.doThrow(new StateDeltaTrackerException("induced exception")).when(mStateDeltaTracker).commitStateHash(Mockito.anyInt()); + + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 1); + final AirbyteMessage s1 = createStateMessage(1); + + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(s1); + messageTracker.acceptFromDestination(s1); + + assertTrue(messageTracker.getStreamToCommittedRecords().isEmpty()); + } + + @Test + public void testTotalRecordsCommitted() { + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 1); + final AirbyteMessage r2 = createRecordMessage(STREAM_2, 2); + final AirbyteMessage r3 = createRecordMessage(STREAM_3, 3); + final AirbyteMessage s1 = createStateMessage(1); + final AirbyteMessage s2 = createStateMessage(2); + + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromSource(s1); // emit state 1 + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(r2); + messageTracker.acceptFromDestination(s1); // commit state 1 + messageTracker.acceptFromSource(r3); + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(s2); // emit state 2 + + final Map countsByIndex = new HashMap<>(); + Mockito.when(mStateDeltaTracker.getStreamToCommittedRecords()).thenReturn(countsByIndex); + + countsByIndex.put((short) 0, 1L); + countsByIndex.put((short) 1, 2L); + // result only contains counts up to state 1 + assertEquals(3L, messageTracker.getTotalRecordsCommitted().get()); + + countsByIndex.clear(); + messageTracker.acceptFromDestination(s2); // now commit state 2 + countsByIndex.put((short) 0, 3L); + countsByIndex.put((short) 1, 3L); + countsByIndex.put((short) 2, 1L); + // result updated with counts between state 1 and 
state 2 + assertEquals(7L, messageTracker.getTotalRecordsCommitted().get()); + } + + @Test + public void testGetTotalRecordsCommitted_emptyWhenAddStateThrowsException() throws Exception { + Mockito.doThrow(new StateDeltaTrackerException("induced exception")).when(mStateDeltaTracker).addState(Mockito.anyInt(), Mockito.anyMap()); + + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 1); + final AirbyteMessage s1 = createStateMessage(1); + + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(s1); + messageTracker.acceptFromDestination(s1); + + assertTrue(messageTracker.getTotalRecordsCommitted().isEmpty()); + } + + @Test + public void testGetTotalRecordsCommitted_emptyWhenCommitStateHashThrowsException() throws Exception { + Mockito.doThrow(new StateDeltaTrackerException("induced exception")).when(mStateDeltaTracker).commitStateHash(Mockito.anyInt()); + + final AirbyteMessage r1 = createRecordMessage(STREAM_1, 1); + final AirbyteMessage s1 = createStateMessage(1); + + messageTracker.acceptFromSource(r1); + messageTracker.acceptFromSource(s1); + messageTracker.acceptFromDestination(s1); + + assertTrue(messageTracker.getTotalRecordsCommitted().isEmpty()); + } + + private AirbyteMessage createRecordMessage(final String streamName, final int recordData) { + return new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(streamName).withData(Jsons.jsonNode(recordData))); + } + + private AirbyteMessage createStateMessage(final int stateData) { + return new AirbyteMessage() + .withType(AirbyteMessage.Type.STATE) + .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(stateData))); } } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/StateDeltaTrackerTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/StateDeltaTrackerTest.java new file mode 100644 index 0000000000000..f7a50d038bc02 --- /dev/null +++ b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/StateDeltaTrackerTest.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.workers.protocols.airbyte; + +import io.airbyte.workers.protocols.airbyte.StateDeltaTracker.StateDeltaTrackerException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class StateDeltaTrackerTest { + + private static final int STATE_1_HASH = 1; + private static final int STATE_2_HASH = 2; + private static final int STATE_3_HASH = Integer.MAX_VALUE; + private static final int NEVER_ADDED_STATE_HASH = 20; + + private static final short STREAM_INDEX_1 = (short) 111; + private static final short STREAM_INDEX_2 = (short) 222; + private static final short STREAM_INDEX_3 = (short) 333; + private static final short STREAM_INDEX_4 = Short.MAX_VALUE; + + private static final long STATE_1_STREAM_1_COUNT = 11L; + private static final long STATE_1_STREAM_2_COUNT = 12L; + + private static final long STATE_2_STREAM_1_COUNT = 21L; + private static final long STATE_2_STREAM_3_COUNT = 23L; + + private static final long STATE_3_STREAM_3_COUNT = 33L; + private static final long STATE_3_STREAM_4_COUNT = 34L; + + // enough capacity for above 3 states, which are each 24 bytes (8 byte hash + two 10 byte stream + // counts + private static final long INITIAL_DELTA_MEMORY_CAPACITY = 72L; + + private StateDeltaTracker stateDeltaTracker; + + @BeforeEach + public void setup() throws Exception { + final Map state1Counts = new HashMap<>(); + state1Counts.put(STREAM_INDEX_1, STATE_1_STREAM_1_COUNT); + state1Counts.put(STREAM_INDEX_2, STATE_1_STREAM_2_COUNT); + + final Map state2Counts = new HashMap<>(); + state2Counts.put(STREAM_INDEX_1, STATE_2_STREAM_1_COUNT); + state2Counts.put(STREAM_INDEX_3, STATE_2_STREAM_3_COUNT); + + final Map state3Counts = new HashMap<>(); + state3Counts.put(STREAM_INDEX_3, STATE_3_STREAM_3_COUNT); + state3Counts.put(STREAM_INDEX_4, STATE_3_STREAM_4_COUNT); + + stateDeltaTracker = new StateDeltaTracker(INITIAL_DELTA_MEMORY_CAPACITY); + stateDeltaTracker.addState(STATE_1_HASH, state1Counts); + stateDeltaTracker.addState(STATE_2_HASH, state2Counts); + stateDeltaTracker.addState(STATE_3_HASH, state3Counts); + } + + @Test + public void testAddState_throwsExceptionWhenCapacityExceeded() { + Assertions.assertThrows(StateDeltaTrackerException.class, () -> stateDeltaTracker.addState(4, Collections.singletonMap((short) 444, 44L))); + Assertions.assertTrue(stateDeltaTracker.capacityExceeded); + } + + @Test + public void testCommitStateHash_throwsExceptionWhenStateHashConflict() throws Exception { + stateDeltaTracker.commitStateHash(STATE_1_HASH); + stateDeltaTracker.commitStateHash(STATE_2_HASH); + + Assertions.assertThrows(StateDeltaTrackerException.class, () -> stateDeltaTracker.commitStateHash(STATE_1_HASH)); + } + + @Test + public void testCommitStateHash_throwsExceptionIfCapacityExceededEarlier() { + stateDeltaTracker.capacityExceeded = true; + Assertions.assertThrows(StateDeltaTrackerException.class, () -> stateDeltaTracker.commitStateHash(STATE_1_HASH)); + } + + @Test + public void testCommitStateHash_throwsExceptionIfCommitStateHashCalledBeforeAddingState() { + Assertions.assertThrows(StateDeltaTrackerException.class, () -> stateDeltaTracker.commitStateHash(NEVER_ADDED_STATE_HASH)); + } + + @Test + public void testGetCommittedRecordsByStream() throws Exception { + // before anything is committed, returned map should be empty and deltas should contain three states + final Map expected = new HashMap<>(); + 
Assertions.assertEquals(expected, stateDeltaTracker.getStreamToCommittedRecords()); + Assertions.assertEquals(3, stateDeltaTracker.stateDeltas.size()); + + stateDeltaTracker.commitStateHash(STATE_1_HASH); + expected.put(STREAM_INDEX_1, STATE_1_STREAM_1_COUNT); + expected.put(STREAM_INDEX_2, STATE_1_STREAM_2_COUNT); + Assertions.assertEquals(expected, stateDeltaTracker.getStreamToCommittedRecords()); + Assertions.assertEquals(2, stateDeltaTracker.stateDeltas.size()); + expected.clear(); + + stateDeltaTracker.commitStateHash(STATE_2_HASH); + expected.put(STREAM_INDEX_1, STATE_1_STREAM_1_COUNT + STATE_2_STREAM_1_COUNT); + expected.put(STREAM_INDEX_2, STATE_1_STREAM_2_COUNT); + expected.put(STREAM_INDEX_3, STATE_2_STREAM_3_COUNT); + Assertions.assertEquals(expected, stateDeltaTracker.getStreamToCommittedRecords()); + Assertions.assertEquals(1, stateDeltaTracker.stateDeltas.size()); + expected.clear(); + + stateDeltaTracker.commitStateHash(STATE_3_HASH); + expected.put(STREAM_INDEX_1, STATE_1_STREAM_1_COUNT + STATE_2_STREAM_1_COUNT); + expected.put(STREAM_INDEX_2, STATE_1_STREAM_2_COUNT); + expected.put(STREAM_INDEX_3, STATE_2_STREAM_3_COUNT + STATE_3_STREAM_3_COUNT); + expected.put(STREAM_INDEX_4, STATE_3_STREAM_4_COUNT); + Assertions.assertEquals(expected, stateDeltaTracker.getStreamToCommittedRecords()); + + // since all states are committed, capacity should be freed and the delta queue should be empty + Assertions.assertEquals(INITIAL_DELTA_MEMORY_CAPACITY, stateDeltaTracker.remainingCapacity); + Assertions.assertEquals(0, stateDeltaTracker.stateDeltas.size()); + } + +} From c2656de00951f4fc621fcc6492601f75d4f3af38 Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Tue, 11 Jan 2022 14:50:34 -0800 Subject: [PATCH 098/215] Improve migration performances (#9396) Caches the list of running workflow and avoid fetching this list for every sync being migrated. This also add a watch that logs the time taken by the migrations. 
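The shape of the change: fetch the set of currently running workflow names once (paging through Temporal's open-workflow listing), check each connection against that in-memory set instead of issuing a per-connection query, and time the pass with a StopWatch. A condensed sketch of that loop with stand-in data; the real cache refresh and submission logic are in the diff below:

import java.util.Set;
import org.apache.commons.lang3.time.StopWatch;

public class MigrationTimingSketch {

  public static void main(final String[] args) {
    // stand-in for the cache refreshed once via the paged ListOpenWorkflowExecutions call
    final Set<String> runningWorkflowNames = Set.of("connection_manager_a");
    final Set<String> connectionWorkflowNames = Set.of("connection_manager_a", "connection_manager_b");

    final StopWatch globalMigrationWatch = new StopWatch();
    globalMigrationWatch.start();
    for (final String workflowName : connectionWorkflowNames) {
      if (!runningWorkflowNames.contains(workflowName)) {
        // stand-in for submitConnectionUpdaterAsync(connectionId)
        System.out.println("Would submit workflow: " + workflowName);
      }
    }
    globalMigrationWatch.stop();
    System.out.println("The migration to the new scheduler took: " + globalMigrationWatch.formatTime());
  }

}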
--- .../workers/temporal/TemporalClient.java | 55 ++++++++++++++++++- .../workers/temporal/TemporalClientTest.java | 6 +- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java index 542053fc8c4af..d6d116a4eec99 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java @@ -39,6 +39,7 @@ import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.IOException; import java.nio.file.Path; +import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; @@ -47,6 +48,7 @@ import java.util.stream.Collectors; import lombok.Value; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.time.StopWatch; @Slf4j public class TemporalClient { @@ -152,12 +154,61 @@ public TemporalResponse submitSync(final long jobId, final i } public void migrateSyncIfNeeded(final Set connectionIds) { + final StopWatch globalMigrationWatch = new StopWatch(); + globalMigrationWatch.start(); + refreshRunningWorkflow(); + connectionIds.forEach((connectionId) -> { - if (!isWorkflowRunning(getConnectionManagerName(connectionId))) { + final StopWatch singleSyncMigrationWatch = new StopWatch(); + singleSyncMigrationWatch.start(); + if (!isInRunningWorkflowCache(getConnectionManagerName(connectionId))) { log.info("Migrating: " + connectionId); - submitConnectionUpdaterAsync(connectionId); + try { + submitConnectionUpdaterAsync(connectionId); + } catch (final Exception e) { + log.error("New workflow submission failed, retrying", e); + refreshRunningWorkflow(); + submitConnectionUpdaterAsync(connectionId); + } } + singleSyncMigrationWatch.stop(); + log.info("Sync migration took: " + singleSyncMigrationWatch.formatTime()); }); + globalMigrationWatch.stop(); + + log.info("The migration to the new scheduler took: " + globalMigrationWatch.formatTime()); + } + + private final Set workflowNames = new HashSet<>(); + + boolean isInRunningWorkflowCache(final String workflowName) { + return workflowNames.contains(workflowName); + } + + @VisibleForTesting + void refreshRunningWorkflow() { + workflowNames.clear(); + ByteString token; + ListOpenWorkflowExecutionsRequest openWorkflowExecutionsRequest = + ListOpenWorkflowExecutionsRequest.newBuilder() + .setNamespace(client.getOptions().getNamespace()) + .build(); + do { + final ListOpenWorkflowExecutionsResponse listOpenWorkflowExecutionsRequest = + service.blockingStub().listOpenWorkflowExecutions(openWorkflowExecutionsRequest); + final Set workflowExecutionInfos = listOpenWorkflowExecutionsRequest.getExecutionsList().stream() + .map((workflowExecutionInfo -> workflowExecutionInfo.getExecution().getWorkflowId())) + .collect(Collectors.toSet()); + workflowNames.addAll(workflowExecutionInfos); + token = listOpenWorkflowExecutionsRequest.getNextPageToken(); + + openWorkflowExecutionsRequest = + ListOpenWorkflowExecutionsRequest.newBuilder() + .setNamespace(client.getOptions().getNamespace()) + .setNextPageToken(token) + .build(); + + } while (token != null && token.size() > 0); } public void submitConnectionUpdaterAsync(final UUID connectionId) { diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalClientTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalClientTest.java index 0a412dcf49775..7bfd851e3f66c 100644 
--- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalClientTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalClientTest.java @@ -209,10 +209,12 @@ public void migrateCalled() { final UUID migratedId = UUID.randomUUID(); doReturn(false) - .when(temporalClient).isWorkflowRunning(TemporalClient.getConnectionManagerName(nonMigratedId)); + .when(temporalClient).isInRunningWorkflowCache(TemporalClient.getConnectionManagerName(nonMigratedId)); doReturn(true) - .when(temporalClient).isWorkflowRunning(TemporalClient.getConnectionManagerName(migratedId)); + .when(temporalClient).isInRunningWorkflowCache(TemporalClient.getConnectionManagerName(migratedId)); + doNothing() + .when(temporalClient).refreshRunningWorkflow(); doNothing() .when(temporalClient).submitConnectionUpdaterAsync(nonMigratedId); From 5643c1747152defb81d760911884f36618617abd Mon Sep 17 00:00:00 2001 From: Octavia Squidington III <90398440+octavia-squidington-iii@users.noreply.github.com> Date: Wed, 12 Jan 2022 06:57:24 +0800 Subject: [PATCH 099/215] Bump Airbyte version from 0.35.4-alpha to 0.35.5-alpha (#9421) Co-authored-by: pmossman --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-bootloader/Dockerfile | 4 ++-- airbyte-container-orchestrator/Dockerfile | 6 +++--- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 4 ++-- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 10 +++++----- charts/airbyte/values.yaml | 10 +++++----- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 12 ++++++------ kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 12 ++++++------ 17 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 6140765cd1819..1b8fb7e71e393 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.4-alpha +current_version = 0.35.5-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? 
diff --git a/.env b/.env index a538aedd6eedc..b21d58434f634 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.4-alpha +VERSION=0.35.5-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index 05c9912913f43..cc06fe86c6683 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -5,6 +5,6 @@ ENV APPLICATION airbyte-bootloader WORKDIR /app -ADD bin/${APPLICATION}-0.35.4-alpha.tar /app +ADD bin/${APPLICATION}-0.35.5-alpha.tar /app -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index 434cf1cc8e2ca..1fe6b714a4fb6 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -19,12 +19,12 @@ RUN add-apt-repository \ RUN apt-get update && apt-get install -y docker-ce-cli jq ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.4-alpha.tar /app +ADD bin/${APPLICATION}-0.35.5-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index b5bc7078bee92..92eb38ba59023 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.4-alpha.tar /app +ADD bin/${APPLICATION}-0.35.5-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index d68bdbccb7298..a5b4e3ecb0127 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.35.4-alpha.tar /app +ADD bin/${APPLICATION}-0.35.5-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 5c8026758dec6..888bf1f20d1f8 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.4-alpha", + "version": "0.35.5-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": "0.35.4-alpha", + "version": "0.35.5-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 1ad7582e5899c..f71dd4be04ee4 100644 --- a/airbyte-webapp/package.json +++ 
b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.4-alpha", + "version": "0.35.5-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 21a62fc319937..abc336fec7ebb 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.4-alpha.tar /app +ADD bin/${APPLICATION}-0.35.5-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.4-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index ac6ab7f7bd7cd..d031e09ad5854 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.4-alpha" +appVersion: "0.35.5-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index de420001da11d..ba04b7cb037d1 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -55,7 +55,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -86,7 +86,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.35.5-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -120,7 +120,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -148,7 +148,7 @@ | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.4-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 617c953f31cef..470c65bd08668 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.4-alpha + tag: 0.35.5-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -140,7 +140,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.4-alpha + tag: 0.35.5-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -245,7 +245,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.4-alpha + tag: 0.35.5-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -357,7 +357,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.4-alpha + tag: 0.35.5-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## @@ -446,7 +446,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.35.4-alpha + tag: 0.35.5-alpha ## @section Temporal parameters ## TODO: Move to consuming temporal from a dedicated helm chart diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 52d978da25808..5b640eb3c218e 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -101,7 +101,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.4-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.5-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 561e0493afc98..b5cc97f7e2584 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.4-alpha +AIRBYTE_VERSION=0.35.5-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 481a7bd157321..b466956a7624b 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/bootloader - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/scheduler - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/server - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/webapp - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/worker - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 561e0493afc98..b5cc97f7e2584 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.4-alpha +AIRBYTE_VERSION=0.35.5-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index c22629b231506..32de4f039f260 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/bootloader - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/scheduler - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/server - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/webapp - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: airbyte/worker - newTag: 0.35.4-alpha + newTag: 0.35.5-alpha - name: temporalio/auto-setup newTag: 1.7.0 From 6ba51462fa2cc476a05d2ff721a3fdf3e645c118 Mon Sep 17 00:00:00 2001 From: Juozas V Date: Wed, 12 Jan 2022 01:55:33 +0200 Subject: [PATCH 100/215] Source Amazon Seller Partner: add all remaining brand analytics report streams (#9312) * Add all brand analytic streams * Parametrize results key * Update schemas * Parametrize result_key * Add missing result_key * Remove duplicate * Update configured catalogs * Cleanup * Add configured catalogs for brand analytics * Run gradlew format * Update documentation --- .../acceptance-test-config.yml | 10 ++++ .../integration_tests/configured_catalog.json | 9 ---- ...og_brand_analytics_alternate_purchase.json | 40 ++++++++++++++ ...talog_brand_analytics_item_comparison.json | 40 ++++++++++++++ ...catalog_brand_analytics_market_basket.json | 40 ++++++++++++++ ...talog_brand_analytics_repeat_purchase.json | 54 +++++++++++++++++++ ..._catalog_brand_analytics_search_terms.json | 41 ++++++++++++++ 
...D_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json | 28 ++++++++++ ...RAND_ANALYTICS_ITEM_COMPARISON_REPORT.json | 28 ++++++++++ ..._BRAND_ANALYTICS_MARKET_BASKET_REPORT.json | 28 ++++++++++ ...RAND_ANALYTICS_REPEAT_PURCHASE_REPORT.json | 42 +++++++++++++++ .../source_amazon_seller_partner/source.py | 8 +++ .../source_amazon_seller_partner/streams.py | 46 +++++++++++----- .../sources/amazon-seller-partner.md | 5 ++ 14 files changed, 398 insertions(+), 21 deletions(-) create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_alternate_purchase.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_item_comparison.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_market_basket.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_repeat_purchase.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_search_terms.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT.json diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml b/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml index 0c9ac604800a3..6d9ae3dfbc248 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml @@ -28,6 +28,16 @@ tests: # "GET_VENDOR_INVENTORY_HEALTH_AND_PLANNING_REPORT", # "VendorDirectFulfillmentShipping", # ] +# - config_path: "secrets/config.json" +# configured_catalog_path: "integration_tests/configured_catalog_brand_analytics_alternate_purchase.json" +# - config_path: "secrets/config.json" +# configured_catalog_path: "integration_tests/configured_catalog_brand_analytics_item_comparison.json" +# - config_path: "secrets/config.json" +# configured_catalog_path: "integration_tests/configured_catalog_brand_analytics_market_basket.json" +# - config_path: "secrets/config.json" +# configured_catalog_path: "integration_tests/configured_catalog_brand_analytics_repeat_purchase.json.json" +# - config_path: "secrets/config.json" +# configured_catalog_path: "integration_tests/configured_catalog_brand_analytics_search_terms.json" # TODO: uncomment when Orders (or any other incremental) stream is filled with data # incremental: # - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog.json index 0387865e646d1..5c897b44e0b2d 
100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog.json @@ -104,15 +104,6 @@ "sync_mode": "incremental", "destination_sync_mode": "append", "cursor_field": ["Date"] - }, - { - "stream": { - "name": "GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" } ] } diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_alternate_purchase.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_alternate_purchase.json new file mode 100644 index 0000000000000..2ce8fbb81064a --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_alternate_purchase.json @@ -0,0 +1,40 @@ +{ + "streams": [ + { + "stream": { + "name": "GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT", + "json_schema": { + "title": "Brand Analytics Alternate Purchase Reports", + "description": "Brand Analytics Alternate Purchase Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "startDate": { + "type": ["null", "string"], + "format": "date" + }, + "endDate": { + "type": ["null", "string"], + "format": "date" + }, + "asin": { + "type": ["null", "string"] + }, + "purchasedAsin": { + "type": ["null", "string"] + }, + "purchasedRank": { + "type": ["null", "integer"] + }, + "purchasedPct": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_item_comparison.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_item_comparison.json new file mode 100644 index 0000000000000..4d7300e63157f --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_item_comparison.json @@ -0,0 +1,40 @@ +{ + "streams": [ + { + "stream": { + "name": "GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT", + "json_schema": { + "title": "Brand Analytics Item Comparison Reports", + "description": "Brand Analytics Item Comparison Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "startDate": { + "type": ["null", "string"], + "format": "date" + }, + "endDate": { + "type": ["null", "string"], + "format": "date" + }, + "asin": { + "type": ["null", "string"] + }, + "comparedAsin": { + "type": ["null", "string"] + }, + "comparedRank": { + "type": ["null", "integer"] + }, + "comparedPct": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_market_basket.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_market_basket.json new file mode 100644 index 0000000000000..87a75b8aaecee --- 
/dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_market_basket.json @@ -0,0 +1,40 @@ +{ + "streams": [ + { + "stream": { + "name": "GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT", + "json_schema": { + "title": "Brand Analytics Market Basket Reports", + "description": "Brand Analytics Market Basket Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "startDate": { + "type": ["null", "string"], + "format": "date" + }, + "endDate": { + "type": ["null", "string"], + "format": "date" + }, + "asin": { + "type": ["null", "string"] + }, + "purchasedWithAsin": { + "type": ["null", "string"] + }, + "purchasedWithRank": { + "type": ["null", "integer"] + }, + "combinationPct": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_repeat_purchase.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_repeat_purchase.json new file mode 100644 index 0000000000000..656b89dfd3100 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_repeat_purchase.json @@ -0,0 +1,54 @@ +{ + "streams": [ + { + "stream": { + "name": "GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT", + "json_schema": { + "title": "Brand Analytics Repeat Purchase Reports", + "description": "Brand Analytics Repeat Purchase Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "startDate": { + "type": ["null", "string"], + "format": "date" + }, + "endDate": { + "type": ["null", "string"], + "format": "date" + }, + "asin": { + "type": ["null", "string"] + }, + "orders": { + "type": ["null", "integer"] + }, + "uniqueCustomers": { + "type": ["null", "integer"] + }, + "repeatCustomersPctTotal": { + "type": ["null", "number"] + }, + "repeatPurchaseRevenue": { + "type": "object", + "properties": { + "amount": { + "type": ["null", "number"] + }, + "currencyCode": { + "type": ["null", "string"] + } + } + }, + "repeatPurchaseRevenuePctTotal": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_search_terms.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_search_terms.json new file mode 100644 index 0000000000000..d841e81f9a4ba --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_search_terms.json @@ -0,0 +1,41 @@ +{ + "streams": [ + { + "stream": { + "name": "GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT", + "json_schema": { + "title": "Brand Analytics Search Terms Reports", + "description": "Brand Analytics Search Terms Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "departmentName": { + "type": ["null", "string"] + }, + "searchTerm": { + "type": ["null", "string"] + }, + "searchFrequencyRank": { + "type": ["null", "number"] + }, + "clickedAsin": { 
+ "type": ["null", "string"] + }, + "clickShareRank": { + "type": ["null", "number"] + }, + "clickShare": { + "type": ["null", "number"] + }, + "conversionShare": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json new file mode 100644 index 0000000000000..84b78bc293858 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json @@ -0,0 +1,28 @@ +{ + "title": "Brand Analytics Alternate Purchase Reports", + "description": "Brand Analytics Alternate Purchase Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "startDate": { + "type": ["null", "string"], + "format": "date" + }, + "endDate": { + "type": ["null", "string"], + "format": "date" + }, + "asin": { + "type": ["null", "string"] + }, + "purchasedAsin": { + "type": ["null", "string"] + }, + "purchasedRank": { + "type": ["null", "integer"] + }, + "purchasedPct": { + "type": ["null", "number"] + } + } +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT.json new file mode 100644 index 0000000000000..cae42a1150bb3 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT.json @@ -0,0 +1,28 @@ +{ + "title": "Brand Analytics Item Comparison Reports", + "description": "Brand Analytics Item Comparison Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "startDate": { + "type": ["null", "string"], + "format": "date" + }, + "endDate": { + "type": ["null", "string"], + "format": "date" + }, + "asin": { + "type": ["null", "string"] + }, + "comparedAsin": { + "type": ["null", "string"] + }, + "comparedRank": { + "type": ["null", "integer"] + }, + "comparedPct": { + "type": ["null", "number"] + } + } +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT.json new file mode 100644 index 0000000000000..88473f308b034 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT.json @@ -0,0 +1,28 @@ +{ + "title": "Brand Analytics Market Basket Reports", + "description": "Brand Analytics Market Basket Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "startDate": { + "type": ["null", "string"], + "format": "date" + }, + "endDate": { + "type": ["null", "string"], + "format": "date" + }, + "asin": { + "type": ["null", "string"] + }, + "purchasedWithAsin": { + "type": ["null", 
"string"] + }, + "purchasedWithRank": { + "type": ["null", "integer"] + }, + "combinationPct": { + "type": ["null", "number"] + } + } +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT.json new file mode 100644 index 0000000000000..46da2d4f03077 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT.json @@ -0,0 +1,42 @@ +{ + "title": "Brand Analytics Repeat Purchase Reports", + "description": "Brand Analytics Repeat Purchase Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "startDate": { + "type": ["null", "string"], + "format": "date" + }, + "endDate": { + "type": ["null", "string"], + "format": "date" + }, + "asin": { + "type": ["null", "string"] + }, + "orders": { + "type": ["null", "integer"] + }, + "uniqueCustomers": { + "type": ["null", "integer"] + }, + "repeatCustomersPctTotal": { + "type": ["null", "number"] + }, + "repeatPurchaseRevenue": { + "type": "object", + "properties": { + "amount": { + "type": ["null", "number"] + }, + "currencyCode": { + "type": ["null", "string"] + } + } + }, + "repeatPurchaseRevenuePctTotal": { + "type": ["null", "number"] + } + } +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py index a036001f05f36..e46d83e575f3f 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py @@ -15,6 +15,10 @@ from source_amazon_seller_partner.auth import AWSAuthenticator, AWSSignature from source_amazon_seller_partner.constants import AWSEnvironment, AWSRegion, get_marketplaces from source_amazon_seller_partner.streams import ( + BrandAnalyticsAlternatePurchaseReports, + BrandAnalyticsItemComparisonReports, + BrandAnalyticsMarketBasketReports, + BrandAnalyticsRepeatPurchaseReports, BrandAnalyticsSearchTermsReports, FbaInventoryReports, FbaOrdersReports, @@ -150,7 +154,11 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: VendorInventoryHealthReports(**stream_kwargs), Orders(**stream_kwargs), SellerFeedbackReports(**stream_kwargs), + BrandAnalyticsMarketBasketReports(**stream_kwargs), BrandAnalyticsSearchTermsReports(**stream_kwargs), + BrandAnalyticsRepeatPurchaseReports(**stream_kwargs), + BrandAnalyticsAlternatePurchaseReports(**stream_kwargs), + BrandAnalyticsItemComparisonReports(**stream_kwargs), ] def spec(self, *args, **kwargs) -> ConnectorSpecification: diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index 2d8c880cf3e3d..f26dca2d8d192 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -142,6 +142,7 @@ class ReportsAmazonSPStream(Stream, ABC): path_prefix = 
f"reports/{REPORTS_API_VERSION}" sleep_seconds = 30 data_field = "payload" + result_key = None def __init__( self, @@ -288,8 +289,7 @@ def parse_response(self, response: requests.Response) -> Iterable[Mapping]: document_records = self.parse_document(document) yield from document_records - @staticmethod - def parse_document(document): + def parse_document(self, document): return csv.DictReader(StringIO(document), delimiter="\t") def report_options(self) -> Mapping[str, Any]: @@ -388,17 +388,10 @@ class VendorInventoryHealthReports(ReportsAmazonSPStream): name = "GET_VENDOR_INVENTORY_HEALTH_AND_PLANNING_REPORT" -class BrandAnalyticsSearchTermsReports(ReportsAmazonSPStream): - """ - Field definitions: https://sellercentral.amazon.co.uk/help/hub/reference/G5NXWNY8HUD3VDCW - """ - - name = "GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT" - - @staticmethod - def parse_document(document): +class BrandAnalyticsStream(ReportsAmazonSPStream): + def parse_document(self, document): parsed = json_lib.loads(document) - return parsed.get("dataByDepartmentAndSearchTerm", {}) + return parsed.get(self.result_key, []) def _report_data( self, @@ -452,6 +445,35 @@ def _augmented_data(report_options) -> Mapping[str, Any]: } +class BrandAnalyticsMarketBasketReports(BrandAnalyticsStream): + name = "GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT" + result_key = "dataByAsin" + + +class BrandAnalyticsSearchTermsReports(BrandAnalyticsStream): + """ + Field definitions: https://sellercentral.amazon.co.uk/help/hub/reference/G5NXWNY8HUD3VDCW + """ + + name = "GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT" + result_key = "dataByDepartmentAndSearchTerm" + + +class BrandAnalyticsRepeatPurchaseReports(BrandAnalyticsStream): + name = "GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT" + result_key = "dataByAsin" + + +class BrandAnalyticsAlternatePurchaseReports(BrandAnalyticsStream): + name = "GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT" + result_key = "dataByAsin" + + +class BrandAnalyticsItemComparisonReports(BrandAnalyticsStream): + name = "GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT" + result_key = "dataByAsin" + + class IncrementalReportsAmazonSPStream(ReportsAmazonSPStream): @property @abstractmethod diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index e3b9c272fc332..71d336fc1a1f9 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -24,6 +24,10 @@ This source is capable of syncing the following streams: - [Orders](https://github.com/amzn/selling-partner-api-docs/blob/main/references/orders-api/ordersV0.md) \(incremental\) - [VendorDirectFulfillmentShipping](https://github.com/amzn/selling-partner-api-docs/blob/main/references/vendor-direct-fulfillment-shipping-api/vendorDirectFulfillmentShippingV1.md) - [Seller Feedback Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#performance-reports) +- [Brand Analytics Alternate Purchase Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#brand-analytics-reports) +- [Brand Analytics Item Comparison Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#brand-analytics-reports) +- [Brand Analytics Market Basket Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#brand-analytics-reports) +- [Brand Analytics Repeat 
Purchase Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#brand-analytics-reports) - [Brand Analytics Search Terms Report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#brand-analytics-reports) ## Getting started @@ -63,6 +67,7 @@ Information about rate limits you may find [here](https://github.com/amzn/sellin | Version | Date | Pull Request | Subject | | :------- | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------- | +| `0.2.12` | 2022-01-05 | [\#9312](https://github.com/airbytehq/airbyte/pull/9312) | Add all remaining brand analytics report streams | `0.2.11` | 2022-01-05 | [\#9115](https://github.com/airbytehq/airbyte/pull/9115) | Fix reading only 100 orders | | `0.2.10` | 2021-12-31 | [\#9236](https://github.com/airbytehq/airbyte/pull/9236) | Fix NoAuth deprecation warning | | `0.2.9` | 2021-12-30 | [\#9212](https://github.com/airbytehq/airbyte/pull/9212) | Normalize GET_SELLER_FEEDBACK_DATA header field names | From 826f55d51962f51a2a1c4989c354da717cb8f7a3 Mon Sep 17 00:00:00 2001 From: Harshith Mullapudi Date: Wed, 12 Jan 2022 08:17:32 +0530 Subject: [PATCH 101/215] Publish source amazon seller partner: added new reports (#9424) * chore: change version number in Dockerfile * chore: update spec file --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-amazon-seller-partner/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index e834552141572..6f853b92ecad3 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -22,7 +22,7 @@ - name: Amazon Seller Partner sourceDefinitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 dockerRepository: airbyte/source-amazon-seller-partner - dockerImageTag: 0.2.11 + dockerImageTag: 0.2.12 sourceType: api documentationUrl: https://docs.airbyte.io/integrations/sources/amazon-seller-partner icon: amazonsellerpartner.svg diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 78f81c1aed04f..374be877a4b75 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -157,7 +157,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amazon-seller-partner:0.2.11" +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.12" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile index 485b51f7050cc..38c7033939b84 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.11 +LABEL io.airbyte.version=0.2.12 LABEL io.airbyte.name=airbyte/source-amazon-seller-partner From 5f6785dcbf13e2aa15fa2d6f18f926c2fff6eccf Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Tue, 11 Jan 2022 23:12:37 -0800 Subject: [PATCH 102/215] =?UTF-8?q?=F0=9F=90=9E=20Destination=20S3=20&=20G?= =?UTF-8?q?CS=20Avro:=20support=20array=20with=20unknown=20item=20type=20(?= =?UTF-8?q?#9367)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Support array field with empty items specification * Remove all exceptions * Format code * Bump connector versions * Bump bigquery versions * Update docs * Remove unused code * Update doc for PR #9363 * Update doc about defaulting all improperly typed fields to string * Ignore bigquery * Update version and doc * Update doc * Bump version in seed --- .../seed/destination_definitions.yaml | 4 +- .../resources/seed/destination_specs.yaml | 6 +-- .../connectors/destination-gcs/Dockerfile | 2 +- .../connectors/destination-s3/Dockerfile | 2 +- .../s3/avro/JsonToAvroSchemaConverter.java | 37 +++++++++----- .../json_conversion_test_cases.json | 50 +++++++++++++++++++ .../type_conversion_test_cases.json | 21 +++++++- docs/integrations/destinations/gcs.md | 1 + docs/integrations/destinations/s3.md | 1 + .../json-avro-conversion.md | 8 ++- 10 files changed, 108 insertions(+), 24 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 7f0fbadec12d4..bfcd84549460e 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -60,7 +60,7 @@ - name: Google Cloud Storage (GCS) destinationDefinitionId: ca8f6566-e555-4b40-943a-545bf123117a dockerRepository: airbyte/destination-gcs - dockerImageTag: 0.1.19 + dockerImageTag: 0.1.20 documentationUrl: https://docs.airbyte.io/integrations/destinations/gcs icon: googlecloudstorage.svg - name: Google PubSub @@ -167,7 +167,7 @@ - name: S3 destinationDefinitionId: 4816b78f-1489-44c1-9060-4b19d5fa9362 dockerRepository: airbyte/destination-s3 - dockerImageTag: 0.2.2 + dockerImageTag: 0.2.3 documentationUrl: https://docs.airbyte.io/integrations/destinations/s3 icon: s3.svg - name: SFTP-JSON diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 535ea3c6abbfd..eaf2558800231 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -1143,7 +1143,7 @@ - "overwrite" - "append" supportsNamespaces: true -- dockerImage: "airbyte/destination-gcs:0.1.19" +- dockerImage: "airbyte/destination-gcs:0.1.20" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs" connectionSpecification: @@ -3396,7 +3396,7 @@ supported_destination_sync_modes: - "append" - "overwrite" -- dockerImage: "airbyte/destination-s3:0.2.2" +- dockerImage: "airbyte/destination-s3:0.2.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3" connectionSpecification: @@ -3594,8 +3594,6 @@ \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." 
type: "integer" default: 5 - minimum: 5 - maximum: 525 examples: - 5 - title: "CSV: Comma-Separated Values" diff --git a/airbyte-integrations/connectors/destination-gcs/Dockerfile b/airbyte-integrations/connectors/destination-gcs/Dockerfile index 793b2e0cead17..90be81f532ed4 100644 --- a/airbyte-integrations/connectors/destination-gcs/Dockerfile +++ b/airbyte-integrations/connectors/destination-gcs/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-gcs COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.19 +LABEL io.airbyte.version=0.1.20 LABEL io.airbyte.name=airbyte/destination-gcs diff --git a/airbyte-integrations/connectors/destination-s3/Dockerfile b/airbyte-integrations/connectors/destination-s3/Dockerfile index 27ab7eebf8b2e..74be9eb7dc0e5 100644 --- a/airbyte-integrations/connectors/destination-s3/Dockerfile +++ b/airbyte-integrations/connectors/destination-s3/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-s3 COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.2 +LABEL io.airbyte.version=0.2.3 LABEL io.airbyte.name=airbyte/destination-s3 diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java index 7b6b4e7220b41..555e4a0bb7c15 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java @@ -51,6 +51,9 @@ static List getNonNullTypes(final String fieldName, final JsonNo .filter(type -> type != JsonSchemaType.NULL).collect(Collectors.toList()); } + /** + * When no type is specified, it will default to string. + */ static List getTypes(final String fieldName, final JsonNode fieldDefinition) { final Optional combinedRestriction = getCombinedRestriction(fieldDefinition); if (combinedRestriction.isPresent()) { @@ -59,7 +62,8 @@ static List getTypes(final String fieldName, final JsonNode fiel final JsonNode typeProperty = fieldDefinition.get("type"); if (typeProperty == null || typeProperty.isNull()) { - throw new IllegalStateException(String.format("Field %s has no type", fieldName)); + LOGGER.warn("Field \"{}\" has no type specification. It will default to string", fieldName); + return Collections.singletonList(JsonSchemaType.STRING); } if (typeProperty.isArray()) { @@ -72,7 +76,8 @@ static List getTypes(final String fieldName, final JsonNode fiel return Collections.singletonList(JsonSchemaType.fromJsonSchemaType(typeProperty.asText())); } - throw new IllegalStateException("Unexpected type: " + typeProperty); + LOGGER.warn("Field \"{}\" has unexpected type {}. 
It will default to string.", fieldName, typeProperty); + return Collections.singletonList(JsonSchemaType.STRING); } static Optional getCombinedRestriction(final JsonNode fieldDefinition) { @@ -120,7 +125,7 @@ public Schema getAvroSchema(final JsonNode jsonSchema, final SchemaBuilder.RecordBuilder builder = SchemaBuilder.record(stdName); if (!stdName.equals(fieldName)) { standardizedNames.put(fieldName, stdName); - LOGGER.warn("Schema name contains illegal character(s) and is standardized: {} -> {}", fieldName, + LOGGER.warn("Schema name \"{}\" contains illegal character(s) and is standardized to \"{}\"", fieldName, stdName); builder.doc( String.format("%s%s%s", @@ -159,7 +164,7 @@ public Schema getAvroSchema(final JsonNode jsonSchema, final SchemaBuilder.FieldBuilder fieldBuilder = assembler.name(stdFieldName); if (!stdFieldName.equals(subfieldName)) { standardizedNames.put(subfieldName, stdFieldName); - LOGGER.warn("Field name contains illegal character(s) and is standardized: {} -> {}", + LOGGER.warn("Field name \"{}\" contains illegal character(s) and is standardized to \"{}\"", subfieldName, stdFieldName); fieldBuilder.doc(String.format("%s%s%s", AvroConstants.DOC_KEY_ORIGINAL_NAME, @@ -231,26 +236,33 @@ Schema parseSingleType(final String fieldName, case ARRAY -> { final JsonNode items = fieldDefinition.get("items"); if (items == null) { - LOGGER.warn("Array field {} does not specify the items type. It will be assumed to be an array of strings", fieldName); + LOGGER.warn("Array field \"{}\" does not specify the items type. It will default to an array of strings", fieldName); fieldSchema = Schema.createArray(Schema.createUnion(NULL_SCHEMA, STRING_SCHEMA)); } else if (items.isObject()) { - fieldSchema = - Schema.createArray( - parseJsonField(String.format("%s.items", fieldName), fieldNamespace, items, appendExtraProps, addStringToLogicalTypes)); + if (!items.has("type") || items.get("type").isNull()) { + LOGGER.warn("Array field \"{}\" does not specify the items type. it will default to an array of strings", fieldName); + fieldSchema = Schema.createArray(Schema.createUnion(NULL_SCHEMA, STRING_SCHEMA)); + } else { + // Objects inside Json array has no names. We name it with the ".items" suffix. + final String elementFieldName = fieldName + ".items"; + fieldSchema = Schema.createArray(parseJsonField(elementFieldName, fieldNamespace, items, appendExtraProps, addStringToLogicalTypes)); + } } else if (items.isArray()) { final List arrayElementTypes = parseJsonTypeUnion(fieldName, fieldNamespace, (ArrayNode) items, appendExtraProps, addStringToLogicalTypes); arrayElementTypes.add(0, NULL_SCHEMA); fieldSchema = Schema.createArray(Schema.createUnion(arrayElementTypes)); } else { - throw new IllegalStateException( - String.format("Array field %s has invalid items property: %s", fieldName, items)); + LOGGER.warn("Array field \"{}\" has invalid items specification: {}. It will default to an array of strings.", fieldName, items); + fieldSchema = Schema.createArray(Schema.createUnion(NULL_SCHEMA, STRING_SCHEMA)); } } case OBJECT -> fieldSchema = getAvroSchema(fieldDefinition, fieldName, fieldNamespace, false, appendExtraProps, addStringToLogicalTypes, false); - default -> throw new IllegalStateException( - String.format("Unexpected type for field %s: %s", fieldName, fieldType)); + default -> { + LOGGER.warn("Field \"{}\" has invalid type definition: {}. 
It will default to string.", fieldName, fieldDefinition); + fieldSchema = Schema.createUnion(NULL_SCHEMA, STRING_SCHEMA); + } } return fieldSchema; } @@ -267,7 +279,6 @@ List parseJsonTypeUnion(final String fieldName, final ArrayNode types, final boolean appendExtraProps, final boolean addStringToLogicalTypes) { - final List typeList = MoreIterators.toList(types.elements()); final List schemas = MoreIterators.toList(types.elements()) .stream() .flatMap(definition -> getNonNullTypes(fieldName, definition).stream().flatMap(type -> { diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json index 92e989ec1ab48..1a490a2835c28 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json @@ -1512,5 +1512,55 @@ ], "_airbyte_additional_properties": null } + }, + { + "schemaName": "array_field_with_empty_items", + "namespace": "namespace20", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object", + "properties": { + "array_field": { + "type": "array", + "items": {} + } + } + }, + "jsonObject": { + "array_field": [1234, true, "false", 0.001] + }, + "avroSchema": { + "type": "record", + "name": "array_field_with_empty_items", + "namespace": "namespace20", + "fields": [ + { + "name": "array_field", + "type": [ + "null", + { + "type": "array", + "items": ["null", "string"] + } + ], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ], + "default": null + } + ] + }, + "avroObject": { + "array_field": ["1234", "true", "false", "0.001"], + "_airbyte_additional_properties": null + } } ] diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json index 39af73a95cf26..fabee9775aa85 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json @@ -201,7 +201,7 @@ ] }, { - "fieldName": "array_field_without_items_type", + "fieldName": "array_field_without_items", "jsonFieldSchema": { "type": "array" }, @@ -212,5 +212,24 @@ "items": ["null", "string"] } ] + }, + { + "fieldName": "array_field_with_empty_items", + "jsonFieldSchema": { + "type": "array", + "items": {} + }, + "avroFieldType": [ + "null", + { + "type": "array", + "items": ["null", "string"] + } + ] + }, + { + "fieldName": "field_without_type", + "jsonFieldSchema": {}, + "avroFieldType": ["null", "string"] } ] diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index 2bb45eee300a1..eb7dbf37d5878 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -229,6 +229,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.20 | 
2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field to string. | | 0.1.19 | 2022-01-10 | [\#9121](https://github.com/airbytehq/airbyte/pull/9121) | Fixed check method for GCS mode to verify if all roles assigned to user | | 0.1.18 | 2021-12-30 | [\#8809](https://github.com/airbytehq/airbyte/pull/8809) | Update connector fields title/description | | 0.1.17 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index cd887d24c8c5c..79a73bb120eb8 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -223,6 +223,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.3 | 2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field to string. | | 0.2.2 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | | 0.2.1 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. | | 0.2.0 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Change the output filename for CSV files - it's now `bucketPath/namespace/streamName/timestamp_epochMillis_randomUuid.csv` | diff --git a/docs/understanding-airbyte/json-avro-conversion.md b/docs/understanding-airbyte/json-avro-conversion.md index d5e48d3f6bb7f..25472d897c9c2 100644 --- a/docs/understanding-airbyte/json-avro-conversion.md +++ b/docs/understanding-airbyte/json-avro-conversion.md @@ -161,7 +161,7 @@ This is not supported in Avro schema. As a compromise, the converter creates a u } ``` -If the Json array has multiple object items, these objects will be recursively merged into one Avro record. For example, the following Json array expects two different objects, each with a different `id` field. +If the Json array has multiple object items, these objects will be recursively merged into one Avro record. For example, the following Json array expects two different objects. The first object has an `id` field, and second has an `id` and `message` field. Their `id` fields have slightly different types. Json schema: @@ -223,7 +223,7 @@ Json object: } ``` -Furthermore, the fields under the `id` record, `id_part_1` and `id_part_2`, will also have their schemas merged. +After conversion, the two object schemas will be merged into one. Furthermore, the fields under the `id` record, `id_part_1` and `id_part_2`, will also be merged. In this way, all possible valid elements from the Json array can be converted to Avro records. Avro schema: @@ -468,6 +468,10 @@ the corresponding Avro schema and record will be: } ``` +### Untyped Field + +Any field without property type specification will default to a `string` field, and its value will be serialized to string. + ## Example Based on the above rules, here is an overall example. 
Given the following Json schema: From 80666cf117c7a87072208721a51ecef16e56444c Mon Sep 17 00:00:00 2001 From: bmatt <36943357+bmatticus@users.noreply.github.com> Date: Wed, 12 Jan 2022 04:09:37 -0500 Subject: [PATCH 103/215] Destination Azure Blob Storage: Added BufferedOutputStream to fix block count issue and improve performance (#9190) --- .../b4c5d105-31fd-4817-96b6-cb923bfc04cb.json | 2 +- .../resources/seed/destination_definitions.yaml | 2 +- .../main/resources/seed/destination_specs.yaml | 14 +++++++++++++- .../destination-azure-blob-storage/Dockerfile | 2 +- .../AzureBlobStorageDestinationConfig.java | 14 ++++++++++++++ .../AzureBlobStorageDestinationConstants.java | 1 + .../csv/AzureBlobStorageCsvWriter.java | 9 +++++---- .../jsonl/AzureBlobStorageJsonlWriter.java | 7 ++++--- .../src/main/resources/spec.json | 10 ++++++++++ .../AzureBlobDestinationAcceptanceTest.java | 4 +++- .../AzureBlobDestinationTest.java | 16 ++++++++++++++++ .../destinations/azureblobstorage.md | 3 +++ 12 files changed, 72 insertions(+), 12 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json index 4582e30382950..3c436bbd29d64 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "b4c5d105-31fd-4817-96b6-cb923bfc04cb", "name": "Azure Blob Storage", "dockerRepository": "airbyte/destination-azure-blob-storage", - "dockerImageTag": "0.1.0", + "dockerImageTag": "0.1.1", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/azureblobstorage", "icon": "azureblobstorage.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index bfcd84549460e..38afff07e973b 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -1,7 +1,7 @@ - name: Azure Blob Storage destinationDefinitionId: b4c5d105-31fd-4817-96b6-cb923bfc04cb dockerRepository: airbyte/destination-azure-blob-storage - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.io/integrations/destinations/azureblobstorage icon: azureblobstorage.svg - name: Amazon SQS diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index eaf2558800231..4ce2d219022de 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -1,7 +1,7 @@ # This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator. # Do NOT edit this file directly. See generator class for more details. 
--- -- dockerImage: "airbyte/destination-azure-blob-storage:0.1.0" +- dockerImage: "airbyte/destination-azure-blob-storage:0.1.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/azureblobstorage" connectionSpecification: @@ -38,11 +38,23 @@ examples: - "airbyte5storage" azure_blob_storage_account_key: + title: "Azure Blob Storage account key" description: "The Azure blob storage account key." airbyte_secret: true type: "string" examples: - "Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd==" + azure_blob_storage_output_buffer_size: + title: "Azure Blob Storage output buffer size" + type: "integer" + description: "The amount of megabytes to buffer for the output stream to\ + \ Azure. This will impact memory footprint on workers, but may need adjustment\ + \ for performance and appropriate block size in Azure." + minimum: 1 + maximum: 2047 + default: 5 + examples: + - 5 format: title: "Output Format" type: "object" diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile b/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile index a33b5ab5272de..8e644aa025fe7 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-azure-blob-storage COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/destination-azure-blob-storage diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java index fb2777db8a5e7..8d575214b678c 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java @@ -15,6 +15,7 @@ public class AzureBlobStorageDestinationConfig { private final String accountName; private final String accountKey; private final String containerName; + private final int outputStreamBufferSize; private final AzureBlobStorageFormatConfig formatConfig; public AzureBlobStorageDestinationConfig( @@ -22,11 +23,13 @@ public AzureBlobStorageDestinationConfig( final String accountName, final String accountKey, final String containerName, + final int outputStreamBufferSize, final AzureBlobStorageFormatConfig formatConfig) { this.endpointUrl = endpointUrl; this.accountName = accountName; this.accountKey = accountKey; this.containerName = containerName; + this.outputStreamBufferSize = outputStreamBufferSize; this.formatConfig = formatConfig; } @@ -50,12 +53,22 @@ public AzureBlobStorageFormatConfig getFormatConfig() { return formatConfig; } + public int getOutputStreamBufferSize() { + // Convert from MB to Bytes + return outputStreamBufferSize * 1024 * 1024; + } + public static AzureBlobStorageDestinationConfig getAzureBlobStorageConfig(final JsonNode config) { final String accountNameFomConfig = config.get("azure_blob_storage_account_name").asText(); final String accountKeyFromConfig = 
config.get("azure_blob_storage_account_key").asText(); final JsonNode endpointFromConfig = config .get("azure_blob_storage_endpoint_domain_name"); final JsonNode containerName = config.get("azure_blob_storage_container_name"); + final int outputStreamBufferSizeFromConfig = + config.get("azure_blob_storage_output_buffer_size") != null + ? config.get("azure_blob_storage_output_buffer_size").asInt(DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE) + : DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE; + final JsonNode blobName = config.get("azure_blob_storage_blob_name"); // streamId final String endpointComputed = String.format(Locale.ROOT, DEFAULT_STORAGE_ENDPOINT_FORMAT, @@ -72,6 +85,7 @@ public static AzureBlobStorageDestinationConfig getAzureBlobStorageConfig(final accountNameFomConfig, accountKeyFromConfig, containerNameComputed, + outputStreamBufferSizeFromConfig, AzureBlobStorageFormatConfigs.getAzureBlobStorageFormatConfig(config)); } diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConstants.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConstants.java index 1cd15481997c4..ebf5e90326f7c 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConstants.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConstants.java @@ -10,6 +10,7 @@ public final class AzureBlobStorageDestinationConstants { public static final String DEFAULT_STORAGE_ENDPOINT_HTTP_PROTOCOL = "https"; public static final String DEFAULT_STORAGE_ENDPOINT_DOMAIN_NAME = "blob.core.windows.net"; public static final String DEFAULT_STORAGE_ENDPOINT_FORMAT = "%s://%s.%s"; + public static final int DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE = 5; private AzureBlobStorageDestinationConstants() {} diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java index 420202eac6d15..23e31bbf4d9ce 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java @@ -11,6 +11,7 @@ import io.airbyte.integrations.destination.azure_blob_storage.writer.BaseAzureBlobStorageWriter; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.io.BufferedOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.nio.charset.StandardCharsets; @@ -28,7 +29,7 @@ public class AzureBlobStorageCsvWriter extends BaseAzureBlobStorageWriter implem private final CsvSheetGenerator csvSheetGenerator; private final CSVPrinter csvPrinter; - private final BlobOutputStream blobOutputStream; + private final BufferedOutputStream blobOutputStream; public 
AzureBlobStorageCsvWriter(final AzureBlobStorageDestinationConfig config, final AppendBlobClient appendBlobClient, @@ -44,17 +45,17 @@ public AzureBlobStorageCsvWriter(final AzureBlobStorageDestinationConfig config, .create(configuredStream.getStream().getJsonSchema(), formatConfig); - this.blobOutputStream = appendBlobClient.getBlobOutputStream(); + this.blobOutputStream = new BufferedOutputStream(appendBlobClient.getBlobOutputStream(), config.getOutputStreamBufferSize()); if (isNewlyCreatedBlob) { this.csvPrinter = new CSVPrinter( - new PrintWriter(blobOutputStream, true, StandardCharsets.UTF_8), + new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8), CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL) .withHeader(csvSheetGenerator.getHeaderRow().toArray(new String[0]))); } else { // no header required for append this.csvPrinter = new CSVPrinter( - new PrintWriter(blobOutputStream, true, StandardCharsets.UTF_8), + new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8), CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL)); } } diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java index aed0849ede5e5..6a0406be7a7e2 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java @@ -17,6 +17,7 @@ import io.airbyte.integrations.destination.azure_blob_storage.writer.BaseAzureBlobStorageWriter; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.io.BufferedOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.nio.charset.StandardCharsets; @@ -32,7 +33,7 @@ public class AzureBlobStorageJsonlWriter extends BaseAzureBlobStorageWriter impl private static final ObjectMapper MAPPER = MoreMappers.initMapper(); private static final ObjectWriter WRITER = MAPPER.writer(); - private final BlobOutputStream blobOutputStream; + private final BufferedOutputStream blobOutputStream; private final PrintWriter printWriter; public AzureBlobStorageJsonlWriter(final AzureBlobStorageDestinationConfig config, @@ -41,8 +42,8 @@ public AzureBlobStorageJsonlWriter(final AzureBlobStorageDestinationConfig confi final boolean isNewlyCreatedBlob) { super(config, appendBlobClient, configuredStream); // at this moment we already receive appendBlobClient initialized - this.blobOutputStream = appendBlobClient.getBlobOutputStream(); - this.printWriter = new PrintWriter(blobOutputStream, true, StandardCharsets.UTF_8); + this.blobOutputStream = new BufferedOutputStream(appendBlobClient.getBlobOutputStream(), config.getOutputStreamBufferSize()); + this.printWriter = new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8); } @Override diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json index b3b7c6ea78979..d07ea2ae7f01e 100644 --- 
a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/resources/spec.json @@ -35,6 +35,7 @@ "examples": ["airbyte5storage"] }, "azure_blob_storage_account_key": { + "title": "Azure Blob Storage account key", "description": "The Azure blob storage account key.", "airbyte_secret": true, "type": "string", @@ -42,6 +43,15 @@ "Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd==" ] }, + "azure_blob_storage_output_buffer_size": { + "title": "Azure Blob Storage output buffer size (Megabytes)", + "type": "integer", + "description": "The amount of megabytes to buffer for the output stream to Azure. This will impact memory footprint on workers, but may need adjustment for performance and appropriate block size in Azure.", + "minimum": 1, + "maximum": 2047, + "default": 5, + "examples": [5] + }, "format": { "title": "Output Format", "type": "object", diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobDestinationAcceptanceTest.java index c067153276e9d..f4679ccf84006 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobDestinationAcceptanceTest.java @@ -45,8 +45,8 @@ public void testCheckInvalidAccountName() { final JsonNode invalidConfig = Jsons.jsonNode(ImmutableMap.builder() .put("azure_blob_storage_account_name", "someInvalidName") .put("azure_blob_storage_account_key", config.get("azure_blob_storage_account_key")) + .put("format", getJsonlFormatConfig()) .build()); - final AzureBlobStorageDestination azureBlobStorageDestination = new AzureBlobStorageDestination(); final AirbyteConnectionStatus checkResult = azureBlobStorageDestination.check(invalidConfig); @@ -58,6 +58,7 @@ public void testCheckInvalidKey() { final JsonNode invalidConfig = Jsons.jsonNode(ImmutableMap.builder() .put("azure_blob_storage_account_name", config.get("azure_blob_storage_account_name")) .put("azure_blob_storage_account_key", "someInvalidKey") + .put("format", getJsonlFormatConfig()) .build()); final AzureBlobStorageDestination azureBlobStorageDestination = new AzureBlobStorageDestination(); final AirbyteConnectionStatus checkResult = azureBlobStorageDestination.check(invalidConfig); @@ -71,6 +72,7 @@ public void testCheckInvaliDomainName() { .put("azure_blob_storage_account_name", config.get("azure_blob_storage_account_name")) .put("azure_blob_storage_account_key", config.get("azure_blob_storage_account_key")) .put("azure_blob_storage_endpoint_domain_name", "invalidDomain.com.invalid123") + .put("format", getJsonlFormatConfig()) .build()); final AzureBlobStorageDestination azureBlobStorageDestination = new AzureBlobStorageDestination(); final AirbyteConnectionStatus checkResult = azureBlobStorageDestination.check(invalidConfig); diff --git 
a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobDestinationTest.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobDestinationTest.java index 941bef0365c43..586debf3b06eb 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobDestinationTest.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobDestinationTest.java @@ -80,6 +80,22 @@ public void testSpec() throws Exception { assertNotNull(connectionSpecification); } + @Test + public void testConfigObjectCustomOutputBufferSize() { + final JsonNode config = Jsons.jsonNode(ImmutableMap.builder() + .put("azure_blob_storage_account_name", "accName") + .put("azure_blob_storage_account_key", "accKey") + .put("azure_blob_storage_endpoint_domain_name", "accDomainName.com") + .put("azure_blob_storage_output_buffer_size", 10) + .put("format", getFormatConfig()) + .build()); + final AzureBlobStorageDestinationConfig azureBlobStorageConfig = AzureBlobStorageDestinationConfig + .getAzureBlobStorageConfig(config); + + assertEquals(10 * 1024 * 1024, + azureBlobStorageConfig.getOutputStreamBufferSize()); + } + private JsonNode getFormatConfig() { return Jsons.deserialize("{\n" + " \"format_type\": \"JSONL\"\n" diff --git a/docs/integrations/destinations/azureblobstorage.md b/docs/integrations/destinations/azureblobstorage.md index 8c8c29b260687..94809438f453e 100644 --- a/docs/integrations/destinations/azureblobstorage.md +++ b/docs/integrations/destinations/azureblobstorage.md @@ -22,6 +22,7 @@ The Airbyte Azure Blob Storage destination allows you to sync data to Azure Blob | Azure blob storage container \(Bucket\) Name | string | A name of the Azure blob storage container. If not exists - will be created automatically. If leave empty, then will be created automatically airbytecontainer+timestamp. | | Azure Blob Storage account name | string | The account's name of the Azure Blob Storage. | | The Azure blob storage account key | string | Azure blob storage account key. Example: `abcdefghijklmnopqrstuvwxyz/0123456789+ABCDEFGHIJKLMNOPQRSTUVWXYZ/0123456789%++sampleKey==`. | +| Azure Blob Storage output buffer size | integer | Azure Blob Storage output buffer size, in megabytes. Example: 5 | | Format | object | Format specific configuration. See below for details. | ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured blob will be wiped out before each sync. We recommend you to provision a dedicated Azure Blob Storage Container resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ @@ -136,5 +137,7 @@ They will be like this in the output file: | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.1 | 2021-12-29 | [\#5332](https://github.com/airbytehq/airbyte/pull/9190) | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. | | 0.1.0 | 2021-08-30 | [\#5332](https://github.com/airbytehq/airbyte/pull/5332) | Initial release with JSONL and CSV output. 
| + From 158151230c105649be3ceb0c0a06b15579311d23 Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Wed, 12 Jan 2022 16:28:04 +0200 Subject: [PATCH 104/215] Remove intermediate docker images with DOCKER_BUILDKIT=1 (#9408) * DOCKER_BUILDKIT=1 added Signed-off-by: Sergey Chvalyuk --- airbyte-integrations/bases/source-acceptance-test/Dockerfile | 2 +- .../bases/source-acceptance-test/unit_tests/test_utils.py | 2 +- tools/bin/build_image.sh | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/Dockerfile b/airbyte-integrations/bases/source-acceptance-test/Dockerfile index 7f9633a02b62b..ad60ff025240f 100644 --- a/airbyte-integrations/bases/source-acceptance-test/Dockerfile +++ b/airbyte-integrations/bases/source-acceptance-test/Dockerfile @@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./ COPY source_acceptance_test ./source_acceptance_test RUN pip install . -LABEL io.airbyte.version=0.1.41 +LABEL io.airbyte.version=0.1.42 LABEL io.airbyte.name=airbyte/source-acceptance-test ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"] diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py index 2d8840e50c5cc..e8042c0ad9613 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py @@ -281,7 +281,7 @@ def test_docker_runner(command, wait_timeout, expected_count): assert set(lines) == set(["something\n"]) assert len(lines) == expected_count - for container in client.containers.list(all=True): + for container in client.containers.list(all=True, ignore_removed=True): assert container.id != new_container.id, "Container should be removed after reading" diff --git a/tools/bin/build_image.sh b/tools/bin/build_image.sh index 6bf078efa9426..3c41ced70fdc6 100755 --- a/tools/bin/build_image.sh +++ b/tools/bin/build_image.sh @@ -8,6 +8,8 @@ DOCKERFILE="$3" TAGGED_IMAGE="$4" ID_FILE="$5" FOLLOW_SYMLINKS="$6" +# https://docs.docker.com/develop/develop-images/build_enhancements/ +export DOCKER_BUILDKIT=1 cd "$ROOT_DIR" . 
tools/lib/lib.sh From 5383439e6dd7f5aead72a3b22b91fafc526bfcc6 Mon Sep 17 00:00:00 2001 From: VitaliiMaltsev <39538064+VitaliiMaltsev@users.noreply.github.com> Date: Wed, 12 Jan 2022 19:03:09 +0200 Subject: [PATCH 105/215] Destination Snowflake add test to avoid duplicated staged data (#9412) * fix for jdk 17 * added unit test * refactoring * replace Exception with SQLException Co-authored-by: vmaltsev --- ...owflakeInternalStagingConsumerFactory.java | 4 +- .../snowflake/SnowflakeDestinationTest.java | 71 ++++++++++++++++++- 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java index 74f1e2358c1c8..4017ca7618794 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java @@ -22,6 +22,8 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.DestinationSyncMode; + +import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -178,7 +180,7 @@ private OnCloseFunction onCloseFunction(final JdbcDatabase database, path); try { sqlOperations.copyIntoTmpTableFromStage(database, path, srcTableName, schemaName); - } catch (Exception e){ + } catch (SQLException e){ sqlOperations.cleanUpStage(database, path); LOGGER.info("Cleaning stage path {}", path); throw new RuntimeException("Failed to upload data from stage "+ path, e); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java index 2143cdcd22521..021fa9ed88feb 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java @@ -4,14 +4,36 @@ package io.airbyte.integrations.destination.snowflake; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.*; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.io.IOs; import io.airbyte.commons.jackson.MoreMappers; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.base.Destination; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.CatalogHelpers; 
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.DestinationSyncMode; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaPrimitive; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; +import java.nio.file.Path; +import java.sql.SQLException; +import java.time.Instant; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + public class SnowflakeDestinationTest { private static final ObjectMapper mapper = MoreMappers.initMapper(); @@ -53,4 +75,49 @@ public void useInsertStrategyTest() { assertFalse(SnowflakeDestination.isS3Copy(stubConfig)); } + @Test + public void testCleanupStageOnFailure() throws Exception { + + JdbcDatabase mockDb = mock(JdbcDatabase.class); + SnowflakeStagingSqlOperations sqlOperations = mock(SnowflakeStagingSqlOperations.class); + final var testMessages = generateTestMessages(); + final JsonNode config = Jsons.deserialize(IOs.readFile(Path.of("secrets/insert_config.json"))); + + AirbyteMessageConsumer airbyteMessageConsumer = new SnowflakeInternalStagingConsumerFactory() + .create(Destination::defaultOutputRecordCollector, mockDb, + sqlOperations, new SnowflakeSQLNameTransformer(), config, getCatalog()); + doThrow(SQLException.class).when(sqlOperations).copyIntoTmpTableFromStage(any(),anyString(),anyString(),anyString()); + + airbyteMessageConsumer.start(); + for (AirbyteMessage m : testMessages) { + airbyteMessageConsumer.accept(m); + } + assertThrows(RuntimeException.class, airbyteMessageConsumer::close); + + verify(sqlOperations, times(1)).cleanUpStage(any(),anyString()); + } + + private List generateTestMessages() { + return IntStream.range(0, 3) + .boxed() + .map(i -> new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream("test") + .withNamespace("test_staging") + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "human " + i))))) + .collect(Collectors.toList()); + } + + ConfiguredAirbyteCatalog getCatalog() { + return new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + "test", + "test_staging", + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)) + .withDestinationSyncMode(DestinationSyncMode.OVERWRITE))); + } + } From e1968f63f9bfb90fec5ed1784d893338515f5b0f Mon Sep 17 00:00:00 2001 From: Abhi Vaidyanatha Date: Wed, 12 Jan 2022 13:21:38 -0800 Subject: [PATCH 106/215] Add Deploy on K8s Button to README (#9452) --- README.md | 2 +- docs/.gitbook/assets/deploy_locally.svg | 18 ++++++++++++++++++ docs/.gitbook/assets/deploy_on_aws.svg | 18 ++++++++++++++++++ docs/.gitbook/assets/deploy_on_gcp.svg | 18 ++++++++++++++++++ docs/.gitbook/assets/deploy_on_k8s.svg | 18 ++++++++++++++++++ 5 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 docs/.gitbook/assets/deploy_locally.svg create mode 100644 docs/.gitbook/assets/deploy_on_aws.svg create mode 100644 docs/.gitbook/assets/deploy_on_gcp.svg create mode 100644 docs/.gitbook/assets/deploy_on_k8s.svg diff --git a/README.md b/README.md index 719944ec993e8..871157a17068a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ **Data integration made simple, secure and extensible.** The new open-source standard to sync data from applications, APIs & databases to warehouses, lakes & other destinations. 
-[![](docs/.gitbook/assets/deploy-locally.svg)](docs/deploying-airbyte/local-deployment.md) [![](docs/.gitbook/assets/deploy-on-aws.svg)](docs/deploying-airbyte/on-aws-ec2.md) [![](docs/.gitbook/assets/deploy-on-gcp.svg)](docs/deploying-airbyte/on-gcp-compute-engine.md) +[![](docs/.gitbook/assets/deploy_locally.svg)](docs/deploying-airbyte/local-deployment.md) [![](docs/.gitbook/assets/deploy_on_aws.svg)](docs/deploying-airbyte/on-aws-ec2.md) [![](docs/.gitbook/assets/deploy_on_gcp.svg)](docs/deploying-airbyte/on-gcp-compute-engine.md) [![](docs/.gitbook/assets/deploy_on_k8s.svg)](docs/deploying-airbyte/on-kubernetes.md) ![](docs/.gitbook/assets/airbyte-ui-for-your-integration-pipelines.png) diff --git a/docs/.gitbook/assets/deploy_locally.svg b/docs/.gitbook/assets/deploy_locally.svg new file mode 100644 index 0000000000000..3a75de1889ddf --- /dev/null +++ b/docs/.gitbook/assets/deploy_locally.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/docs/.gitbook/assets/deploy_on_aws.svg b/docs/.gitbook/assets/deploy_on_aws.svg new file mode 100644 index 0000000000000..1f76d07a134cf --- /dev/null +++ b/docs/.gitbook/assets/deploy_on_aws.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/docs/.gitbook/assets/deploy_on_gcp.svg b/docs/.gitbook/assets/deploy_on_gcp.svg new file mode 100644 index 0000000000000..140e02a32f303 --- /dev/null +++ b/docs/.gitbook/assets/deploy_on_gcp.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/docs/.gitbook/assets/deploy_on_k8s.svg b/docs/.gitbook/assets/deploy_on_k8s.svg new file mode 100644 index 0000000000000..a8a02d4ff1f4a --- /dev/null +++ b/docs/.gitbook/assets/deploy_on_k8s.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + From 9b1ed03fe482f5154f6c1843b1be76de87f3605d Mon Sep 17 00:00:00 2001 From: Abhi Vaidyanatha Date: Wed, 12 Jan 2022 13:56:42 -0800 Subject: [PATCH 107/215] Add Plural deployment instructions (#9455) * Add initial Plural installation instructions. * Add to SUMMARY.md --- docs/SUMMARY.md | 1 + docs/deploying-airbyte/on-kubernetes.md | 4 +++ docs/deploying-airbyte/on-plural.md | 43 +++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 docs/deploying-airbyte/on-plural.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 34ff335e9cf59..56c36d6dec8e2 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -14,6 +14,7 @@ * [On Azure(VM)](deploying-airbyte/on-azure-vm-cloud-shell.md) * [On GCP (Compute Engine)](deploying-airbyte/on-gcp-compute-engine.md) * [On Kubernetes (Beta)](deploying-airbyte/on-kubernetes.md) + * [On Plural (Beta)](deploying-airbyte/on-plural.md) * [On Oracle Cloud Infrastructure VM](deploying-airbyte/on-oci-vm.md) * [On Digital Ocean Droplet](deploying-airbyte/on-digitalocean-droplet.md) * [Operator Guides](operator-guides/README.md) diff --git a/docs/deploying-airbyte/on-kubernetes.md b/docs/deploying-airbyte/on-kubernetes.md index 5b065e1a51055..00d136d4005da 100644 --- a/docs/deploying-airbyte/on-kubernetes.md +++ b/docs/deploying-airbyte/on-kubernetes.md @@ -4,6 +4,10 @@ Airbyte allows scaling sync workloads horizontally using Kubernetes. The core components \(api server, scheduler, etc\) run as deployments while the scheduler launches connector-related pods on different nodes. +## Quickstart + +If you don't want to configure your own K8s cluster and Airbyte instance, you can use the free, open-source project [Plural](https://www.plural.sh/) to bring up a K8s cluster and Airbyte for you. 
Use [this guide](on-plural.md) to get started. + ## Getting Started ### Cluster Setup diff --git a/docs/deploying-airbyte/on-plural.md b/docs/deploying-airbyte/on-plural.md new file mode 100644 index 0000000000000..f6ea39b3033ad --- /dev/null +++ b/docs/deploying-airbyte/on-plural.md @@ -0,0 +1,43 @@ +# On Plural (Beta) + +## Overview + +Plural is a unified application deployment platform that makes it easy to run open-source software on Kubernetes. It aims to make applications as portable as possible, without sacrificing the ability for the users to own the applications they desire to use. + +## Getting Started + +First, install Plural and the Plural CLI by following steps 1, 2, and 3 of the instructions [here](https://docs.plural.sh/getting-started). Through this, you will also configure your cloud provider and the domain name under which your +application will be deployed to. + +Then create a fresh Git repo to store your Plural installation and from within the repo, run: + +```bash +plural init +``` + +This configures your installation and cloud provider for the repo. You're now ready to install Airbyte on your Plural repo! + +## Installing Airbyte + +To install Airbyte on your Plural repo, simply run: + +```bash +plural bundle install airbyte airbyte-aws +``` + +The CLI will prompt you to choose whether or not you want to use Plural OIDC, which means you're using Plural as your identity provider for SSO. + +After this, run: + +```bash +plural build +plural deploy --commit "Initial Deploy." +``` + +## Accessing your Airbyte Installation + +Now, just head over to airbyte.SUBDOMAIN_NAME.onplural.sh to access the Airbyte UI. + +## Monitoring your Installation + +To monitor and manage your Airbyte installation, head over to the Plural control panel at console.YOUR_ORGANIZATION.onplural.sh. From 22db152ab6fefaddc674c84731246c516eaedafe Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Thu, 13 Jan 2022 14:28:51 +0800 Subject: [PATCH 108/215] Use patched version. (#9466) Due to actions/runner#1605, all our builds are failing. This changes pins us to machulav/ec2-github-runner#88 until a more stable solution is available. Either the upstream action merges this in and we bump our version, or Github reverts their breaking change. 
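For illustration, the pinned step then looks roughly like the sketch below. This is only a sketch of the change carried by this patch: the action reference and commit SHA are the ones introduced in the diff that follows, and the `with:` inputs shown are assumed to remain as they already are in the `start-aws-runner` action.

```yaml
# Sketch: pin the EC2 runner action to a specific fork commit instead of a floating tag.
- name: Start EC2 runner
  id: start-ec2-runner
  # Pinned to the patched fork of machulav/ec2-github-runner until the upstream action
  # merges the fix or GitHub reverts the breaking runner change.
  uses: skyzh/ec2-github-runner@ba2298a67875dfdd29a88fafbc1ba27f4f94af39
  with:
    mode: start
    github-token: ${{ inputs.github-token }}
```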
--- .github/actions/start-aws-runner/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/start-aws-runner/action.yml b/.github/actions/start-aws-runner/action.yml index 1e39cda0eb437..c9fab572b7171 100644 --- a/.github/actions/start-aws-runner/action.yml +++ b/.github/actions/start-aws-runner/action.yml @@ -40,7 +40,7 @@ runs: aws-region: us-east-2 - name: Start EC2 runner id: start-ec2-runner - uses: machulav/ec2-github-runner@v2.3.0 + uses: skyzh/ec2-github-runner@ba2298a67875dfdd29a88fafbc1ba27f4f94af39 with: mode: start github-token: ${{ inputs.github-token }} From dc24d87fe1c3bead75bef16a10daefe6ec0e9c03 Mon Sep 17 00:00:00 2001 From: Augustin Date: Thu, 13 Jan 2022 08:41:30 +0100 Subject: [PATCH 109/215] Documentation: high level suggestions on how to secure Airbyte (#9448) --- docs/operator-guides/securing-airbyte.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 docs/operator-guides/securing-airbyte.md diff --git a/docs/operator-guides/securing-airbyte.md b/docs/operator-guides/securing-airbyte.md new file mode 100644 index 0000000000000..d1d7c3b3c02c2 --- /dev/null +++ b/docs/operator-guides/securing-airbyte.md @@ -0,0 +1,19 @@ +# Securing Airbyte access + +Airbyte, in its open-source version, does not support RBAC to manage access to the UI. + +However, multiple options exist for the operators to implement access control themselves. + +To secure access to Airbyte you have three options: +* Networking restrictions: deploy Airbyte in a private network or use a firewall to filter which IP is allowed to access your host. +* Put Airbyte behind a reverse proxy and handle the access control on the reverse proxy side. +* If you deployed Airbyte on a cloud provider: + * GCP: use the [Identidy-Aware proxy](https://cloud.google.com/iap) service + * AWS: use the [AWS Systems Manager Session Manager](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager.html) service + +**Non exhaustive** online resources list to set up auth on your reverse proxy: +* [Configure HTTP Basic Auth on NGINX for Airbyte](https://shadabshaukat.medium.com/deploy-and-secure-airbyte-with-nginx-reverse-proxy-basic-authentication-lets-encrypt-ssl-72bee223a4d9) +* [Kubernetes: Basic auth on a Nginx ingress controller](https://kubernetes.github.io/ingress-nginx/examples/auth/basic/) +* [How to set up Okta SSO on an NGINX reverse proxy](https://developer.okta.com/blog/2018/08/28/nginx-auth-request) +* [How to enable HTTP Basic Auth on Caddy](https://caddyserver.com/docs/caddyfile/directives/basicauth) +* [SSO for Traefik](https://github.com/thomseddon/traefik-forward-auth) From ecfc9e1cc586c7d7725e3c92ed178a5ea14ff7bf Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Thu, 13 Jan 2022 15:55:13 +0530 Subject: [PATCH 110/215] Correcting base URL for the example API (#9245) --- docs/connector-development/tutorials/cdk-speedrun.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/connector-development/tutorials/cdk-speedrun.md b/docs/connector-development/tutorials/cdk-speedrun.md index f216a1c8b6993..fdefb56ba76f3 100644 --- a/docs/connector-development/tutorials/cdk-speedrun.md +++ b/docs/connector-development/tutorials/cdk-speedrun.md @@ -130,7 +130,7 @@ In your `source.py` file, add this `Pokemon` class. This stream represents an en ```python class Pokemon(HttpStream): - url_base = "https://api.exchangeratesapi.io/" + url_base = "https://pokeapi.co/api/v2/" # Set this as a noop. 
primary_key = None From 2b0d0bdef634c2284a3e4945aa0551e5693419f2 Mon Sep 17 00:00:00 2001 From: VitaliiMaltsev <39538064+VitaliiMaltsev@users.noreply.github.com> Date: Thu, 13 Jan 2022 15:41:34 +0200 Subject: [PATCH 111/215] Source MongoDB fetch authorized collections only (#9238) * fix for jdk 17 * Source MongoDB show authorized collections * add javadoc * fixed checkstyle * add CHANGELOG * fix checkstyle * refactoring * bump version anf fix checkstyle Co-authored-by: vmaltsev --- .../b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- ...owflakeInternalStagingConsumerFactory.java | 34 +++++++++---------- .../SnowflakeSQLNameTransformer.java | 3 +- .../SnowflakeStagingSqlOperations.java | 1 + .../connectors/source-mongodb-v2/Dockerfile | 2 +- .../connectors/source-mongodb-v2/build.gradle | 2 +- .../MongoDbSource.java | 25 ++++++++++++-- docs/integrations/sources/mongodb-v2.md | 1 + 9 files changed, 47 insertions(+), 25 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json index ca312b3e32319..3543414abd73c 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e", "name": "MongoDb", "dockerRepository": "airbyte/source-mongodb-v2", - "dockerImageTag": "0.1.10", + "dockerImageTag": "0.1.11", "documentationUrl": "https://docs.airbyte.io/integrations/sources/mongodb-v2", "icon": "mongodb.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 6f853b92ecad3..caf5401fa0ff1 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -426,7 +426,7 @@ - name: MongoDb sourceDefinitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e dockerRepository: airbyte/source-mongodb-v2 - dockerImageTag: 0.1.10 + dockerImageTag: 0.1.11 documentationUrl: https://docs.airbyte.io/integrations/sources/mongodb-v2 icon: mongodb.svg sourceType: database diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java index 4017ca7618794..52844d93c786d 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java @@ -22,8 +22,6 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.DestinationSyncMode; - -import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -51,11 +49,11 @@ public class SnowflakeInternalStagingConsumerFactory { 
private final String CURRENT_SYNC_PATH = UUID.randomUUID().toString(); public AirbyteMessageConsumer create(final Consumer outputRecordCollector, - final JdbcDatabase database, - final SnowflakeStagingSqlOperations sqlOperations, - final SnowflakeSQLNameTransformer namingResolver, - final JsonNode config, - final ConfiguredAirbyteCatalog catalog) { + final JdbcDatabase database, + final SnowflakeStagingSqlOperations sqlOperations, + final SnowflakeSQLNameTransformer namingResolver, + final JsonNode config, + final ConfiguredAirbyteCatalog catalog) { final List writeConfigs = createWriteConfigs(namingResolver, config, catalog); return new BufferedStreamConsumer( @@ -135,10 +133,10 @@ private static AirbyteStreamNameNamespacePair toNameNamespacePair(final WriteCon } private RecordWriter recordWriterFunction(final JdbcDatabase database, - final SqlOperations snowflakeSqlOperations, - final List writeConfigs, - final ConfiguredAirbyteCatalog catalog, - final SnowflakeSQLNameTransformer namingResolver) { + final SqlOperations snowflakeSqlOperations, + final List writeConfigs, + final ConfiguredAirbyteCatalog catalog, + final SnowflakeSQLNameTransformer namingResolver) { final Map pairToWriteConfig = writeConfigs.stream() .collect(Collectors.toUnmodifiableMap( @@ -160,9 +158,9 @@ private RecordWriter recordWriterFunction(final JdbcDatabase database, } private OnCloseFunction onCloseFunction(final JdbcDatabase database, - final SnowflakeStagingSqlOperations sqlOperations, - final List writeConfigs, - final SnowflakeSQLNameTransformer namingResolver) { + final SnowflakeStagingSqlOperations sqlOperations, + final List writeConfigs, + final SnowflakeSQLNameTransformer namingResolver) { return (hasFailed) -> { if (!hasFailed) { final List queryList = new ArrayList<>(); @@ -176,14 +174,14 @@ private OnCloseFunction onCloseFunction(final JdbcDatabase database, final String path = namingResolver.getStagingPath(schemaName, dstTableName, CURRENT_SYNC_PATH); LOGGER.info("Uploading data from stage: stream {}. 
schema {}, tmp table {}, stage path {}", writeConfig.getStreamName(), schemaName, - srcTableName, - path); + srcTableName, + path); try { sqlOperations.copyIntoTmpTableFromStage(database, path, srcTableName, schemaName); - } catch (SQLException e){ + } catch (Exception e) { sqlOperations.cleanUpStage(database, path); LOGGER.info("Cleaning stage path {}", path); - throw new RuntimeException("Failed to upload data from stage "+ path, e); + throw new RuntimeException("Failed to upload data from stage " + path, e); } sqlOperations.createTableIfNotExists(database, schemaName, dstTableName); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java index 18c97338f39a6..373c3aa099830 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java @@ -18,6 +18,7 @@ public String getStageName(String schemaName, String outputTableName) { } public String getStagingPath(String schemaName, String tableName, String currentSyncPath) { - return (getStageName(schemaName,tableName)+"/staged/"+currentSyncPath).toUpperCase(); + return (getStageName(schemaName, tableName) + "/staged/" + currentSyncPath).toUpperCase(); } + } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java index 6fa6a7c65df55..a1ba41ed47fa5 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java @@ -85,4 +85,5 @@ public void cleanUpStage(JdbcDatabase database, String path) throws SQLException public boolean isSchemaExists(JdbcDatabase database, String outputSchema) throws Exception { return database.query(SHOW_SCHEMAS).map(schemas -> schemas.get(NAME).asText()).anyMatch(outputSchema::equalsIgnoreCase); } + } diff --git a/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile b/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile index de0380fd37d97..6984e28285045 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile +++ b/airbyte-integrations/connectors/source-mongodb-v2/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-mongodb-v2 COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.10 +LABEL io.airbyte.version=0.1.11 LABEL io.airbyte.name=airbyte/source-mongodb-v2 diff --git a/airbyte-integrations/connectors/source-mongodb-v2/build.gradle b/airbyte-integrations/connectors/source-mongodb-v2/build.gradle index 89f3ccef41950..b8a449c3c8799 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/build.gradle +++ b/airbyte-integrations/connectors/source-mongodb-v2/build.gradle @@ -16,7 +16,7 @@ dependencies { implementation 
files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) implementation project(':airbyte-integrations:connectors:source-relational-db') - implementation 'org.mongodb:mongodb-driver-sync:4.3.0' + implementation 'org.mongodb:mongodb-driver-sync:4.4.0' testImplementation 'org.testcontainers:mongodb:1.15.3' diff --git a/airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io.airbyte.integrations.source.mongodb/MongoDbSource.java b/airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io.airbyte.integrations.source.mongodb/MongoDbSource.java index 9af7c89f6c8f1..98e10ef509049 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io.airbyte.integrations.source.mongodb/MongoDbSource.java +++ b/airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io.airbyte.integrations.source.mongodb/MongoDbSource.java @@ -90,7 +90,7 @@ public List> getCheckOperations(final throws Exception { final List> checkList = new ArrayList<>(); checkList.add(database -> { - if (database.getCollectionNames().isEmpty()) { + if (getAuthorizedCollections(database).isEmpty()) { throw new Exception("Unable to execute any operation on the source!"); } else { LOGGER.info("The source passed the basic operation test!"); @@ -114,7 +114,7 @@ protected List>> discoverInternal(final MongoDat throws Exception { final List>> tableInfos = new ArrayList<>(); - for (final String collectionName : database.getCollectionNames()) { + for (final String collectionName : getAuthorizedCollections(database)) { final MongoCollection collection = database.getCollection(collectionName); final Map uniqueFields = MongoUtils.getUniqueFields(collection); @@ -135,6 +135,27 @@ protected List>> discoverInternal(final MongoDat return tableInfos; } + private Set getAuthorizedCollections(MongoDatabase database) { + /* + * db.runCommand ({listCollections: 1.0, authorizedCollections: true, nameOnly: true }) the command + * returns only those collections for which the user has privileges. For example, if a user has find + * action on specific collections, the command returns only those collections; or, if a user has + * find or any other action, on the database resource, the command lists all collections in the + * database. 
+ */ + Document document = database.getDatabase().runCommand(new Document("listCollections", 1) + .append("authorizedCollections", true) + .append("nameOnly", true)) + .append("filter", "{ 'type': 'collection' }"); + return document.toBsonDocument() + .get("cursor").asDocument() + .getArray("firstBatch") + .stream() + .map(bsonValue -> bsonValue.asDocument().getString("name").getValue()) + .collect(Collectors.toSet()); + + } + @Override protected List>> discoverInternal(final MongoDatabase database, final String schema) throws Exception { // MondoDb doesn't support schemas diff --git a/docs/integrations/sources/mongodb-v2.md b/docs/integrations/sources/mongodb-v2.md index 0db57aa9c996c..7f93671fd9417 100644 --- a/docs/integrations/sources/mongodb-v2.md +++ b/docs/integrations/sources/mongodb-v2.md @@ -102,6 +102,7 @@ For more information regarding configuration parameters, please see [MongoDb Doc | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.11 | 2022-01-10 | [9238](https://github.com/airbytehq/airbyte/pull/9238) | Return only those collections for which the user has privileges | | 0.1.10 | 2021-12-30 | [9202](https://github.com/airbytehq/airbyte/pull/9202) | Update connector fields title/description | | 0.1.9 | 2021-12-07 | [8491](https://github.com/airbytehq/airbyte/pull/8491) | Configure 10000 limit doc reading during Discovery step | | 0.1.8 | 2021-11-29 | [8306](https://github.com/airbytehq/airbyte/pull/8306) | Added milliseconds for date format for cursor | From 453470358962a096cd675a06eac9f11552aa3620 Mon Sep 17 00:00:00 2001 From: Eugene Date: Thu, 13 Jan 2022 16:24:38 +0200 Subject: [PATCH 112/215] =?UTF-8?q?=F0=9F=8E=89Source=20Postgres:=20Set=20?= =?UTF-8?q?up=20connection=20-=20add=20schema=20selection=20(#9360)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [1435] Source Postgres: Set up connection - added schema selection --- .../decd338e-5647-4c0b-adf4-da0e75f5a750.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 21 ++++++++++--- .../base/ssh/SshBastionContainer.java | 5 +++ .../source/mssql/CdcMssqlSourceTest.java | 1 + .../source-postgres-strict-encrypt/Dockerfile | 2 +- .../src/test/resources/expected_spec.json | 18 +++++++++-- .../connectors/source-postgres/Dockerfile | 2 +- .../source/postgres/PostgresSource.java | 31 +++++++++++++++++++ .../src/main/resources/spec.json | 20 +++++++++--- ...stractSshPostgresSourceAcceptanceTest.java | 2 +- .../CdcPostgresSourceAcceptanceTest.java | 1 + .../CdcPostgresSourceDatatypeTest.java | 5 ++- .../sources/PostgresSourceAcceptanceTest.java | 1 + .../sources/PostgresSourceDatatypeTest.java | 8 ++--- .../PostgresRdsSourcePerformanceTest.java | 10 ++++-- .../postgres/CdcPostgresSourceTest.java | 1 + .../PostgresJdbcSourceAcceptanceTest.java | 2 ++ .../postgres/PostgresSourceSSLTest.java | 1 + .../source/postgres/PostgresSourceTest.java | 2 ++ .../source/postgres/PostgresSpecTest.java | 8 +++++ docs/integrations/sources/postgres.md | 5 +-- 22 files changed, 122 insertions(+), 28 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json index e97a5f07c782b..a85e89d23cdee 100644 --- 
a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "decd338e-5647-4c0b-adf4-da0e75f5a750", "name": "Postgres", "dockerRepository": "airbyte/source-postgres", - "dockerImageTag": "0.4.1", + "dockerImageTag": "0.4.2", "documentationUrl": "https://docs.airbyte.io/integrations/sources/postgres", "icon": "postgresql.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index caf5401fa0ff1..d7ea734d5397f 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -537,7 +537,7 @@ - name: Postgres sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750 dockerRepository: airbyte/source-postgres - dockerImageTag: 0.4.1 + dockerImageTag: 0.4.2 documentationUrl: https://docs.airbyte.io/integrations/sources/postgres icon: postgresql.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 374be877a4b75..7024482bc301e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -5503,7 +5503,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-postgres:0.4.1" +- dockerImage: "airbyte/source-postgres:0.4.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" connectionSpecification: @@ -5537,28 +5537,39 @@ description: "Name of the database." type: "string" order: 2 + schemas: + title: "Schemas" + description: "The list of schemas to sync from. Defaults to user. Case sensitive." + type: "array" + items: + type: "string" + minItems: 0 + uniqueItems: true + default: + - "public" + order: 3 username: title: "User" description: "Username to use to access the database." type: "string" - order: 3 + order: 4 password: title: "Password" description: "Password associated with the username." type: "string" airbyte_secret: true - order: 4 + order: 5 ssl: title: "Connect using SSL" description: "Encrypt client/server communications for increased security." type: "boolean" default: false - order: 5 + order: 6 replication_method: type: "object" title: "Replication Method" description: "Replication method to use for extracting data from the database." 
- order: 6 + order: 7 oneOf: - title: "Standard" additionalProperties: false diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/ssh/SshBastionContainer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/ssh/SshBastionContainer.java index a9422a3671bf3..d3b1af0e408f8 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/ssh/SshBastionContainer.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/ssh/SshBastionContainer.java @@ -11,6 +11,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import java.io.IOException; +import java.util.List; import java.util.Objects; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.JdbcDatabaseContainer; @@ -57,6 +58,10 @@ public ImmutableMap.Builder getBasicDbConfigBuider(final JdbcDat return getBasicDbConfigBuider(db, db.getDatabaseName()); } + public ImmutableMap.Builder getBasicDbConfigBuider(final JdbcDatabaseContainer db, final List schemas) { + return getBasicDbConfigBuider(db, db.getDatabaseName()).put("schemas", schemas); + } + public ImmutableMap.Builder getBasicDbConfigBuider(final JdbcDatabaseContainer db, final String schemaName) { return ImmutableMap.builder() .put("host", Objects.requireNonNull(db.getContainerInfo().getNetworkSettings() diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java index 3a52aa5c616da..26efe159ff879 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java @@ -77,6 +77,7 @@ private void init() { .put("host", container.getHost()) .put("port", container.getFirstMappedPort()) .put("database", dbName) + .put("schemas", List.of(MODELS_SCHEMA, MODELS_SCHEMA + "_random")) .put("username", TEST_USER_NAME) .put("password", TEST_USER_PASSWORD) .put("replication_method", "CDC") diff --git a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile index a303e5b1dd99d..bc6b8eb314c79 100644 --- a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.6 +LABEL io.airbyte.version=0.1.7 LABEL io.airbyte.name=airbyte/source-postgres-strict-encrypt diff --git a/airbyte-integrations/connectors/source-postgres-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-postgres-strict-encrypt/src/test/resources/expected_spec.json index f48918dc55348..c786b05f15be5 100644 --- a/airbyte-integrations/connectors/source-postgres-strict-encrypt/src/test/resources/expected_spec.json +++ b/airbyte-integrations/connectors/source-postgres-strict-encrypt/src/test/resources/expected_spec.json @@ -29,24 +29,36 @@ "type": "string", "order": 2 }, + "schemas": { + "title": "Schemas", + "description": "The list of schemas to sync from. Defaults to user. 
Case sensitive.", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0, + "uniqueItems": true, + "default": ["public"], + "order": 3 + }, "username": { "title": "User", "description": "Username to use to access the database.", "type": "string", - "order": 3 + "order": 4 }, "password": { "title": "Password", "description": "Password associated with the username.", "type": "string", "airbyte_secret": true, - "order": 4 + "order": 5 }, "replication_method": { "type": "object", "title": "Replication Method", "description": "Replication method to use for extracting data from the database.", - "order": 6, + "order": 7, "oneOf": [ { "title": "Standard", diff --git a/airbyte-integrations/connectors/source-postgres/Dockerfile b/airbyte-integrations/connectors/source-postgres/Dockerfile index 7b76636deed89..98017bc25cc9b 100644 --- a/airbyte-integrations/connectors/source-postgres/Dockerfile +++ b/airbyte-integrations/connectors/source-postgres/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.4.1 +LABEL io.airbyte.version=0.4.2 LABEL io.airbyte.name=airbyte/source-postgres diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 9cdc63b7a1801..1c8c0c7713575 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -50,6 +50,7 @@ public class PostgresSource extends AbstractJdbcSource implements Sour public static final String CDC_LSN = "_ab_cdc_lsn"; static final String DRIVER_CLASS = "org.postgresql.Driver"; + private List schemas; public static Source sshWrappedSource() { return new SshWrappedSource(new PostgresSource(), List.of("host"), List.of("port")); @@ -81,6 +82,17 @@ public JsonNode toDatabaseConfigStatic(final JsonNode config) { additionalParameters.add("sslmode=require"); } + if (config.has("schemas") && config.get("schemas").isArray()) { + schemas = new ArrayList<>(); + for (final JsonNode schema : config.get("schemas")) { + schemas.add(schema.asText()); + } + } + + if (schemas != null && !schemas.isEmpty()) { + additionalParameters.add("currentSchema=" + String.join(",", schemas)); + } + additionalParameters.forEach(x -> jdbcUrl.append(x).append("&")); final ImmutableMap.Builder configBuilder = ImmutableMap.builder() @@ -116,6 +128,25 @@ public AirbyteCatalog discover(final JsonNode config) throws Exception { return catalog; } + @Override + public List>> discoverInternal(JdbcDatabase database) throws Exception { + if (schemas != null && !schemas.isEmpty()) { + // process explicitly selected (from UI) schemas + final List>> internals = new ArrayList<>(); + for (String schema : schemas) { + LOGGER.debug("Discovering schema: {}", schema); + internals.addAll(super.discoverInternal(database, schema)); + } + for (TableInfo> info : internals) { + LOGGER.debug("Found table (schema: {}): {}", info.getNameSpace(), info.getName()); + } + return internals; + } else { + LOGGER.info("No schemas explicitly set on UI to process, so will process all of existing schemas in DB"); + return super.discoverInternal(database); + } + } + @Override public List> getCheckOperations(final JsonNode config) throws 
Exception { diff --git a/airbyte-integrations/connectors/source-postgres/src/main/resources/spec.json b/airbyte-integrations/connectors/source-postgres/src/main/resources/spec.json index 8f14ee312f0c6..bf8d5c02acada 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-postgres/src/main/resources/spec.json @@ -29,31 +29,43 @@ "type": "string", "order": 2 }, + "schemas": { + "title": "Schemas", + "description": "The list of schemas to sync from. Defaults to user. Case sensitive.", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0, + "uniqueItems": true, + "default": ["public"], + "order": 3 + }, "username": { "title": "User", "description": "Username to use to access the database.", "type": "string", - "order": 3 + "order": 4 }, "password": { "title": "Password", "description": "Password associated with the username.", "type": "string", "airbyte_secret": true, - "order": 4 + "order": 5 }, "ssl": { "title": "Connect using SSL", "description": "Encrypt client/server communications for increased security.", "type": "boolean", "default": false, - "order": 5 + "order": 6 }, "replication_method": { "type": "object", "title": "Replication Method", "description": "Replication method to use for extracting data from the database.", - "order": 6, + "order": 7, "oneOf": [ { "title": "Standard", diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java index b0c855a42a82f..081b8928cf88a 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java @@ -43,7 +43,7 @@ public abstract class AbstractSshPostgresSourceAcceptanceTest extends SourceAcce @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { startTestContainers(); - config = bastion.getTunnelConfig(getTunnelMethod(), bastion.getBasicDbConfigBuider(db)); + config = bastion.getTunnelConfig(getTunnelMethod(), bastion.getBasicDbConfigBuider(db, List.of("public"))); populateDatabaseTestData(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java index 62124ae28e375..ed8c94b8b51f6 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java @@ -67,6 +67,7 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc .put("host", container.getHost()) .put("port", 
container.getFirstMappedPort()) .put("database", container.getDatabaseName()) + .put("schemas", List.of(NAMESPACE)) .put("username", container.getUsername()) .put("password", container.getPassword()) .put("replication_method", replicationMethod) diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java index 479eb293a88cf..c4964892eac64 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java @@ -13,12 +13,14 @@ import io.airbyte.integrations.standardtest.source.TestDataHolder; import io.airbyte.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.protocol.models.JsonSchemaPrimitive; +import java.util.List; import org.jooq.SQLDialect; import org.testcontainers.containers.PostgreSQLContainer; import org.testcontainers.utility.MountableFile; public class CdcPostgresSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { + private static final String SCHEMA_NAME = "test"; private static final String SLOT_NAME_BASE = "debezium_slot"; private static final String PUBLICATION = "publication"; private PostgreSQLContainer container; @@ -47,6 +49,7 @@ protected Database setupDatabase() throws Exception { .put("host", container.getHost()) .put("port", container.getFirstMappedPort()) .put("database", container.getDatabaseName()) + .put("schemas", List.of(SCHEMA_NAME)) .put("username", container.getUsername()) .put("password", container.getPassword()) .put("replication_method", replicationMethod) @@ -83,7 +86,7 @@ protected Database setupDatabase() throws Exception { @Override protected String getNameSpace() { - return "test"; + return SCHEMA_NAME; } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java index 9b8b0a634324e..184215bc93c45 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java @@ -47,6 +47,7 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc .put("host", container.getHost()) .put("port", container.getFirstMappedPort()) .put("database", container.getDatabaseName()) + .put("schemas", Jsons.jsonNode(List.of("public"))) .put("username", container.getUsername()) .put("password", container.getPassword()) .put("ssl", false) diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java 
b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java index 37c0c23d314f2..d381704afb0aa 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java @@ -16,15 +16,13 @@ import java.sql.SQLException; import java.util.Set; import org.jooq.SQLDialect; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.testcontainers.containers.PostgreSQLContainer; public class PostgresSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { private PostgreSQLContainer container; private JsonNode config; - private static final Logger LOGGER = LoggerFactory.getLogger(PostgresSourceDatatypeTest.class); + private static final String SCHEMA_NAME = "test"; @Override protected Database setupDatabase() throws SQLException { @@ -54,7 +52,7 @@ protected Database setupDatabase() throws SQLException { SQLDialect.POSTGRES); database.query(ctx -> { - ctx.execute("CREATE SCHEMA TEST;"); + ctx.execute(String.format("CREATE SCHEMA %S;", SCHEMA_NAME)); ctx.execute("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');"); ctx.execute("CREATE TYPE inventory_item AS (name text, supplier_id integer, price numeric);"); // In one of the test case, we have some money values with currency symbol. Postgres can only @@ -74,7 +72,7 @@ protected Database setupDatabase() throws SQLException { @Override protected String getNameSpace() { - return "test"; + return SCHEMA_NAME; } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java index dfb916d2c16ac..dff446f51cfde 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java @@ -10,6 +10,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.standardtest.source.performancetest.AbstractSourcePerformanceTest; import java.nio.file.Path; +import java.util.List; import java.util.stream.Stream; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.provider.Arguments; @@ -17,6 +18,8 @@ public class PostgresRdsSourcePerformanceTest extends AbstractSourcePerformanceTest { private static final String PERFORMANCE_SECRET_CREDS = "secrets/performance-config.json"; + private static final List SCHEMAS = List.of("test1000tables240columns200recordsDb", + "newregular25tables50000records", "newsmall1000tableswith10000rows"); @Override protected String getImageName() { @@ -35,6 +38,7 @@ protected void setupDatabase(String dbName) { .put("host", plainConfig.get("host")) .put("port", plainConfig.get("port")) .put("database", plainConfig.get("database")) + .put("schemas", SCHEMAS) .put("username", plainConfig.get("username")) .put("password", plainConfig.get("password")) .put("ssl", true) @@ -53,9 +57,9 @@ protected void 
setupDatabase(String dbName) { @BeforeAll public static void beforeAll() { AbstractSourcePerformanceTest.testArgs = Stream.of( - Arguments.of("test1000tables240columns200recordsDb", "test1000tables240columns200recordsDb", 200, 240, 1000), - Arguments.of("newregular25tables50000records", "newregular25tables50000records", 50000, 8, 25), - Arguments.of("newsmall1000tableswith10000rows", "newsmall1000tableswith10000rows", 10000, 8, 1000)); + Arguments.of(SCHEMAS.get(0), SCHEMAS.get(0), 200, 240, 1000), + Arguments.of(SCHEMAS.get(1), SCHEMAS.get(1), 50000, 8, 25), + Arguments.of(SCHEMAS.get(2), SCHEMAS.get(2), 10000, 8, 1000)); } } diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java index 099a0de74efc8..ac4c0c98b9a60 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java @@ -98,6 +98,7 @@ private JsonNode getConfig(final String dbName) { .put("host", container.getHost()) .put("port", container.getFirstMappedPort()) .put("database", dbName) + .put("schemas", List.of(MODELS_SCHEMA, MODELS_SCHEMA + "_random")) .put("username", container.getUsername()) .put("password", container.getPassword()) .put("ssl", false) diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java index 3f447048eb4e5..cd4a2e124ccc9 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java @@ -17,6 +17,7 @@ import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.test.utils.PostgreSQLContainerHelper; import java.sql.JDBCType; +import java.util.List; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -44,6 +45,7 @@ public void setup() throws Exception { .put("host", PSQL_DB.getHost()) .put("port", PSQL_DB.getFirstMappedPort()) .put("database", dbName) + .put("schemas", List.of(SCHEMA_NAME, SCHEMA_NAME2)) .put("username", PSQL_DB.getUsername()) .put("password", PSQL_DB.getPassword()) .put("ssl", false) diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java index 5fac2db71d792..7eb501aee277b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java @@ -133,6 +133,7 @@ private JsonNode getConfig(final PostgreSQLContainer psqlDb, final String dbN .put("host", psqlDb.getHost()) .put("port", 
psqlDb.getFirstMappedPort()) .put("database", dbName) + .put("schemas", List.of("public")) .put("username", psqlDb.getUsername()) .put("password", psqlDb.getPassword()) .put("ssl", true) diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java index 32148908b31a5..94a5cf0e6582b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java @@ -171,6 +171,7 @@ private JsonNode getConfig(final PostgreSQLContainer psqlDb, final String dbN .put("host", psqlDb.getHost()) .put("port", psqlDb.getFirstMappedPort()) .put("database", dbName) + .put("schemas", List.of(SCHEMA_NAME)) .put("username", psqlDb.getUsername()) .put("password", psqlDb.getPassword()) .put("ssl", false) @@ -182,6 +183,7 @@ private JsonNode getConfig(final PostgreSQLContainer psqlDb, final String dbN .put("host", psqlDb.getHost()) .put("port", psqlDb.getFirstMappedPort()) .put("database", dbName) + .put("schemas", List.of(SCHEMA_NAME)) .put("username", user) .put("password", password) .put("ssl", false) diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSpecTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSpecTest.java index 77f34c70f1468..01e08fb8456c2 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSpecTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSpecTest.java @@ -30,6 +30,7 @@ public class PostgresSpecTest { + "\"password\" : \"pwd\", " + "\"username\" : \"postgres\", " + "\"database\" : \"postgres_db\", " + + "\"schemas\" : [\"public\"], " + "\"port\" : 5432, " + "\"host\" : \"localhost\", " + "\"ssl\" : true, " @@ -53,6 +54,13 @@ void testDatabaseMissing() { assertFalse(validator.test(schema, config)); } + @Test + void testSchemaMissing() { + final JsonNode config = Jsons.deserialize(CONFIGURATION); + ((ObjectNode) config).remove("schemas"); + assertTrue(validator.test(schema, config)); + } + @Test void testWithoutReplicationMethod() { final JsonNode config = Jsons.deserialize(CONFIGURATION); diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index 84350062df93e..70b2f10d2dca6 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -15,7 +15,7 @@ | Custom Types | Yes | | | Arrays | Yes | Byte-arrays are not supported yet. | | Generating an RSA Private Key | No | Coming Soon. | -| Schema Selection | No | Track issue [here.](https://github.com/airbytehq/airbyte/issues/1435) | +| Schema Selection | Yes | The 'public' schema is set by default. Multiple schemas may be used at one time. If no schemas are set explicitly, all existing schemas will be synced. | The Postgres source does not alter the schema present in your database. Depending on the destination connected to this source, however, the schema may be altered. See the destination's documentation for more details. 
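The PostgresSource hunk above folds the configured schema list into the JDBC URL through the driver's currentSchema parameter (a comma-separated search-path hint) and falls back to discovering every schema when no list is supplied. The following is a minimal, self-contained sketch of that URL handling; the class and method names are invented for illustration and this is not the connector's actual implementation.

import java.util.List;

public class CurrentSchemaSketch {

  // Mirrors the idea in the PostgresSource change: join the configured schemas
  // into the Postgres JDBC driver's comma-separated currentSchema parameter.
  static String withSchemas(final String jdbcUrl, final List<String> schemas) {
    if (schemas == null || schemas.isEmpty()) {
      // No explicit schemas: leave the URL untouched; discovery then walks all schemas in the DB.
      return jdbcUrl;
    }
    // The real hunk appends each parameter followed by "&"; simplified here.
    return jdbcUrl + "currentSchema=" + String.join(",", schemas) + "&";
  }

  public static void main(final String[] args) {
    System.out.println(withSchemas("jdbc:postgresql://localhost:5432/postgres_db?", List.of("public", "reporting")));
    // prints: jdbc:postgresql://localhost:5432/postgres_db?currentSchema=public,reporting&
  }
}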
@@ -257,7 +257,8 @@ According to Postgres [documentation](https://www.postgresql.org/docs/14/datatyp | Version | Date | Pull Request | Subject | |:--------|:-----------|:-------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------| -| 0.4.1 | 2022-01-05 | [9116](https://github.com/airbytehq/airbyte/pull/9116) | Added materialized views processing | +| 0.4.2 | 2022-01-13 | [9360](https://github.com/airbytehq/airbyte/pull/9360) | Added schema selection | +| 0.4.1 | 2022-01-05 | [9116](https://github.com/airbytehq/airbyte/pull/9116) | Added materialized views processing | | 0.4.0 | 2021-12-13 | [8726](https://github.com/airbytehq/airbyte/pull/8726) | Support all Postgres types | | 0.3.17 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | | 0.3.16 | 2021-11-28 | [7995](https://github.com/airbytehq/airbyte/pull/7995) | Fixed money type with amount > 1000 | From 9909cd69f240d9ca047c990cf546135ac1dbaf41 Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Thu, 13 Jan 2022 16:49:32 +0200 Subject: [PATCH 113/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20HubSpot:=20`eng?= =?UTF-8?q?agements`=20stream,=20remove=20auto-generated=20`properties`=20?= =?UTF-8?q?(#9385)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * extend schemas/engagements.json * fix unix newlines Signed-off-by: Sergey Chvalyuk --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-hubspot/Dockerfile | 2 +- .../source-hubspot/source_hubspot/api.py | 8 +- .../source_hubspot/schemas/engagements.json | 164 ++++++++++++++++++ docs/integrations/sources/hubspot.md | 14 +- 6 files changed, 176 insertions(+), 16 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index d7ea734d5397f..74993fe19c8cb 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -287,7 +287,7 @@ - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.30 + dockerImageTag: 0.1.31 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 7024482bc301e..dd09676679a88 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2846,7 +2846,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-hubspot:0.1.30" +- dockerImage: "airbyte/source-hubspot:0.1.31" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index c8ddb3ac162e7..b8bd788b20cda 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" 
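In the api.py hunk below, EngagementStream stops declaring an entity and the casting guard is relaxed, so records from entity-less streams are passed through untouched and validated against the extended static engagements schema instead of auto-generated properties. A hypothetical, heavily trimmed Python sketch of that guard; the class and sample record are invented for illustration and do not reproduce the connector's full logic.

class StreamSketch:
    """Stand-in for the connector's Stream class; illustration only."""

    entity = None  # engagements no longer set this after the patch

    def _cast_record_fields_if_needed(self, record, properties=None):
        # New guard: without an entity (no auto-generated properties) or without a
        # "properties" payload, the record is returned unchanged.
        if not self.entity or not record.get("properties"):
            return record
        # ...type casting against the declared properties would continue here...
        return record


sample = {"id": 1, "uid": "abc", "bodyPreview": "hello"}  # no "properties" key
print(StreamSketch()._cast_record_fields_if_needed(sample))  # unchanged dict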
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.30 +LABEL io.airbyte.version=0.1.31 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index 7d02a46eacb52..708836d4d3b80 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -265,10 +265,7 @@ def _cast_value(declared_field_types: List, field_name: str, field_value: Any, d def _cast_record_fields_if_needed(self, record: Mapping, properties: Mapping[str, Any] = None) -> Mapping: - if self.entity not in {"contact", "engagement", "product", "quote", "ticket", "company", "deal", "line_item"}: - return record - - if not record.get("properties"): + if not self.entity or not record.get("properties"): return record properties = properties or self.properties @@ -363,7 +360,7 @@ def parse_response(self, response: Union[Mapping[str, Any], List[dict]]) -> Iter 'message': 'This hapikey (....) does not have proper permissions! (requires any of [automation-access])', 'correlationId': '111111-2222-3333-4444-55555555555'} """ - logger.warning(f"Stream `{self.entity}` cannot be procced. {response.get('message')}") + logger.warning(f"Stream `{self.name}` cannot be procced. {response.get('message')}") return if response.get(self.data_field) is None: @@ -713,7 +710,6 @@ class EngagementStream(Stream): Docs: https://legacydocs.hubspot.com/docs/methods/engagements/get-all-engagements """ - entity = "engagement" url = "/engagements/v1/engagements/paged" more_key = "hasMore" limit = 250 diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/engagements.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/engagements.json index 364649a21081e..d00ba5d6a3423 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/engagements.json +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/engagements.json @@ -5,6 +5,12 @@ "id": { "type": ["null", "integer"] }, + "uid": { + "type": ["null", "string"] + }, + "teamId": { + "type": ["null", "integer"] + }, "portalId": { "type": ["null", "integer"] }, @@ -14,6 +20,12 @@ "createdAt": { "type": ["null", "integer"] }, + "createdBy": { + "type": ["null", "integer"] + }, + "modifiedBy": { + "type": ["null", "integer"] + }, "lastUpdated": { "type": ["null", "integer"] }, @@ -26,6 +38,33 @@ "timestamp": { "type": ["null", "integer"] }, + "bodyPreview": { + "type": ["null", "string"] + }, + "bodyPreviewHtml": { + "type": ["null", "string"] + }, + "bodyPreviewIsTruncated": { + "type": ["null", "boolean"] + }, + "allAccessibleTeamIds": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + }, + "activityType": { + "type": ["null", "string"] + }, + "gdprDeleted": { + "type": ["null", "boolean"] + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + }, "associations": { "type": ["null", "object"], "properties": { @@ -46,6 +85,24 @@ "items": { "type": ["null", "integer"] } + }, + "ownerIds": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + }, + "workflowIds": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + }, + "ticketIds": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } } } }, @@ -77,6 +134,17 
@@ }, "lastName": { "type": ["null", "string"] + }, + "raw": { + "type": ["null", "string"] + } + } + }, + "sender": { + "type": ["null", "object"], + "properties": { + "email": { + "type": ["null", "string"] } } }, @@ -87,6 +155,15 @@ "properties": { "email": { "type": ["null", "string"] + }, + "firstName": { + "type": ["null", "string"] + }, + "lastName": { + "type": ["null", "string"] + }, + "raw": { + "type": ["null", "string"] } } } @@ -98,6 +175,15 @@ "properties": { "email": { "type": ["null", "string"] + }, + "firstName": { + "type": ["null", "string"] + }, + "lastName": { + "type": ["null", "string"] + }, + "raw": { + "type": ["null", "string"] } } } @@ -157,6 +243,84 @@ }, "disposition": { "type": ["null", "string"] + }, + "completionDate": { + "type": ["null", "integer"] + }, + "taskType": { + "type": ["null", "string"] + }, + "reminders": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + }, + "threadId": { + "type": ["null", "string", "integer"] + }, + "messageId": { + "type": ["null", "string"] + }, + "loggedFrom": { + "type": ["null", "string"] + }, + "attachedVideoOpened": { + "type": ["null", "boolean"] + }, + "attachedVideoWatched": { + "type": ["null", "boolean"] + }, + "trackerKey": { + "type": ["null", "string"] + }, + "sendDefaultReminder": { + "type": ["null", "boolean"] + }, + "source": { + "type": ["null", "string"] + }, + "unknownVisitorConversation": { + "type": ["null", "boolean"] + }, + "facsimileSendId": { + "type": ["null", "string"] + }, + "sentVia": { + "type": ["null", "string"] + }, + "sequenceStepOrder": { + "type": ["null", "integer"] + }, + "externalUrl": { + "type": ["null", "string"] + }, + "postSendStatus": { + "type": ["null", "string"] + }, + "errorMessage": { + "type": ["null", "string"] + }, + "recipientDropReasons": { + "type": ["null", "string"] + }, + "calleeObjectId": { + "type": ["null", "integer"] + }, + "calleeObjectType": { + "type": ["null", "string"] + }, + "mediaProcessingStatus": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + }, + "priority": { + "type": ["null", "string"] + }, + "isAllDay": { + "type": ["null", "boolean"] } } } diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index 81c77c50f345f..90ac117be7bf0 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -37,15 +37,15 @@ This source is capable of syncing the following tables and their data: * [Workflows](https://legacydocs.hubspot.com/docs/methods/workflows/v3/get_workflows) ### A note on the `engagements` stream -Objects in the `engagements` stream can have one of the following types: `note`, `email`, `task`, `meeting`, `call`. +Objects in the `engagements` stream can have one of the following types: `note`, `email`, `task`, `meeting`, `call`. -Depending on the type of engagement, different properties will be set for that object in the `engagements_metadata` table in the destination. +Depending on the type of engagement, different properties will be set for that object in the `engagements_metadata` table in the destination. * A `call` engagement will have a corresponding `engagements_metadata` object with non-null values in the `toNumber`, `fromNumber`, `status`, `externalId`, `durationMilliseconds`, `externalAccountId`, `recordingUrl`, `body`, and `disposition` columns. -* An `email` engagement will have a corresponding `engagements_metadata` object with with non-null values in the `subject`, `html`, and `text` columns. 
In addition, there will be records in four related tables, `engagements_metadata_from`, `engagements_metadata_to`, `engagements_metadata_cc`, `engagements_metadata_bcc`. -* A `meeting` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body`, `startTime`, `endTime`, and `title` columns. -* A `note` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body` column. -* A `task` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body`, `status`, and `forObjectType` columns. +* An `email` engagement will have a corresponding `engagements_metadata` object with with non-null values in the `subject`, `html`, and `text` columns. In addition, there will be records in four related tables, `engagements_metadata_from`, `engagements_metadata_to`, `engagements_metadata_cc`, `engagements_metadata_bcc`. +* A `meeting` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body`, `startTime`, `endTime`, and `title` columns. +* A `note` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body` column. +* A `task` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body`, `status`, and `forObjectType` columns. **Note**: HubSpot API currently only supports `quotes` endpoint using API Key, using Oauth it is impossible to access this stream (as reported by [community.hubspot.com](https://community.hubspot.com/t5/APIs-Integrations/Help-with-using-Feedback-CRM-API-and-Quotes-CRM-API/m-p/449104/highlight/true#M44411)). @@ -110,6 +110,7 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------| +| 0.1.31 | 2022-01-11 | [9385](https://github.com/airbytehq/airbyte/pull/9385) | Remove auto-generated `properties` from `Engagements` stream | | 0.1.30 | 2021-01-10 | [9129](https://github.com/airbytehq/airbyte/pull/9129) | Created Contacts list memberships streams | | 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | | 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update fields and descriptions | @@ -134,4 +135,3 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | 0.1.9 | 2021-08-11 | [5334](https://github.com/airbytehq/airbyte/pull/5334) | Fix empty strings inside float datatype | | 0.1.8 | 2021-08-06 | [5250](https://github.com/airbytehq/airbyte/pull/5250) | Fix issue with printing exceptions | | 0.1.7 | 2021-07-27 | [4913](https://github.com/airbytehq/airbyte/pull/4913) | Update fields schema | - From d992ccec993938465b9a840ad1a6e1e8b85b4ae2 Mon Sep 17 00:00:00 2001 From: VitaliiMaltsev <39538064+VitaliiMaltsev@users.noreply.github.com> Date: Thu, 13 Jan 2022 17:09:01 +0200 Subject: [PATCH 114/215] Source Mongo DB: update docker image tag in source_specs,yaml (#9481) * fix for jdk 17 * Source Mongo: update docker image tag in source_specs.yaml Co-authored-by: vmaltsev --- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index dd09676679a88..8a42a527dbb1c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4382,7 +4382,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-mongodb-v2:0.1.10" +- dockerImage: "airbyte/source-mongodb-v2:0.1.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" From a495917d1d2747a0273d2e94d45ee91b1f8ce053 Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Thu, 13 Jan 2022 08:00:13 -0800 Subject: [PATCH 115/215] Only migrate active and disable connection (#9454) Filter out the deleted workflows. Change a log to the right level. --- .../src/main/java/io/airbyte/scheduler/app/SchedulerApp.java | 2 +- .../src/main/java/io/airbyte/server/ServerApp.java | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java index b807255cf9017..00d489b636711 100644 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java +++ b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java @@ -140,7 +140,7 @@ public void start() throws IOException { // anymore. cleanupZombies(jobPersistence, jobNotifier); - LOGGER.error("Start running the old scheduler"); + LOGGER.info("Start running the old scheduler"); scheduleJobsPool.scheduleWithFixedDelay( () -> { MDC.setContextMap(mdc); diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index 8daabdc64fd26..ba13a25ec28c8 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -13,6 +13,7 @@ import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs; import io.airbyte.config.EnvConfigs; +import io.airbyte.config.StandardSync.Status; import io.airbyte.config.helpers.LogClientSingleton; import io.airbyte.config.init.YamlSeedConfigPersistence; import io.airbyte.config.persistence.ConfigNotFoundException; @@ -235,7 +236,9 @@ private static void migrateExistingConnection(final ConfigRepository configRepos throws JsonValidationException, ConfigNotFoundException, IOException { LOGGER.info("Start migration to the new scheduler..."); final Set connectionIds = - configRepository.listStandardSyncs().stream().map(standardSync -> standardSync.getConnectionId()).collect(Collectors.toSet()); + configRepository.listStandardSyncs().stream() + .filter(standardSync -> standardSync.getStatus() == Status.ACTIVE || standardSync.getStatus() == Status.INACTIVE) + .map(standardSync -> standardSync.getConnectionId()).collect(Collectors.toSet()); temporalWorkerRunFactory.migrateSyncIfNeeded(connectionIds); LOGGER.info("Done migrating to the new scheduler..."); } From c904b449980d832212a67da0edfe36979a80892f Mon Sep 17 00:00:00 2001 From: Andrii Leonets <30464745+DoNotPanicUA@users.noreply.github.com> Date: Thu, 13 Jan 2022 19:16:20 +0200 Subject: [PATCH 116/215] BigQuery Destination : Fix GCS processing of Facebook data (#9415) * Fix GCS Avro file processing with invalid 
"-" character * Extend test data to cover the case * incr ver * s3 ver upd * add dependency * add dependency --- .../079d5540-f236-4294-ba7c-ade8fd918496.json | 2 +- .../22f6c74f-5699-40ff-833c-4a879ea40133.json | 2 +- .../4816b78f-1489-44c1-9060-4b19d5fa9362.json | 2 +- .../seed/destination_definitions.yaml | 4 +- .../resources/seed/destination_specs.yaml | 6 +-- .../Dockerfile | 2 +- .../build.gradle | 1 + .../BigQueryDenormalizedTestDataUtils.java | 8 ++-- .../destination-bigquery/Dockerfile | 2 +- .../connectors/destination-s3/Dockerfile | 2 +- .../s3/avro/AvroNameTransformer.java | 6 ++- .../s3/avro/JsonToAvroSchemaConverter.java | 2 +- docs/integrations/destinations/bigquery.md | 28 ++++++------ docs/integrations/destinations/s3.md | 45 ++++++++++--------- 14 files changed, 60 insertions(+), 52 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json index aac58b6e8ee7a..aee3fcbfd47a5 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "079d5540-f236-4294-ba7c-ade8fd918496", "name": "BigQuery (denormalized typed struct)", "dockerRepository": "airbyte/destination-bigquery-denormalized", - "dockerImageTag": "0.2.2", + "dockerImageTag": "0.2.3", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json index d69347400d6f7..e8bfbb235d976 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "22f6c74f-5699-40ff-833c-4a879ea40133", "name": "BigQuery", "dockerRepository": "airbyte/destination-bigquery", - "dockerImageTag": "0.6.2", + "dockerImageTag": "0.6.3", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json index a7e817b4dba75..07e795e2e3889 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "4816b78f-1489-44c1-9060-4b19d5fa9362", "name": "S3", "dockerRepository": "airbyte/destination-s3", - "dockerImageTag": "0.2.2", + "dockerImageTag": "0.2.4", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/s3", "icon": "s3.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml 
b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 38afff07e973b..2b5cad4c78691 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -13,13 +13,13 @@ - name: BigQuery destinationDefinitionId: 22f6c74f-5699-40ff-833c-4a879ea40133 dockerRepository: airbyte/destination-bigquery - dockerImageTag: 0.6.2 + dockerImageTag: 0.6.3 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: BigQuery (denormalized typed struct) destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496 dockerRepository: airbyte/destination-bigquery-denormalized - dockerImageTag: 0.2.2 + dockerImageTag: 0.2.3 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: Cassandra diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 4ce2d219022de..06520bfa6bcd0 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -188,7 +188,7 @@ supportsDBT: false supported_destination_sync_modes: - "append" -- dockerImage: "airbyte/destination-bigquery:0.6.2" +- dockerImage: "airbyte/destination-bigquery:0.6.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: @@ -378,7 +378,7 @@ - "overwrite" - "append" - "append_dedup" -- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.2" +- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: @@ -3408,7 +3408,7 @@ supported_destination_sync_modes: - "append" - "overwrite" -- dockerImage: "airbyte/destination-s3:0.2.3" +- dockerImage: "airbyte/destination-s3:0.2.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3" connectionSpecification: diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile index be38d6f1cd9df..45801e0f01138 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-bigquery-denormalized COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.2 +LABEL io.airbyte.version=0.2.3 LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/build.gradle b/airbyte-integrations/connectors/destination-bigquery-denormalized/build.gradle index 5b0c1bd57006c..56d35b764ee84 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/build.gradle +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/build.gradle @@ -22,6 +22,7 @@ dependencies { integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-bigquery-denormalized') + integrationTestJavaImplementation files(project(':airbyte-integrations:bases:base-normalization').airbyteDocker.outputs) implementation 
files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) } diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java index 5d8e044854595..83c18fa4d2f80 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java @@ -29,7 +29,7 @@ public static JsonNode getSchema() { + " \"string\"\n" + " ]\n" + " },\n" - + " \"permissions\": {\n" + + " \"permission-list\": {\n" + " \"type\": [\n" + " \"array\"\n" + " ],\n" @@ -135,7 +135,7 @@ public static JsonNode getSchemaWithInvalidArrayType() { + " \"string\"\n" + " ]\n" + " },\n" - + " \"permissions\": {\n" + + " \"permission-list\": {\n" + " \"type\": [\n" + " \"array\"\n" + " ],\n" @@ -167,7 +167,7 @@ public static JsonNode getData() { "{\n" + " \"name\": \"Andrii\",\n" + " \"accepts_marketing_updated_at\": \"2021-10-11T06:36:53-07:00\",\n" - + " \"permissions\": [\n" + + " \"permission-list\": [\n" + " {\n" + " \"domain\": \"abs\",\n" + " \"grants\": [\n" @@ -266,7 +266,7 @@ public static JsonNode getDataWithEmptyObjectAndArray() { return Jsons.deserialize( "{\n" + " \"name\": \"Andrii\",\n" - + " \"permissions\": [\n" + + " \"permission-list\": [\n" + " {\n" + " \"domain\": \"abs\",\n" + " \"items\": {},\n" // empty object diff --git a/airbyte-integrations/connectors/destination-bigquery/Dockerfile b/airbyte-integrations/connectors/destination-bigquery/Dockerfile index e4bb7588c7ca2..605329e28e0bc 100644 --- a/airbyte-integrations/connectors/destination-bigquery/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-bigquery COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.6.2 +LABEL io.airbyte.version=0.6.3 LABEL io.airbyte.name=airbyte/destination-bigquery diff --git a/airbyte-integrations/connectors/destination-s3/Dockerfile b/airbyte-integrations/connectors/destination-s3/Dockerfile index 74be9eb7dc0e5..7334a212258e6 100644 --- a/airbyte-integrations/connectors/destination-s3/Dockerfile +++ b/airbyte-integrations/connectors/destination-s3/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-s3 COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.3 +LABEL io.airbyte.version=0.2.4 LABEL io.airbyte.name=airbyte/destination-s3 diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java index c1dc15a076d14..c39152b1a4fb4 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java @@ -15,7 +15,7 @@ protected String applyDefaultCase(final String input) { @Override public 
String getIdentifier(final String name) { - return checkFirsCharInStreamName(convertStreamName(name)); + return replaceForbiddenCharacters(checkFirsCharInStreamName(convertStreamName(name))); } private String checkFirsCharInStreamName(final String name) { @@ -26,4 +26,8 @@ private String checkFirsCharInStreamName(final String name) { } } + private String replaceForbiddenCharacters(final String name) { + return name.replace("-", "_"); + } + } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java index 555e4a0bb7c15..7289d15a8d05b 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java @@ -175,7 +175,7 @@ public Schema getAvroSchema(final JsonNode jsonSchema, // Omit the namespace for root level fields, because it is directly assigned in the builder above. // This may not be the correct choice. ? null - : (fieldNamespace == null ? fieldName : (fieldNamespace + "." + fieldName)); + : (fieldNamespace == null ? stdName : (fieldNamespace + "." + stdName)); fieldBuilder.type(parseJsonField(subfieldName, subfieldNamespace, subfieldDefinition, appendExtraProps, addStringToLogicalTypes)) .withDefault(null); } diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index 67b13f5e79d57..3a72ac40c3365 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -153,6 +153,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.6.3 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | | 0.6.2 | 2022-01-10 | [\#9121](https://github.com/airbytehq/airbyte/pull/9121) | Fixed check method for GCS mode to verify if all roles assigned to user | | 0.6.1 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | | 0.6.0 | 2021-12-17 | [\#8788](https://github.com/airbytehq/airbyte/issues/8788) | BigQuery/BiqQuery denorm Destinations : Add possibility to use different types of GCS files | @@ -169,21 +170,22 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into ### bigquery-denormalized -| Version | Date | Pull Request | Subject | -|:--------| :--- | :--- | :--- | -| 0.2.2 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | -| 0.2.1 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | -| 0.2.0 | 2021-12-17 | [\#8788](https://github.com/airbytehq/airbyte/pull/8788) | BigQuery/BiqQuery denorm Destinations : Add possibility to use different types of GCS files | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:-----------------------------------------------------------| :--- | +| 0.2.3 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS 
processing of Facebook data | +| 0.2.2 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | +| 0.2.1 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | +| 0.2.0 | 2021-12-17 | [\#8788](https://github.com/airbytehq/airbyte/pull/8788) | BigQuery/BiqQuery denorm Destinations : Add possibility to use different types of GCS files | | 0.1.11 | 2021-12-16 | [\#8816](https://github.com/airbytehq/airbyte/issues/8816) | Update dataset locations | -| 0.1.10 | 2021-11-09 | [\#7804](https://github.com/airbytehq/airbyte/pull/7804) | handle null values in fields described by a $ref definition | +| 0.1.10 | 2021-11-09 | [\#7804](https://github.com/airbytehq/airbyte/pull/7804) | handle null values in fields described by a $ref definition | | 0.1.9 | 2021-11-08 | [\#7736](https://github.com/airbytehq/airbyte/issues/7736) | Fixed the handling of ObjectNodes with $ref definition key | | 0.1.8 | 2021-10-27 | [\#7413](https://github.com/airbytehq/airbyte/issues/7413) | Fixed DATETIME conversion for BigQuery | | 0.1.7 | 2021-10-26 | [\#7240](https://github.com/airbytehq/airbyte/issues/7240) | Output partitioned/clustered tables | -| 0.1.6 | 2021-09-16 | [\#6145](https://github.com/airbytehq/airbyte/pull/6145) | BigQuery Denormalized support for date, datetime & timestamp types through the json "format" key | -| 0.1.5 | 2021-09-07 | [\#5881](https://github.com/airbytehq/airbyte/pull/5881) | BigQuery Denormalized NPE fix | -| 0.1.4 | 2021-09-04 | [\#5813](https://github.com/airbytehq/airbyte/pull/5813) | fix Stackoverflow error when receive a schema from source where "Array" type doesn't contain a required "items" element | -| 0.1.3 | 2021-08-07 | [\#5261](https://github.com/airbytehq/airbyte/pull/5261) | 🐛 Destination BigQuery\(Denormalized\): Fix processing arrays of records | -| 0.1.2 | 2021-07-30 | [\#5125](https://github.com/airbytehq/airbyte/pull/5125) | Enable `additionalPropertities` in spec.json | -| 0.1.1 | 2021-06-21 | [\#3555](https://github.com/airbytehq/airbyte/pull/3555) | Partial Success in BufferedStreamConsumer | -| 0.1.0 | 2021-06-21 | [\#4176](https://github.com/airbytehq/airbyte/pull/4176) | Destination using Typed Struct and Repeated fields | +| 0.1.6 | 2021-09-16 | [\#6145](https://github.com/airbytehq/airbyte/pull/6145) | BigQuery Denormalized support for date, datetime & timestamp types through the json "format" key | +| 0.1.5 | 2021-09-07 | [\#5881](https://github.com/airbytehq/airbyte/pull/5881) | BigQuery Denormalized NPE fix | +| 0.1.4 | 2021-09-04 | [\#5813](https://github.com/airbytehq/airbyte/pull/5813) | fix Stackoverflow error when receive a schema from source where "Array" type doesn't contain a required "items" element | +| 0.1.3 | 2021-08-07 | [\#5261](https://github.com/airbytehq/airbyte/pull/5261) | 🐛 Destination BigQuery\(Denormalized\): Fix processing arrays of records | +| 0.1.2 | 2021-07-30 | [\#5125](https://github.com/airbytehq/airbyte/pull/5125) | Enable `additionalPropertities` in spec.json | +| 0.1.1 | 2021-06-21 | [\#3555](https://github.com/airbytehq/airbyte/pull/3555) | Partial Success in BufferedStreamConsumer | +| 0.1.0 | 2021-06-21 | [\#4176](https://github.com/airbytehq/airbyte/pull/4176) | Destination using Typed Struct and Repeated fields | diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index 
79a73bb120eb8..d9ac12b8c1fcb 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -222,26 +222,27 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A ## CHANGELOG | Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.2.3 | 2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field to string. | -| 0.2.2 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | -| 0.2.1 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. | -| 0.2.0 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Change the output filename for CSV files - it's now `bucketPath/namespace/streamName/timestamp_epochMillis_randomUuid.csv` | -| 0.1.16 | 2021-12-10 | [\#8562](https://github.com/airbytehq/airbyte/pull/8562) | Swap dependencies with destination-jdbc. | -| 0.1.15 | 2021-12-03 | [\#8501](https://github.com/airbytehq/airbyte/pull/8501) | Remove excessive logging for Avro and Parquet invalid date strings. | -| 0.1.14 | 2021-11-09 | [\#7732](https://github.com/airbytehq/airbyte/pull/7732) | Support timestamp in Avro and Parquet | -| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | -| 0.1.12 | 2021-09-13 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by S3 | -| 0.1.11 | 2021-09-10 | [\#5729](https://github.com/airbytehq/airbyte/pull/5729) | For field names that start with a digit, a `_` will be appended at the beginning for the`Parquet` and `Avro` formats. | -| 0.1.10 | 2021-08-17 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | -| 0.1.9 | 2021-07-12 | [\#4666](https://github.com/airbytehq/airbyte/pull/4666) | Fix MinIO output for Parquet format. | -| 0.1.8 | 2021-07-07 | [\#4613](https://github.com/airbytehq/airbyte/pull/4613) | Patched schema converter to support combined restrictions. | -| 0.1.7 | 2021-06-23 | [\#4227](https://github.com/airbytehq/airbyte/pull/4227) | Added Avro and JSONL output. | -| 0.1.6 | 2021-06-16 | [\#4130](https://github.com/airbytehq/airbyte/pull/4130) | Patched the check to verify prefix access instead of full-bucket access. | -| 0.1.5 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Fixed default `max_padding_size_mb` in `spec.json`. | -| 0.1.4 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Added Parquet output. | -| 0.1.3 | 2021-06-13 | [\#4038](https://github.com/airbytehq/airbyte/pull/4038) | Added support for alternative S3. | -| 0.1.2 | 2021-06-10 | [\#4029](https://github.com/airbytehq/airbyte/pull/4029) | Fixed `_airbyte_emitted_at` field to be a UTC instead of local timestamp for consistency. | -| 0.1.1 | 2021-06-09 | [\#3973](https://github.com/airbytehq/airbyte/pull/3973) | Added `AIRBYTE_ENTRYPOINT` in base Docker image for Kubernetes support. | -| 0.1.0 | 2021-06-03 | [\#3672](https://github.com/airbytehq/airbyte/pull/3672) | Initial release with CSV output. 
| +|:--------| :--- | :--- | :--- | +| 0.2.4 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | +| 0.2.3 | 2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field to string. | +| 0.2.2 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | +| 0.2.1 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. | +| 0.2.0 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Change the output filename for CSV files - it's now `bucketPath/namespace/streamName/timestamp_epochMillis_randomUuid.csv` | +| 0.1.16 | 2021-12-10 | [\#8562](https://github.com/airbytehq/airbyte/pull/8562) | Swap dependencies with destination-jdbc. | +| 0.1.15 | 2021-12-03 | [\#8501](https://github.com/airbytehq/airbyte/pull/8501) | Remove excessive logging for Avro and Parquet invalid date strings. | +| 0.1.14 | 2021-11-09 | [\#7732](https://github.com/airbytehq/airbyte/pull/7732) | Support timestamp in Avro and Parquet | +| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | +| 0.1.12 | 2021-09-13 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by S3 | +| 0.1.11 | 2021-09-10 | [\#5729](https://github.com/airbytehq/airbyte/pull/5729) | For field names that start with a digit, a `_` will be appended at the beginning for the`Parquet` and `Avro` formats. | +| 0.1.10 | 2021-08-17 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | +| 0.1.9 | 2021-07-12 | [\#4666](https://github.com/airbytehq/airbyte/pull/4666) | Fix MinIO output for Parquet format. | +| 0.1.8 | 2021-07-07 | [\#4613](https://github.com/airbytehq/airbyte/pull/4613) | Patched schema converter to support combined restrictions. | +| 0.1.7 | 2021-06-23 | [\#4227](https://github.com/airbytehq/airbyte/pull/4227) | Added Avro and JSONL output. | +| 0.1.6 | 2021-06-16 | [\#4130](https://github.com/airbytehq/airbyte/pull/4130) | Patched the check to verify prefix access instead of full-bucket access. | +| 0.1.5 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Fixed default `max_padding_size_mb` in `spec.json`. | +| 0.1.4 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Added Parquet output. | +| 0.1.3 | 2021-06-13 | [\#4038](https://github.com/airbytehq/airbyte/pull/4038) | Added support for alternative S3. | +| 0.1.2 | 2021-06-10 | [\#4029](https://github.com/airbytehq/airbyte/pull/4029) | Fixed `_airbyte_emitted_at` field to be a UTC instead of local timestamp for consistency. | +| 0.1.1 | 2021-06-09 | [\#3973](https://github.com/airbytehq/airbyte/pull/3973) | Added `AIRBYTE_ENTRYPOINT` in base Docker image for Kubernetes support. | +| 0.1.0 | 2021-06-03 | [\#3672](https://github.com/airbytehq/airbyte/pull/3672) | Initial release with CSV output. 
| From 71f5b1cd71e2277aee3abf44f52a4d894b5aeb50 Mon Sep 17 00:00:00 2001 From: Augustin Date: Thu, 13 Jan 2022 19:15:34 +0100 Subject: [PATCH 117/215] =?UTF-8?q?=F0=9F=8E=89=20Destination=20DynamoDB:?= =?UTF-8?q?=20rename=20`dynamodb=5Ftable=5Fname`=20to=20`dynamodb=5Ftable?= =?UTF-8?q?=5Fname=5Fprefix`=20(#9314)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../8ccd8909-4e99-4141-b48d-4984b70b2d89.json | 2 +- .../connectors/destination-dynamodb/Dockerfile | 2 +- .../destination-dynamodb/sample_secrets/config.json | 2 +- .../destination/dynamodb/DynamodbChecker.java | 2 +- .../dynamodb/DynamodbDestinationConfig.java | 12 ++++++------ .../dynamodb/DynamodbOutputTableHelper.java | 10 +++++----- .../destination/dynamodb/DynamodbWriter.java | 2 +- .../src/main/resources/spec.json | 8 ++++---- .../dynamodb/DynamodbDestinationAcceptanceTest.java | 4 ++-- .../dynamodb/DynamodbDestinationTest.java | 4 ++-- docs/integrations/destinations/dynamodb.md | 1 + 11 files changed, 25 insertions(+), 24 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/8ccd8909-4e99-4141-b48d-4984b70b2d89.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/8ccd8909-4e99-4141-b48d-4984b70b2d89.json index 67f4c1be3b7d6..574718d34ab70 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/8ccd8909-4e99-4141-b48d-4984b70b2d89.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/8ccd8909-4e99-4141-b48d-4984b70b2d89.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "8ccd8909-4e99-4141-b48d-4984b70b2d89", "name": "DynamoDB", "dockerRepository": "airbyte/destination-dynamodb", - "dockerImageTag": "0.1.0", + "dockerImageTag": "0.1.1", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/dynamodb", "icon": "dynamodb.svg" } diff --git a/airbyte-integrations/connectors/destination-dynamodb/Dockerfile b/airbyte-integrations/connectors/destination-dynamodb/Dockerfile index 28fbe4e9bb4a7..1f4543e8b882b 100644 --- a/airbyte-integrations/connectors/destination-dynamodb/Dockerfile +++ b/airbyte-integrations/connectors/destination-dynamodb/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-dynamodb COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/destination-dynamodb diff --git a/airbyte-integrations/connectors/destination-dynamodb/sample_secrets/config.json b/airbyte-integrations/connectors/destination-dynamodb/sample_secrets/config.json index 580e6fad531b6..f7b1bf52cfa07 100644 --- a/airbyte-integrations/connectors/destination-dynamodb/sample_secrets/config.json +++ b/airbyte-integrations/connectors/destination-dynamodb/sample_secrets/config.json @@ -1,5 +1,5 @@ { - "dynamodb_table_name": "paste-table-name-here", + "dynamodb_table_name_prefix": "paste-table-name-here", "dynamodb_region": "paste-dynamodb-region-here", "access_key_id": "paste-access-key-id-here", "secret_access_key": "paste-secret-access-key-here" diff --git a/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbChecker.java b/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbChecker.java index 8819cc618c1ee..9a3bb3603b347 100644 --- 
a/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbChecker.java +++ b/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbChecker.java @@ -23,7 +23,7 @@ public class DynamodbChecker { private static final Logger LOGGER = LoggerFactory.getLogger(DynamodbChecker.class); public static void attemptDynamodbWriteAndDelete(final DynamodbDestinationConfig dynamodbDestinationConfig) throws Exception { - final var prefix = dynamodbDestinationConfig.getTableName(); + final var prefix = dynamodbDestinationConfig.getTableNamePrefix(); final String outputTableName = prefix + "_airbyte_connection_test_" + UUID.randomUUID().toString().replaceAll("-", ""); attemptWriteAndDeleteDynamodbItem(dynamodbDestinationConfig, outputTableName); } diff --git a/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationConfig.java b/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationConfig.java index f69878b44b8ab..9e3823fe9e379 100644 --- a/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationConfig.java @@ -9,19 +9,19 @@ public class DynamodbDestinationConfig { private final String endpoint; - private final String tableName; + private final String tableNamePrefix; private final String accessKeyId; private final String secretAccessKey; private final String region; public DynamodbDestinationConfig( final String endpoint, - final String tableName, + final String tableNamePrefix, final String region, final String accessKeyId, final String secretAccessKey) { this.endpoint = endpoint; - this.tableName = tableName; + this.tableNamePrefix = tableNamePrefix; this.region = region; this.accessKeyId = accessKeyId; this.secretAccessKey = secretAccessKey; @@ -30,7 +30,7 @@ public DynamodbDestinationConfig( public static DynamodbDestinationConfig getDynamodbDestinationConfig(final JsonNode config) { return new DynamodbDestinationConfig( config.get("dynamodb_endpoint") == null ? 
"" : config.get("dynamodb_endpoint").asText(), - config.get("dynamodb_table_name").asText(), + config.get("dynamodb_table_name_prefix").asText(), config.get("dynamodb_region").asText(), config.get("access_key_id").asText(), config.get("secret_access_key").asText()); @@ -52,8 +52,8 @@ public String getRegion() { return region; } - public String getTableName() { - return tableName; + public String getTableNamePrefix() { + return tableNamePrefix; } } diff --git a/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbOutputTableHelper.java b/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbOutputTableHelper.java index 6709a56ae8235..abdf071917986 100644 --- a/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbOutputTableHelper.java +++ b/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbOutputTableHelper.java @@ -11,15 +11,15 @@ public class DynamodbOutputTableHelper { - public static String getOutputTableName(final String tableName, final AirbyteStream stream) { - return getOutputTableName(tableName, stream.getNamespace(), stream.getName()); + public static String getOutputTableName(final String tableNamePrefix, final AirbyteStream stream) { + return getOutputTableName(tableNamePrefix, stream.getNamespace(), stream.getName()); } - public static String getOutputTableName(final String tableName, final String namespace, final String streamName) { + public static String getOutputTableName(final String tableNamePrefix, final String namespace, final String streamName) { final List paths = new LinkedList<>(); - if (tableName != null) { - paths.add(tableName); + if (tableNamePrefix != null) { + paths.add(tableNamePrefix); } if (namespace != null) { paths.add(new ExtendedNameTransformer().convertStreamName(namespace)); diff --git a/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbWriter.java b/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbWriter.java index 5c2ca3e798c9d..9f116de2d2c73 100644 --- a/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbWriter.java +++ b/airbyte-integrations/connectors/destination-dynamodb/src/main/java/io/airbyte/integrations/destination/dynamodb/DynamodbWriter.java @@ -46,7 +46,7 @@ public DynamodbWriter(final DynamodbDestinationConfig config, this.dynamodb = new DynamoDB(amazonDynamodb); this.configuredStream = configuredStream; this.uploadTimestamp = uploadTimestamp; - this.outputTableName = DynamodbOutputTableHelper.getOutputTableName(config.getTableName(), configuredStream.getStream()); + this.outputTableName = DynamodbOutputTableHelper.getOutputTableName(config.getTableNamePrefix(), configuredStream.getStream()); final DestinationSyncMode syncMode = configuredStream.getDestinationSyncMode(); if (syncMode == null) { diff --git a/airbyte-integrations/connectors/destination-dynamodb/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-dynamodb/src/main/resources/spec.json index 87e9218a72605..5463149917e1d 100644 --- a/airbyte-integrations/connectors/destination-dynamodb/src/main/resources/spec.json +++ 
b/airbyte-integrations/connectors/destination-dynamodb/src/main/resources/spec.json @@ -9,7 +9,7 @@ "title": "DynamoDB Destination Spec", "type": "object", "required": [ - "dynamodb_table_name", + "dynamodb_table_name_prefix", "dynamodb_region", "access_key_id", "secret_access_key" @@ -23,10 +23,10 @@ "description": "This is your DynamoDB endpoint url.(if you are working with AWS DynamoDB, just leave empty).", "examples": ["http://localhost:9000"] }, - "dynamodb_table_name": { - "title": "DynamoDB Table Name", + "dynamodb_table_name_prefix": { + "title": "Table name prefix", "type": "string", - "description": "The name of the DynamoDB table.", + "description": "The prefix to use when naming DynamoDB tables.", "examples": ["airbyte_sync"] }, "dynamodb_region": { diff --git a/airbyte-integrations/connectors/destination-dynamodb/src/test-integration/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-dynamodb/src/test-integration/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationAcceptanceTest.java index b08b405ac52b0..122e44f118f02 100644 --- a/airbyte-integrations/connectors/destination-dynamodb/src/test-integration/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-dynamodb/src/test-integration/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationAcceptanceTest.java @@ -66,7 +66,7 @@ protected JsonNode getFailCheckConfig() { */ protected List getAllSyncedObjects(final String streamName, final String namespace) { final var dynamodb = new DynamoDB(this.client); - final var tableName = DynamodbOutputTableHelper.getOutputTableName(this.config.getTableName(), streamName, namespace); + final var tableName = DynamodbOutputTableHelper.getOutputTableName(this.config.getTableNamePrefix(), streamName, namespace); final var table = dynamodb.getTable(tableName); final List items = new ArrayList(); final List resultItems = new ArrayList(); @@ -148,7 +148,7 @@ protected void tearDown(final TestDestinationEnv testEnv) { final var dynamodb = new DynamoDB(this.client); final List tables = new ArrayList(); dynamodb.listTables().forEach(o -> { - if (o.getTableName().startsWith(this.config.getTableName())) + if (o.getTableName().startsWith(this.config.getTableNamePrefix())) tables.add(o.getTableName()); }); diff --git a/airbyte-integrations/connectors/destination-dynamodb/src/test/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationTest.java b/airbyte-integrations/connectors/destination-dynamodb/src/test/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationTest.java index 1e2631e4c842f..ceff78a832e0b 100644 --- a/airbyte-integrations/connectors/destination-dynamodb/src/test/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationTest.java +++ b/airbyte-integrations/connectors/destination-dynamodb/src/test/java/io/airbyte/integrations/destination/dynamodb/DynamodbDestinationTest.java @@ -31,14 +31,14 @@ void testGetOutputTableNameWithStream() throws Exception { @Test void testGetDynamodbDestinationdbConfig() throws Exception { final JsonNode json = Jsons.deserialize("{\n" + - " \"dynamodb_table_name\": \"test_table\",\n" + + " \"dynamodb_table_name_prefix\": \"test_table\",\n" + " \"dynamodb_region\": \"test_region\",\n" + " \"access_key_id\": \"test_key_id\",\n" + " \"secret_access_key\": \"test_access_key\"\n" + "}"); final var config = 
DynamodbDestinationConfig.getDynamodbDestinationConfig(json); - assertEquals(config.getTableName(), "test_table"); + assertEquals(config.getTableNamePrefix(), "test_table"); assertEquals(config.getRegion(), "test_region"); assertEquals(config.getAccessKeyId(), "test_key_id"); assertEquals(config.getSecretAccessKey(), "test_access_key"); diff --git a/docs/integrations/destinations/dynamodb.md b/docs/integrations/destinations/dynamodb.md index 5e01c7e43a22a..a151a3dbca759 100644 --- a/docs/integrations/destinations/dynamodb.md +++ b/docs/integrations/destinations/dynamodb.md @@ -58,5 +58,6 @@ This connector by default uses 10 capacity units for both Read and Write in Dyna | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.1 | 2022-01-13 | [\#9314](https://github.com/airbytehq/airbyte/pull/9314) | Rename dynamodb_table_name to dynamodb_table_name_prefix. | | 0.1.0 | 2021-08-20 | [\#5561](https://github.com/airbytehq/airbyte/pull/5561) | Initial release. | From 0d55835d6668e74c4ff71b2a2394b20645d5ece2 Mon Sep 17 00:00:00 2001 From: michaeljguarino Date: Thu, 13 Jan 2022 16:27:51 -0500 Subject: [PATCH 118/215] Polish docs for installing airbyte on Plural (#9461) * Polish docs for installing airbyte on Plural * addressed Abhi's comments --- docs/deploying-airbyte/on-plural.md | 33 ++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/docs/deploying-airbyte/on-plural.md b/docs/deploying-airbyte/on-plural.md index f6ea39b3033ad..708dc2d40d748 100644 --- a/docs/deploying-airbyte/on-plural.md +++ b/docs/deploying-airbyte/on-plural.md @@ -6,8 +6,9 @@ Plural is a unified application deployment platform that makes it easy to run op ## Getting Started -First, install Plural and the Plural CLI by following steps 1, 2, and 3 of the instructions [here](https://docs.plural.sh/getting-started). Through this, you will also configure your cloud provider and the domain name under which your -application will be deployed to. +First, create an account on https://app.plural.sh. This is simply to track your installations and allow for the delivery of automated upgrades; you will not be asked to provide any infrastructure credentials or sensitive information. + +Then, install the Plural CLI by following steps 1, 2, and 3 of the instructions [here](https://docs.plural.sh/getting-started). Through this, you will also configure your cloud provider and the domain name under which your application will be deployed. Then create a fresh Git repo to store your Plural installation and from within the repo, run: @@ -25,19 +26,41 @@ To install Airbyte on your Plural repo, simply run: ```bash plural bundle install airbyte airbyte-aws ``` +Plural's Airbyte distribution currently has support for AWS, GCP and Azure set up and ready to go, so feel free to pick whichever best fits your infrastructure. The CLI will prompt you to choose whether or not you want to use Plural OIDC, which means you're using Plural as your identity provider for SSO. After this, run: ```bash plural build -plural deploy --commit "Initial Deploy." +plural deploy --commit "deploying airbyte" ``` + +## Adding the Plural Console + +To make management of your installation as simple as possible, we recommend installing the Plural Console. The console provides tools for managing resource scaling, receiving automated upgrades, and getting out-of-the-box dashboarding and log aggregation.
This can be done using the exact same process as above: + +```bash +plural bundle install console console-aws +plural build +plural deploy --commit "deploying the console too" ``` ## Accessing your Airbyte Installation Now, just head over to airbyte.SUBDOMAIN_NAME.onplural.sh to access the Airbyte UI. -## Monitoring your Installation +## Accessing your Console Installation + +To monitor and manage your Airbyte installation, head over to the Plural Console at console.YOUR_ORGANIZATION.onplural.sh (or whichever subdomain you chose). + +## Troubleshooting + +If you have any issues with installing Airbyte on Plural, feel free to jump into our [discord](https://discord.gg/bEBAMXV64s) and we can help you out. + +If you'd like to request any new features for our Airbyte install, feel free to open an issue or PR at https://github.com/pluralsh/plural-artifacts. + +## Further Reading -To monitor and manage your Airbyte installation, head over to the Plural control panel at console.YOUR_ORGANIZATION.onplural.sh. +To learn more about what you can do with Plural and more advanced uses of the platform, feel free to dive deeper into our docs [here](https://docs.plural.sh) From 5f4a5bb59324a3291a70e253e7f9eed43fc0a5b7 Mon Sep 17 00:00:00 2001 From: Artem Astapenko <3767150+Jamakase@users.noreply.github.com> Date: Fri, 14 Jan 2022 02:07:10 +0300 Subject: [PATCH 119/215] Add workspaceId to intercom custom attributes (#9489) * Add workspaceId to intercom custom attributes * Rename to workspace_id --- .../services/thirdParty/intercom/index.tsx | 1 + .../thirdParty/intercom/useIntercom.ts | 30 +++++++++++++++---- .../CreditsPage/components/CreditsTitle.tsx | 3 +- .../cloud/views/layout/SideBar/SideBar.tsx | 8 ++--- .../services/workspaces/WorkspacesService.tsx | 11 +++++-- 5 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 airbyte-webapp/src/packages/cloud/services/thirdParty/intercom/index.tsx diff --git a/airbyte-webapp/src/packages/cloud/services/thirdParty/intercom/index.tsx b/airbyte-webapp/src/packages/cloud/services/thirdParty/intercom/index.tsx new file mode 100644 index 0000000000000..b4190eea53d3c --- /dev/null +++ b/airbyte-webapp/src/packages/cloud/services/thirdParty/intercom/index.tsx @@ -0,0 +1 @@ +export * from "./useIntercom"; diff --git a/airbyte-webapp/src/packages/cloud/services/thirdParty/intercom/useIntercom.ts b/airbyte-webapp/src/packages/cloud/services/thirdParty/intercom/useIntercom.ts index f60243dd91e5a..b91df12718b0d 100644 --- a/airbyte-webapp/src/packages/cloud/services/thirdParty/intercom/useIntercom.ts +++ b/airbyte-webapp/src/packages/cloud/services/thirdParty/intercom/useIntercom.ts @@ -1,20 +1,40 @@ import { useEffect } from "react"; -import { useIntercom as useIntercomProvider } from "react-use-intercom"; +import { + useIntercom as useIntercomProvider, + IntercomContextValues, +} from "react-use-intercom"; import { useCurrentUser } from "packages/cloud/services/auth/AuthService"; +import { useAnalytics } from "hooks/services/Analytics"; + +export const useIntercom = (): IntercomContextValues => { + const intercomContextValues = useIntercomProvider(); -export const useIntercom = (): void => { const user = useCurrentUser(); - const { boot, shutdown } = useIntercomProvider(); + const { analyticsContext } = useAnalytics(); useEffect(() => { - boot({ + intercomContextValues.boot({ email: user.email, name: user.name, userId: user.userId, userHash: user.intercomHash, + + customAttributes: { + workspace_id: analyticsContext.workspaceId, + }, }); - return () => 
shutdown(); + return () => intercomContextValues.shutdown(); }, [user]); + + useEffect(() => { + intercomContextValues.update({ + customAttributes: { + workspace_id: analyticsContext.workspace_id, + }, + }); + }, [analyticsContext.workspace_id]); + + return intercomContextValues; }; diff --git a/airbyte-webapp/src/packages/cloud/views/credits/CreditsPage/components/CreditsTitle.tsx b/airbyte-webapp/src/packages/cloud/views/credits/CreditsPage/components/CreditsTitle.tsx index 6b85ee0009764..3e05a421e1fe6 100644 --- a/airbyte-webapp/src/packages/cloud/views/credits/CreditsPage/components/CreditsTitle.tsx +++ b/airbyte-webapp/src/packages/cloud/views/credits/CreditsPage/components/CreditsTitle.tsx @@ -1,6 +1,7 @@ import React from "react"; import { FormattedMessage } from "react-intl"; -import { useIntercom } from "react-use-intercom"; + +import { useIntercom } from "packages/cloud/services/thirdParty/intercom"; import PageTitle from "components/PageTitle"; import { Button } from "components/base"; diff --git a/airbyte-webapp/src/packages/cloud/views/layout/SideBar/SideBar.tsx b/airbyte-webapp/src/packages/cloud/views/layout/SideBar/SideBar.tsx index 82fe147a53f3e..a6384e2047c89 100644 --- a/airbyte-webapp/src/packages/cloud/views/layout/SideBar/SideBar.tsx +++ b/airbyte-webapp/src/packages/cloud/views/layout/SideBar/SideBar.tsx @@ -4,7 +4,8 @@ import { FormattedMessage, FormattedNumber } from "react-intl"; import { NavLink } from "react-router-dom"; import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { faStar } from "@fortawesome/free-solid-svg-icons"; -import { useIntercom } from "react-use-intercom"; + +import { useIntercom } from "packages/cloud/services/thirdParty/intercom"; import { CloudRoutes } from "packages/cloud/cloudRoutes"; @@ -26,10 +27,7 @@ import ResourcesPopup, { Item, } from "views/layout/SideBar/components/ResourcesPopup"; import { RoutePaths } from "pages/routes"; -import { - FeatureItem, - WithFeature, -} from "../../../../../hooks/services/Feature"; +import { FeatureItem, WithFeature } from "hooks/services/Feature"; const CreditsIcon = styled(FontAwesomeIcon)` font-size: 21px; diff --git a/airbyte-webapp/src/services/workspaces/WorkspacesService.tsx b/airbyte-webapp/src/services/workspaces/WorkspacesService.tsx index f6484fb7df244..7b85f2b3b5453 100644 --- a/airbyte-webapp/src/services/workspaces/WorkspacesService.tsx +++ b/airbyte-webapp/src/services/workspaces/WorkspacesService.tsx @@ -66,12 +66,17 @@ export const useWorkspaceService = (): Context => { return workspaceService; }; -export const useCurrentWorkspace = (): Workspace => { +export const useCurrentWorkspaceId = (): string => { const { params } = useRouter(); - const { workspaceId } = params; + + return params.workspaceId; +}; + +export const useCurrentWorkspace = (): Workspace => { + const workspaceId = useCurrentWorkspaceId(); return useResource(WorkspaceResource.detailShape(), { - workspaceId: workspaceId, + workspaceId, }); }; From 9cc2560935129316820db90f35d7bf7c1cc76b63 Mon Sep 17 00:00:00 2001 From: gergelylendvai <47741829+gergelylendvai@users.noreply.github.com> Date: Fri, 14 Jan 2022 01:06:51 +0100 Subject: [PATCH 120/215] Source Hubspot: adding form_submissions stream (#8011) * Source Hubspot: adding form_submissions stream * Adding form_id to form_submissions output * Adding form_id to form_submissions output and schema * Removing field deletions and id generation * Adding new line to form_submissions.json * Fixing form_submissions.json schema * add form_submissions as 
empty_stream to run ci airbyte * add forms_submissions to empty_stream * bump connector seed version Co-authored-by: Marcos Marx --- .../36c891d9-4bd9-43ac-bad2-10e12756272c.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-hubspot/Dockerfile | 2 +- .../source-hubspot/acceptance-test-config.yml | 4 +-- .../sample_files/configured_catalog.json | 9 +++++ .../configured_catalog_for_oauth_config.json | 9 +++++ .../sample_files/full_refresh_catalog.json | 9 +++++ .../source-hubspot/source_hubspot/api.py | 28 +++++++++++++++ .../source-hubspot/source_hubspot/client.py | 2 ++ .../schemas/form_submissions.json | 35 +++++++++++++++++++ docs/integrations/sources/hubspot.md | 1 + 12 files changed, 99 insertions(+), 6 deletions(-) create mode 100644 airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/form_submissions.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json index 5e76551413b44..b307efc884257 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "36c891d9-4bd9-43ac-bad2-10e12756272c", "name": "HubSpot", "dockerRepository": "airbyte/source-hubspot", - "dockerImageTag": "0.1.30", + "dockerImageTag": "0.1.32", "documentationUrl": "https://docs.airbyte.io/integrations/sources/hubspot", "icon": "hubspot.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 74993fe19c8cb..17551d21c8e43 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -287,7 +287,7 @@ - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.31 + dockerImageTag: 0.1.32 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 8a42a527dbb1c..a95b9511c84db 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2846,7 +2846,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-hubspot:0.1.31" +- dockerImage: "airbyte/source-hubspot:0.1.32" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index b8bd788b20cda..e51182b850dd4 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.31 +LABEL io.airbyte.version=0.1.32 LABEL 
io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml b/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml index d10181d818662..7a320596a7ca1 100644 --- a/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml @@ -18,14 +18,14 @@ tests: basic_read: - config_path: "secrets/config.json" configured_catalog_path: "sample_files/full_refresh_catalog.json" - empty_streams: ["workflows"] + empty_streams: ["workflows", "form_submissions"] - config_path: "secrets/config_oauth.json" configured_catalog_path: "sample_files/configured_catalog_for_oauth_config.json" # The `campaigns` stream is empty in this case, because we use a catalog with # incremental streams: subscription_changes and email_events (it takes a long time to read) # and therefore the start date is set at 2021-10-10 for `config_oauth.json`, # but the campaign was created on 2021-01-11 - empty_streams: ["campaigns", "workflows", "contacts_list_memberships"] + empty_streams: ["campaigns", "workflows", "contacts_list_memberships", "form_submissions"] incremental: - config_path: "secrets/config.json" configured_catalog_path: "sample_files/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json index 5f9e42c80d30b..b6157948c2968 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json @@ -96,6 +96,15 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "form_submissions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": "line_items", diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog_for_oauth_config.json b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog_for_oauth_config.json index 3bc43efac7647..673f27f562138 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog_for_oauth_config.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog_for_oauth_config.json @@ -93,6 +93,15 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "form_submissions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": "line_items", diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json b/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json index 19f4049c4ac17..98d568d1cc022 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json @@ -81,6 +81,15 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "form_submissions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": 
"line_items", diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index 708836d4d3b80..8dedd91d82004 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -732,6 +732,34 @@ class FormStream(Stream): created_at_field = "createdAt" +class FormSubmissionStream(Stream): + """Marketing Forms, API v1 + This endpoint requires the forms scope. + Docs: https://legacydocs.hubspot.com/docs/methods/forms/get-submissions-for-a-form + """ + + url = "/form-integrations/v1/submissions/forms" + limit = 50 + updated_at_field = "updatedAt" + + def _transform(self, records: Iterable) -> Iterable: + for record in super()._transform(records): + keys = record.keys() + + # There's no updatedAt field in the submission however forms fetched by using this field, + # so it has to be added to the submissions otherwise it would fail when calling _filter_old_records + if "updatedAt" not in keys: + record["updatedAt"] = record["submittedAt"] + + yield record + + def list(self, fields) -> Iterable: + for form in self.read(getter=partial(self._api.get, url="/marketing/v3/forms")): + for submission in self.read(getter=partial(self._api.get, url=f"{self.url}/{form['id']}")): + submission["formId"] = form["id"] + yield submission + + class MarketingEmailStream(Stream): """Marketing Email, API v1 Docs: https://legacydocs.hubspot.com/docs/methods/cms_email/get-all-marketing-emails diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py index ca17ea698f025..7768755fc387d 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py @@ -19,6 +19,7 @@ EmailEventStream, EngagementStream, FormStream, + FormSubmissionStream, MarketingEmailStream, OwnerStream, SubscriptionChangeStream, @@ -45,6 +46,7 @@ def __init__(self, start_date, credentials, **kwargs): "email_events": EmailEventStream(**common_params), "engagements": EngagementStream(**common_params), "forms": FormStream(**common_params), + "form_submissions": FormSubmissionStream(**common_params), "line_items": CRMObjectIncrementalStream(entity="line_item", **common_params), "marketing_emails": MarketingEmailStream(**common_params), "owners": OwnerStream(**common_params), diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/form_submissions.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/form_submissions.json new file mode 100644 index 0000000000000..3ee1cf9ea6788 --- /dev/null +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/form_submissions.json @@ -0,0 +1,35 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "type": ["null", "object"], + "properties": { + "submittedAt": { + "type": ["null", "integer"] + }, + "updatedAt": { + "type": ["null", "integer"] + }, + "values": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "name": { + "type": ["null", "string"] + }, + "value": { + "type": ["null", "string"] + }, + "objectTypeId": { + "type": ["null", "string"] + } + } + } + }, + "pageUrl": { + "type": ["null", "string"] + }, + "formId": { + "type": ["null", "string"] + } + } +} diff --git a/docs/integrations/sources/hubspot.md 
b/docs/integrations/sources/hubspot.md index 90ac117be7bf0..79e463b79434e 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -110,6 +110,7 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------| +| 0.1.32 | 2022-01-13 | [8011](https://github.com/airbytehq/airbyte/pull/8011) | Add new stream form_submissions | | 0.1.31 | 2022-01-11 | [9385](https://github.com/airbytehq/airbyte/pull/9385) | Remove auto-generated `properties` from `Engagements` stream | | 0.1.30 | 2021-01-10 | [9129](https://github.com/airbytehq/airbyte/pull/9129) | Created Contacts list memberships streams | | 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | From 41f89d1ab262ae80cc279b583cfa30d13635125b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Queiroz?= Date: Thu, 13 Jan 2022 21:32:26 -0300 Subject: [PATCH 121/215] Destination S3: use instanceprofile if credentials are not provided (#9399) * use instanceprofile to auth if id is not provided * restore support for using endpoint * update readme * update changelog * update documentation, add setup guide * Update docs/integrations/destinations/s3.md Co-authored-by: Edward Gao * minor fixes * add error message * now using RuntimeException * Update airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java Co-authored-by: Edward Gao * bump connector version * update seed file Co-authored-by: Edward Gao Co-authored-by: Marcos Marx --- .../4816b78f-1489-44c1-9060-4b19d5fa9362.json | 2 +- .../seed/destination_definitions.yaml | 2 +- .../main/resources/seed/destination_specs.yaml | 10 +++++----- .../connectors/destination-s3/Dockerfile | 2 +- .../connectors/destination-s3/README.md | 1 + .../destination/s3/S3DestinationConfig.java | 18 +++++++++++++++--- .../src/main/resources/spec.json | 6 ++---- docs/integrations/destinations/s3.md | 6 +++++- 8 files changed, 31 insertions(+), 16 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json index 07e795e2e3889..e4137456ad996 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "4816b78f-1489-44c1-9060-4b19d5fa9362", "name": "S3", "dockerRepository": "airbyte/destination-s3", - "dockerImageTag": "0.2.4", + "dockerImageTag": "0.2.5", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/s3", "icon": "s3.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 2b5cad4c78691..1e561ba8289d8 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ 
b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -167,7 +167,7 @@ - name: S3 destinationDefinitionId: 4816b78f-1489-44c1-9060-4b19d5fa9362 dockerRepository: airbyte/destination-s3 - dockerImageTag: 0.2.3 + dockerImageTag: 0.2.5 documentationUrl: https://docs.airbyte.io/integrations/destinations/s3 icon: s3.svg - name: SFTP-JSON diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 06520bfa6bcd0..9276dac6c07b6 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3408,7 +3408,7 @@ supported_destination_sync_modes: - "append" - "overwrite" -- dockerImage: "airbyte/destination-s3:0.2.4" +- dockerImage: "airbyte/destination-s3:0.2.5" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3" connectionSpecification: @@ -3419,8 +3419,6 @@ - "s3_bucket_name" - "s3_bucket_path" - "s3_bucket_region" - - "access_key_id" - - "secret_access_key" - "format" additionalProperties: false properties: @@ -3478,14 +3476,16 @@ access_key_id: type: "string" description: "The access key id to access the S3 bucket. Airbyte requires\ - \ Read and Write permissions to the given bucket." + \ Read and Write permissions to the given bucket, if not set, Airbyte\ + \ will rely on Instance Profile." title: "S3 Key Id" airbyte_secret: true examples: - "A012345678910EXAMPLE" secret_access_key: type: "string" - description: "The corresponding secret to the access key id." + description: "The corresponding secret to the access key id, if S3 Key Id\ + \ is set, then S3 Access Key must also be provided" title: "S3 Access Key" airbyte_secret: true examples: diff --git a/airbyte-integrations/connectors/destination-s3/Dockerfile b/airbyte-integrations/connectors/destination-s3/Dockerfile index 7334a212258e6..7850e4f00e2a7 100644 --- a/airbyte-integrations/connectors/destination-s3/Dockerfile +++ b/airbyte-integrations/connectors/destination-s3/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-s3 COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.4 +LABEL io.airbyte.version=0.2.5 LABEL io.airbyte.name=airbyte/destination-s3 diff --git a/airbyte-integrations/connectors/destination-s3/README.md b/airbyte-integrations/connectors/destination-s3/README.md index e163606e68a71..a5b7f9bb7c4cc 100644 --- a/airbyte-integrations/connectors/destination-s3/README.md +++ b/airbyte-integrations/connectors/destination-s3/README.md @@ -8,6 +8,7 @@ As a community contributor, you will need access to AWS to run the integration t - Create an S3 bucket for testing. - Get your `access_key_id` and `secret_access_key` that can read and write to the above bucket. +- if you leave `access_key_id` and `secret_access_key` in blank, the authentication will rely on the instance profile authentication - Paste the bucket and key information into the config files under [`./sample_secrets`](./sample_secrets). - Rename the directory from `sample_secrets` to `secrets`. - Feel free to modify the config files with different settings in the acceptance test file (e.g. `S3CsvDestinationAcceptanceTest.java`, method `getFormatConfig`), as long as they follow the schema defined in [spec.json](src/main/resources/spec.json). 
diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java index 3aea3ceceed3a..79ea47bee1853 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java @@ -4,6 +4,7 @@ package io.airbyte.integrations.destination.s3; +import com.amazonaws.auth.InstanceProfileCredentialsProvider; import com.amazonaws.ClientConfiguration; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSStaticCredentialsProvider; @@ -87,8 +88,8 @@ public static S3DestinationConfig getS3DestinationConfig(final JsonNode config) config.get("s3_bucket_name").asText(), bucketPath, config.get("s3_bucket_region").asText(), - config.get("access_key_id").asText(), - config.get("secret_access_key").asText(), + config.get("access_key_id") == null ? "" : config.get("access_key_id").asText(), + config.get("secret_access_key") == null ? "" : config.get("secret_access_key").asText(), partSize, format); } @@ -128,7 +129,18 @@ public S3FormatConfig getFormatConfig() { public AmazonS3 getS3Client() { final AWSCredentials awsCreds = new BasicAWSCredentials(accessKeyId, secretAccessKey); - if (endpoint == null || endpoint.isEmpty()) { + if (accessKeyId.isEmpty() && !secretAccessKey.isEmpty() + || !accessKeyId.isEmpty() && secretAccessKey.isEmpty()) { + throw new RuntimeException("Either both accessKeyId and secretAccessKey should be provided, or neither"); + } + + if (accessKeyId.isEmpty() && secretAccessKey.isEmpty()) { + return AmazonS3ClientBuilder.standard() + .withCredentials(new InstanceProfileCredentialsProvider(false)) + .build(); + } + + else if (endpoint == null || endpoint.isEmpty()) { return AmazonS3ClientBuilder.standard() .withCredentials(new AWSStaticCredentialsProvider(awsCreds)) .withRegion(bucketRegion) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json index ceb0d8998cf2e..1251f6a400d99 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json @@ -12,8 +12,6 @@ "s3_bucket_name", "s3_bucket_path", "s3_bucket_region", - "access_key_id", - "secret_access_key", "format" ], "additionalProperties": false, @@ -72,14 +70,14 @@ }, "access_key_id": { "type": "string", - "description": "The access key id to access the S3 bucket. Airbyte requires Read and Write permissions to the given bucket.", + "description": "The access key id to access the S3 bucket. 
Airbyte requires Read and Write permissions to the given bucket, if not set, Airbyte will rely on Instance Profile.", "title": "S3 Key Id", "airbyte_secret": true, "examples": ["A012345678910EXAMPLE"] }, "secret_access_key": { "type": "string", - "description": "The corresponding secret to the access key id.", + "description": "The corresponding secret to the access key id, if S3 Key Id is set, then S3 Access Key must also be provided", "title": "S3 Access Key", "airbyte_secret": true, "examples": ["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"] diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index d9ac12b8c1fcb..8ef5ee823d3ad 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -199,7 +199,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A #### Requirements 1. Allow connections from Airbyte server to your AWS S3/ Minio S3 cluster \(if they exist in separate VPCs\). -2. An S3 bucket with credentials. +2. An S3 bucket with credentials or an instance profile with read/write permissions configured for the host (EC2, EKS). #### Setup Guide @@ -211,18 +211,22 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A * **S3 Bucket Region** * **Access Key Id** * See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key. + * See [this](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2_instance-profiles.html) on how to create an instance profile. * We recommend creating an Airbyte-specific user. This user will require [read and write permissions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_s3_rw-bucket.html) to objects in the staging bucket. + * If the Access Key and Secret Access Key are not provided, the authentication will rely on the instance profile. * **Secret Access Key** * Corresponding key to the above key id. * Make sure your S3 bucket is accessible from the machine running Airbyte. * This depends on your networking setup. * You can check AWS S3 documentation with a tutorial on how to properly configure your S3's access [here](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-control-overview.html). + * If you will use instance profile authentication, make sure the role has permission to read/write on the bucket. * The easiest way to verify if Airbyte is able to connect to your S3 bucket is via the check connection tool in the UI. ## CHANGELOG | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.2.5 | 2022-01-13 | [\#9399](https://github.com/airbytehq/airbyte/pull/9399) | Use instance profile authentication if credentials are not provided | | 0.2.4 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | | 0.2.3 | 2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field to string.
| | 0.2.2 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | From f83eca58eaf2129d21b5796a301732ab22675130 Mon Sep 17 00:00:00 2001 From: Eugene Kulak Date: Fri, 14 Jan 2022 06:29:34 +0200 Subject: [PATCH 122/215] CDK: Fix typing errors (#9037) * fix typing, drop AirbyteLogger * format * bump the version * use logger instead of fixture logger Co-authored-by: Eugene Kulak Co-authored-by: auganbay --- airbyte-cdk/python/CHANGELOG.md | 3 ++ airbyte-cdk/python/airbyte_cdk/connector.py | 6 +-- .../airbyte_cdk/destinations/destination.py | 15 +++--- airbyte-cdk/python/airbyte_cdk/logger.py | 53 +++++-------------- .../airbyte_cdk/sources/abstract_source.py | 24 ++++----- .../python/airbyte_cdk/sources/config.py | 4 +- .../sources/deprecated/base_source.py | 10 ++-- .../sources/singer/singer_helpers.py | 7 +-- .../python/airbyte_cdk/sources/source.py | 6 +-- .../airbyte_cdk/sources/streams/core.py | 4 +- .../airbyte_cdk/sources/streams/http/http.py | 18 ++++--- .../sources/streams/http/rate_limiting.py | 5 +- .../http/requests_native_auth/oauth.py | 2 +- .../airbyte_cdk/sources/utils/__init__.py | 4 ++ .../sources/utils/schema_models.py | 4 +- .../airbyte_cdk/sources/utils/transform.py | 12 ++--- .../python/airbyte_cdk/utils/event_timing.py | 3 +- .../python/airbyte_cdk/utils/mapping_utils.py | 4 +- airbyte-cdk/python/setup.py | 2 +- .../unit_tests/singer/test_singer_source.py | 14 ++--- .../sources/streams/http/test_http.py | 20 +++---- .../sources/test_abstract_source.py | 26 ++++----- .../python/unit_tests/sources/test_source.py | 8 +-- .../python/unit_tests/test_connector.py | 6 +-- airbyte-cdk/python/unit_tests/test_logger.py | 2 +- .../python/unit_tests/test_secure_logger.py | 8 +-- 26 files changed, 126 insertions(+), 144 deletions(-) diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index c5aed3c9a8df5..4c4f0cf604aae 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.1.47 +Fix typing errors. + ## 0.1.45 Integrate Sentry for performance and errors tracking. diff --git a/airbyte-cdk/python/airbyte_cdk/connector.py b/airbyte-cdk/python/airbyte_cdk/connector.py index 4e8fa91cb601a..f17c76ab5754e 100644 --- a/airbyte-cdk/python/airbyte_cdk/connector.py +++ b/airbyte-cdk/python/airbyte_cdk/connector.py @@ -4,12 +4,12 @@ import json +import logging import os import pkgutil from abc import ABC, abstractmethod from typing import Any, Mapping, Optional -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import AirbyteConnectionStatus, ConnectorSpecification @@ -48,7 +48,7 @@ def write_config(config: Mapping[str, Any], config_path: str): with open(config_path, "w") as fh: fh.write(json.dumps(config)) - def spec(self, logger: AirbyteLogger) -> ConnectorSpecification: + def spec(self, logger: logging.Logger) -> ConnectorSpecification: """ Returns the spec for this integration. The spec is a JSON-Schema object describing the required configurations (e.g: username and password) required to run this integration. 
@@ -59,7 +59,7 @@ def spec(self, logger: AirbyteLogger) -> ConnectorSpecification: return ConnectorSpecification.parse_obj(json.loads(raw_spec)) @abstractmethod - def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: """ Tests if the input configuration can be used to successfully connect to the integration e.g: if a provided Stripe API token can be used to connect to the Stripe API. diff --git a/airbyte-cdk/python/airbyte_cdk/destinations/destination.py b/airbyte-cdk/python/airbyte_cdk/destinations/destination.py index f07500f0bc098..b46123dd0ffc2 100644 --- a/airbyte-cdk/python/airbyte_cdk/destinations/destination.py +++ b/airbyte-cdk/python/airbyte_cdk/destinations/destination.py @@ -4,19 +4,20 @@ import argparse import io +import logging import sys from abc import ABC, abstractmethod from typing import Any, Iterable, List, Mapping -from airbyte_cdk import AirbyteLogger from airbyte_cdk.connector import Connector from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, Type from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit from pydantic import ValidationError +logger = logging.getLogger("airbyte") + class Destination(Connector, ABC): - logger = AirbyteLogger() VALID_CMDS = {"spec", "check", "write"} @abstractmethod @@ -26,7 +27,7 @@ def write( """Implement to define how the connector writes data to the destination""" def _run_check(self, config: Mapping[str, Any]) -> AirbyteMessage: - check_result = self.check(self.logger, config) + check_result = self.check(logger, config) return AirbyteMessage(type=Type.CONNECTION_STATUS, connectionStatus=check_result) def _parse_input_stream(self, input_stream: io.TextIOWrapper) -> Iterable[AirbyteMessage]: @@ -35,16 +36,16 @@ def _parse_input_stream(self, input_stream: io.TextIOWrapper) -> Iterable[Airbyt try: yield AirbyteMessage.parse_raw(line) except ValidationError: - self.logger.info(f"ignoring input which can't be deserialized as Airbyte Message: {line}") + logger.info(f"ignoring input which can't be deserialized as Airbyte Message: {line}") def _run_write( self, config: Mapping[str, Any], configured_catalog_path: str, input_stream: io.TextIOWrapper ) -> Iterable[AirbyteMessage]: catalog = ConfiguredAirbyteCatalog.parse_file(configured_catalog_path) input_messages = self._parse_input_stream(input_stream) - self.logger.info("Begin writing to the destination...") + logger.info("Begin writing to the destination...") yield from self.write(config=config, configured_catalog=catalog, input_messages=input_messages) - self.logger.info("Writing complete.") + logger.info("Writing complete.") def parse_args(self, args: List[str]) -> argparse.Namespace: """ @@ -86,7 +87,7 @@ def run_cmd(self, parsed_args: argparse.Namespace) -> Iterable[AirbyteMessage]: if cmd not in self.VALID_CMDS: raise Exception(f"Unrecognized command: {cmd}") - spec = self.spec(self.logger) + spec = self.spec(logger) if cmd == "spec": yield AirbyteMessage(type=Type.SPEC, spec=spec) return diff --git a/airbyte-cdk/python/airbyte_cdk/logger.py b/airbyte-cdk/python/airbyte_cdk/logger.py index 5b35f36478b49..1cfb72175a620 100644 --- a/airbyte-cdk/python/airbyte_cdk/logger.py +++ b/airbyte-cdk/python/airbyte_cdk/logger.py @@ -6,7 +6,7 @@ import logging.config import sys import traceback -from typing import List +from typing import List, Tuple from airbyte_cdk.models import AirbyteLogMessage, 
AirbyteMessage @@ -49,7 +49,6 @@ def hook_fn(exception_type, exception_value, traceback_): def init_logger(name: str = None): """Initial set up of logger""" - logging.setLoggerClass(AirbyteNativeLogger) logging.addLevelName(TRACE_LEVEL_NUM, "TRACE") logger = logging.getLogger(name) logger.setLevel(TRACE_LEVEL_NUM) @@ -61,7 +60,7 @@ def init_logger(name: str = None): class AirbyteLogFormatter(logging.Formatter): """Output log records using AirbyteMessage""" - _secrets = [] + _secrets: List[str] = [] @classmethod def update_secrets(cls, secrets: List[str]): @@ -88,46 +87,22 @@ def format(self, record: logging.LogRecord) -> str: return log_message.json(exclude_unset=True) -class AirbyteNativeLogger(logging.Logger): - """Using native logger with implementing all AirbyteLogger features""" +def log_by_prefix(msg: str, default_level: str) -> Tuple[int, str]: + """Custom method, which takes log level from first word of message""" + valid_log_types = ["FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE"] + split_line = msg.split() + first_word = next(iter(split_line), None) + if first_word in valid_log_types: + log_level = logging.getLevelName(first_word) + rendered_message = " ".join(split_line[1:]) + else: + log_level = logging.getLevelName(default_level) + rendered_message = msg - def __init__(self, name): - super().__init__(name) - self.valid_log_types = ["FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE"] - - def log_by_prefix(self, msg, default_level): - """Custom method, which takes log level from first word of message""" - split_line = msg.split() - first_word = next(iter(split_line), None) - if first_word in self.valid_log_types: - log_level = logging.getLevelName(first_word) - rendered_message = " ".join(split_line[1:]) - else: - default_level = default_level if default_level in self.valid_log_types else "INFO" - log_level = logging.getLevelName(default_level) - rendered_message = msg - self.log(log_level, rendered_message) - - def trace(self, msg, *args, **kwargs): - self._log(TRACE_LEVEL_NUM, msg, args, **kwargs) + return log_level, rendered_message class AirbyteLogger: - def __init__(self): - self.valid_log_types = ["FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE"] - - def log_by_prefix(self, message, default_level): - """Custom method, which takes log level from first word of message""" - split_line = message.split() - first_word = next(iter(split_line), None) - if first_word in self.valid_log_types: - log_level = first_word - rendered_message = " ".join(split_line[1:]) - else: - log_level = default_level - rendered_message = message - self.log(log_level, rendered_message) - def log(self, level, message): log_record = AirbyteLogMessage(level=level, message=message) log_message = AirbyteMessage(type="LOG", log=log_record) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/abstract_source.py b/airbyte-cdk/python/airbyte_cdk/sources/abstract_source.py index 706536ce9e6a7..d2e81e99b3505 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/abstract_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/abstract_source.py @@ -4,19 +4,18 @@ import copy +import logging from abc import ABC, abstractmethod from datetime import datetime from functools import lru_cache from typing import Any, Dict, Iterator, List, Mapping, MutableMapping, Optional, Tuple -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import ( AirbyteCatalog, AirbyteConnectionStatus, AirbyteMessage, AirbyteRecordMessage, AirbyteStateMessage, - AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, 
Status, @@ -38,8 +37,9 @@ class AbstractSource(Source, ABC): """ @abstractmethod - def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: + def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: """ + :param logger: source logger :param config: The user-provided configuration as specified by the source's spec. This usually contains information required to check connection e.g. tokens, secrets and keys etc. :return: A tuple of (boolean, error). If boolean is true, then the connection check is successful @@ -57,19 +57,19 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: """ # Stream name to instance map for applying output object transformation - _stream_to_instance_map: Dict[str, AirbyteStream] = {} + _stream_to_instance_map: Dict[str, Stream] = {} @property def name(self) -> str: """Source name""" return self.__class__.__name__ - def discover(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteCatalog: + def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog: """Implements the Discover operation from the Airbyte Specification. See https://docs.airbyte.io/architecture/airbyte-specification.""" streams = [stream.as_airbyte_stream() for stream in self.streams(config=config)] return AirbyteCatalog(streams=streams) - def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: """Implements the Check Connection operation from the Airbyte Specification. See https://docs.airbyte.io/architecture/airbyte-specification.""" try: check_succeeded, error = self.check_connection(logger, config) @@ -81,7 +81,7 @@ def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConn return AirbyteConnectionStatus(status=Status.SUCCEEDED) def read( - self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None + self, logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None ) -> Iterator[AirbyteMessage]: """Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.io/architecture/airbyte-specification.""" connector_state = copy.deepcopy(state or {}) @@ -118,7 +118,7 @@ def read( def _read_stream( self, - logger: AirbyteLogger, + logger: logging.Logger, stream_instance: Stream, configured_stream: ConfiguredAirbyteStream, connector_state: MutableMapping[str, Any], @@ -160,7 +160,7 @@ def _limit_reached(internal_config: InternalConfig, records_counter: int) -> boo def _read_incremental( self, - logger: AirbyteLogger, + logger: logging.Logger, stream_instance: Stream, configured_stream: ConfiguredAirbyteStream, connector_state: MutableMapping[str, Any], @@ -222,7 +222,7 @@ def _checkpoint_state(self, stream_name, stream_state, connector_state, logger): return AirbyteMessage(type=MessageType.STATE, state=AirbyteStateMessage(data=connector_state)) @lru_cache(maxsize=None) - def _get_stream_transformer_and_schema(self, stream_name: str) -> Tuple[TypeTransformer, dict]: + def _get_stream_transformer_and_schema(self, stream_name: str) -> Tuple[TypeTransformer, Mapping[str, Any]]: """ Lookup stream's transform object and jsonschema based on stream name. 
This function would be called a lot so using caching to save on costly @@ -230,7 +230,7 @@ def _get_stream_transformer_and_schema(self, stream_name: str) -> Tuple[TypeTran :param stream_name name of stream from catalog. :return tuple with stream transformer object and discover json schema. """ - stream_instance = self._stream_to_instance_map.get(stream_name) + stream_instance = self._stream_to_instance_map[stream_name] return stream_instance.transformer, stream_instance.get_json_schema() def _as_airbyte_record(self, stream_name: str, data: Mapping[str, Any]): @@ -240,6 +240,6 @@ def _as_airbyte_record(self, stream_name: str, data: Mapping[str, Any]): # need it to normalize values against json schema. By default no action # taken unless configured. See # docs/connector-development/cdk-python/schemas.md for details. - transformer.transform(data, schema) + transformer.transform(data, schema) # type: ignore message = AirbyteRecordMessage(stream=stream_name, data=data, emitted_at=now_millis) return AirbyteMessage(type=MessageType.RECORD, record=message) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/config.py b/airbyte-cdk/python/airbyte_cdk/sources/config.py index 11ff07a9606c6..b96672ef86e49 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/config.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/config.py @@ -17,9 +17,9 @@ class BaseConfig(BaseModel): """ @classmethod - def schema(cls, **kwargs) -> Dict[str, Any]: + def schema(cls, *args, **kwargs) -> Dict[str, Any]: """We're overriding the schema classmethod to enable some post-processing""" - schema = super().schema(**kwargs) + schema = super().schema(*args, **kwargs) rename_key(schema, old_key="anyOf", new_key="oneOf") # UI supports only oneOf expand_refs(schema) schema.pop("description", None) # description added from the docstring diff --git a/airbyte-cdk/python/airbyte_cdk/sources/deprecated/base_source.py b/airbyte-cdk/python/airbyte_cdk/sources/deprecated/base_source.py index 617f0378f9247..c1dd61c45d114 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/deprecated/base_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/deprecated/base_source.py @@ -4,10 +4,10 @@ import copy +import logging from datetime import datetime from typing import Any, Iterable, Mapping, MutableMapping, Type -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import ( AirbyteCatalog, AirbyteConnectionStatus, @@ -39,13 +39,13 @@ def _get_client(self, config: Mapping): """Construct client""" return self.client_class(**config) - def discover(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteCatalog: + def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog: """Discover streams""" client = self._get_client(config) return AirbyteCatalog(streams=[stream for stream in client.streams]) - def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: """Check connection""" client = self._get_client(config) alive, error = client.health_check() @@ -55,7 +55,7 @@ def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConn return AirbyteConnectionStatus(status=Status.SUCCEEDED) def read( - self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None + self, logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None ) 
-> Iterable[AirbyteMessage]: state = state or {} client = self._get_client(config) @@ -73,7 +73,7 @@ def read( logger.info(f"Finished syncing {self.name}") def _read_stream( - self, logger: AirbyteLogger, client: BaseClient, configured_stream: ConfiguredAirbyteStream, state: MutableMapping[str, Any] + self, logger: logging.Logger, client: BaseClient, configured_stream: ConfiguredAirbyteStream, state: MutableMapping[str, Any] ): stream_name = configured_stream.stream.name use_incremental = configured_stream.sync_mode == SyncMode.incremental and client.stream_has_state(stream_name) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/singer/singer_helpers.py b/airbyte-cdk/python/airbyte_cdk/sources/singer/singer_helpers.py index 23d4648240982..d279e3d7d3004 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/singer/singer_helpers.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/singer/singer_helpers.py @@ -12,6 +12,7 @@ from io import TextIOWrapper from typing import Any, DefaultDict, Dict, Iterator, List, Mapping, Optional, Tuple +from airbyte_cdk.logger import log_by_prefix from airbyte_cdk.models import ( AirbyteCatalog, AirbyteMessage, @@ -138,7 +139,7 @@ def _read_singer_catalog(logger, shell_command: str) -> Mapping[str, Any]: shell_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True ) for line in completed_process.stderr.splitlines(): - logger.log_by_prefix(line, "ERROR") + logger.log(*log_by_prefix(line, "ERROR")) return json.loads(completed_process.stdout) @@ -169,9 +170,9 @@ def read(logger, shell_command, is_message=(lambda x: True)) -> Iterator[Airbyte if message_data is not None: yield message_data else: - logger.log_by_prefix(line, "INFO") + logger.log(*log_by_prefix(line, "INFO")) else: - logger.log_by_prefix(line, "ERROR") + logger.log(*log_by_prefix(line, "ERROR")) @staticmethod def _read_lines(process: subprocess.Popen) -> Iterator[Tuple[str, TextIOWrapper]]: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/source.py b/airbyte-cdk/python/airbyte_cdk/sources/source.py index 81e1eac7aaa2d..5e0396b3fcbd9 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/source.py @@ -4,12 +4,12 @@ import json +import logging from abc import ABC, abstractmethod from collections import defaultdict from typing import Any, Dict, Iterable, Mapping, MutableMapping from airbyte_cdk.connector import Connector -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import AirbyteCatalog, AirbyteMessage, ConfiguredAirbyteCatalog @@ -29,14 +29,14 @@ def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog: @abstractmethod def read( - self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None + self, logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None ) -> Iterable[AirbyteMessage]: """ Returns a generator of the AirbyteMessages generated by reading the source with the given configuration, catalog, and state. """ @abstractmethod - def discover(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteCatalog: + def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog: """ Returns an AirbyteCatalog representing the available streams and fields in this integration. 
For example, given valid credentials to a Postgres database, returns an Airbyte catalog where each postgres table is a stream, and each table column is a field. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/core.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/core.py index 22823cd29d2b1..9aea6d7d15084 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/core.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/core.py @@ -110,11 +110,13 @@ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: """ def stream_slices( - self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None + self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: """ Override to define the slices for this stream. See the stream slicing section of the docs for more information. + :param sync_mode: + :param cursor_field: :param stream_state: :return: """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py index 1abab94799f5a..f2b1f06597632 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py @@ -33,13 +33,13 @@ class HttpStream(Stream, ABC): """ source_defined_cursor = True # Most HTTP streams use a source defined cursor (i.e: the user can't configure it like on a SQL table) - page_size = None # Use this variable to define page size for API http requests with pagination support + page_size: Optional[int] = None # Use this variable to define page size for API http requests with pagination support # TODO: remove legacy HttpAuthenticator authenticator references def __init__(self, authenticator: Union[AuthBase, HttpAuthenticator] = None): self._session = requests.Session() - self._authenticator = NoAuth() + self._authenticator: HttpAuthenticator = NoAuth() if isinstance(authenticator, AuthBase): self._session.auth = authenticator elif authenticator: @@ -107,7 +107,7 @@ def max_retries(self) -> Union[int, None]: return 5 @property - def retry_factor(self) -> int: + def retry_factor(self) -> float: """ Override if needed. Specifies factor for backoff policy. """ @@ -130,6 +130,7 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, @abstractmethod def path( self, + *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, @@ -206,6 +207,7 @@ def request_kwargs( def parse_response( self, response: requests.Response, + *, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, @@ -214,6 +216,9 @@ def parse_response( Parses the raw response object into a list of records. By default, this returns an iterable containing the input. Override to parse differently. :param response: + :param stream_state: + :param stream_slice: + :param next_page_token: :return: An iterable containing the parsed response """ @@ -236,6 +241,7 @@ def backoff_time(self, response: requests.Response) -> Optional[float]: This method is called only if should_backoff() returns True for the input request. + :param response: :return how long to backoff in seconds. The return value may be a floating point number for subsecond precision. Returning None defers backoff to the default backoff behavior (e.g using an exponential algorithm). 
""" @@ -310,11 +316,11 @@ def _send_request(self, request: requests.PreparedRequest, request_kwargs: Mappi max_tries: The maximum number of attempts to make before giving up ...The default value of None means there is no limit to the number of tries. - This implies that if max_tries is excplicitly set to None there is no + This implies that if max_tries is explicitly set to None there is no limit to retry attempts, otherwise it is limited number of tries. But this is not true for current version of backoff packages (1.8.0). Setting - max_tries to 0 or negative number would result in endless retry atempts. - Add this condition to avoid an endless loop if it hasnt been set + max_tries to 0 or negative number would result in endless retry attempts. + Add this condition to avoid an endless loop if it hasn't been set explicitly (i.e. max_retries is not None). """ if max_tries is not None: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py index ab7cbba741cfe..2401e51005d52 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py @@ -5,6 +5,7 @@ import sys import time +from typing import Optional import backoff from airbyte_cdk.logger import AirbyteLogger @@ -18,7 +19,7 @@ logger = AirbyteLogger() -def default_backoff_handler(max_tries: int, factor: int, **kwargs): +def default_backoff_handler(max_tries: Optional[int], factor: float, **kwargs): def log_retry_attempt(details): _, exc, _ = sys.exc_info() if exc.response: @@ -46,7 +47,7 @@ def should_give_up(exc): ) -def user_defined_backoff_handler(max_tries: int, **kwargs): +def user_defined_backoff_handler(max_tries: Optional[int], **kwargs): def sleep_on_ratelimit(details): _, exc, _ = sys.exc_info() if isinstance(exc, UserDefinedBackoffException): diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index 39c65950a7ae7..a77fa5c730494 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -23,7 +23,7 @@ def __init__( client_secret: str, refresh_token: str, scopes: List[str] = None, - token_expiry_date: pendulum.datetime = None, + token_expiry_date: pendulum.DateTime = None, access_token_name: str = "access_token", expires_in_name: str = "expires_in", ): diff --git a/airbyte-cdk/python/airbyte_cdk/sources/utils/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/utils/__init__.py index acfd1708ebe44..5adf292dff0cb 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/utils/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/utils/__init__.py @@ -1 +1,5 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + # Initialize Utils Package diff --git a/airbyte-cdk/python/airbyte_cdk/sources/utils/schema_models.py b/airbyte-cdk/python/airbyte_cdk/sources/utils/schema_models.py index d2c0c23fec50e..7a36964937036 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/utils/schema_models.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/utils/schema_models.py @@ -77,8 +77,8 @@ def schema_extra(cls, schema: Dict[str, Any], model: Type[BaseModel]) -> None: prop["oneOf"] = [{"type": "null"}, {"$ref": ref}] @classmethod - def schema(cls, **kwargs) -> Dict[str, Any]: + def schema(cls, *args, **kwargs) -> Dict[str, Any]: """We're overriding the schema classmethod to enable some post-processing""" - schema = super().schema(**kwargs) + schema = super().schema(*args, **kwargs) expand_refs(schema) return schema diff --git a/airbyte-cdk/python/airbyte_cdk/sources/utils/transform.py b/airbyte-cdk/python/airbyte_cdk/sources/utils/transform.py index fa14eb531f042..ed974ef1305b8 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/utils/transform.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/utils/transform.py @@ -4,7 +4,7 @@ from distutils.util import strtobool from enum import Flag, auto -from typing import Any, Callable, Dict +from typing import Any, Callable, Dict, Mapping, Optional from airbyte_cdk.logger import AirbyteLogger from jsonschema import Draft7Validator, validators @@ -36,7 +36,7 @@ class TypeTransformer: Class for transforming object before output. """ - _custom_normalizer: Callable[[Any, Dict[str, Any]], Any] = None + _custom_normalizer: Optional[Callable[[Any, Dict[str, Any]], Any]] = None def __init__(self, config: TransformConfig): """ @@ -90,7 +90,7 @@ def default_convert(original_item: Any, subschema: Dict[str, Any]) -> Any: :param subschema part of the jsonschema containing field type/format data. :return transformed field value. """ - target_type = subschema.get("type") + target_type = subschema.get("type", []) if original_item is None and "null" in target_type: return None if isinstance(target_type, list): @@ -160,11 +160,11 @@ def resolve(subschema): return normalizator - def transform(self, record: Dict[str, Any], schema: Dict[str, Any]): + def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]): """ Normalize and validate according to config. - :param record record instance for normalization/transformation. All modification are done by modifing existent object. - :schema object's jsonschema for normalization. + :param record: record instance for normalization/transformation. All modification are done by modifying existent object. + :param schema: object's jsonschema for normalization. 
""" if TransformConfig.NoTransform in self._config: return diff --git a/airbyte-cdk/python/airbyte_cdk/utils/event_timing.py b/airbyte-cdk/python/airbyte_cdk/utils/event_timing.py index 8d31dc80d0a3a..25983c42c71a4 100644 --- a/airbyte-cdk/python/airbyte_cdk/utils/event_timing.py +++ b/airbyte-cdk/python/airbyte_cdk/utils/event_timing.py @@ -6,6 +6,7 @@ import time from contextlib import contextmanager from dataclasses import dataclass, field +from typing import Optional from airbyte_cdk.logger import AirbyteLogger @@ -60,7 +61,7 @@ def report(self, order_by="name"): class Event: name: str start: float = field(default_factory=time.perf_counter_ns) - end: float = field(default=None) + end: Optional[float] = field(default=None) @property def duration(self) -> float: diff --git a/airbyte-cdk/python/airbyte_cdk/utils/mapping_utils.py b/airbyte-cdk/python/airbyte_cdk/utils/mapping_utils.py index c2d5c85149d7a..c618316afea49 100644 --- a/airbyte-cdk/python/airbyte_cdk/utils/mapping_utils.py +++ b/airbyte-cdk/python/airbyte_cdk/utils/mapping_utils.py @@ -3,7 +3,7 @@ # from functools import reduce -from typing import Any, List, Mapping, Optional +from typing import Any, Iterable, List, Mapping, Optional, Tuple def all_key_pairs_dot_notation(dict_obj: Mapping) -> Mapping[str, Any]: @@ -12,7 +12,7 @@ def all_key_pairs_dot_notation(dict_obj: Mapping) -> Mapping[str, Any]: keys are prefixed with the list of keys passed in as prefix. """ - def _all_key_pairs_dot_notation(_dict_obj: Mapping, prefix: List[str] = []) -> Mapping[str, Any]: + def _all_key_pairs_dot_notation(_dict_obj: Mapping, prefix: List[str] = []) -> Iterable[Tuple[str, Any]]: for key, value in _dict_obj.items(): if isinstance(value, dict): prefix.append(str(key)) diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index 35fa886a9fa4d..baedbfb18d227 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -15,7 +15,7 @@ setup( name="airbyte-cdk", - version="0.1.46", + version="0.1.47", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", diff --git a/airbyte-cdk/python/unit_tests/singer/test_singer_source.py b/airbyte-cdk/python/unit_tests/singer/test_singer_source.py index 4f65b107341e2..319745c3e22b6 100644 --- a/airbyte-cdk/python/unit_tests/singer/test_singer_source.py +++ b/airbyte-cdk/python/unit_tests/singer/test_singer_source.py @@ -4,14 +4,14 @@ import copy +import logging from unittest.mock import patch -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models.airbyte_protocol import SyncMode from airbyte_cdk.sources.singer import SingerHelper, SyncModeInfo from airbyte_cdk.sources.singer.source import BaseSingerSource, ConfigContainer -LOGGER = AirbyteLogger() +logger = logging.getLogger("airbyte") class TetsBaseSinger(BaseSingerSource): @@ -57,7 +57,7 @@ class TetsBaseSinger(BaseSingerSource): @patch.object(SingerHelper, "_read_singer_catalog", return_value=basic_singer_catalog) def test_singer_discover_single_pk(mock_read_catalog): - airbyte_catalog = TetsBaseSinger().discover(LOGGER, ConfigContainer({}, "")) + airbyte_catalog = TetsBaseSinger().discover(logger, ConfigContainer({}, "")) _user_stream = airbyte_catalog.streams[0] _roles_stream = airbyte_catalog.streams[1] assert _user_stream.source_defined_primary_key == [["id"]] @@ -69,7 +69,7 @@ def test_singer_discover_with_composite_pk(): singer_catalog_composite_pk = copy.deepcopy(basic_singer_catalog) 
singer_catalog_composite_pk["streams"][0]["key_properties"] = ["id", "name"] with patch.object(SingerHelper, "_read_singer_catalog", return_value=singer_catalog_composite_pk): - airbyte_catalog = TetsBaseSinger().discover(LOGGER, ConfigContainer({}, "")) + airbyte_catalog = TetsBaseSinger().discover(logger, ConfigContainer({}, "")) _user_stream = airbyte_catalog.streams[0] _roles_stream = airbyte_catalog.streams[1] @@ -81,7 +81,7 @@ def test_singer_discover_with_composite_pk(): @patch.object(BaseSingerSource, "get_primary_key_overrides", return_value={"users": ["updated_at"]}) @patch.object(SingerHelper, "_read_singer_catalog", return_value=basic_singer_catalog) def test_singer_discover_pk_overrides(mock_pk_override, mock_read_catalog): - airbyte_catalog = TetsBaseSinger().discover(LOGGER, ConfigContainer({}, "")) + airbyte_catalog = TetsBaseSinger().discover(logger, ConfigContainer({}, "")) _user_stream = airbyte_catalog.streams[0] _roles_stream = airbyte_catalog.streams[1] assert _user_stream.source_defined_primary_key == [["updated_at"]] @@ -91,7 +91,7 @@ def test_singer_discover_pk_overrides(mock_pk_override, mock_read_catalog): @patch.object(SingerHelper, "_read_singer_catalog", return_value=basic_singer_catalog) def test_singer_discover_metadata(mock_read_catalog): - airbyte_catalog = TetsBaseSinger().discover(LOGGER, ConfigContainer({}, "")) + airbyte_catalog = TetsBaseSinger().discover(logger, ConfigContainer({}, "")) _user_stream = airbyte_catalog.streams[0] _roles_stream = airbyte_catalog.streams[1] @@ -105,7 +105,7 @@ def test_singer_discover_metadata(mock_read_catalog): def test_singer_discover_sync_mode_overrides(mock_read_catalog): sync_mode_override = SyncModeInfo(supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental], default_cursor_field=["name"]) with patch.object(BaseSingerSource, "get_sync_mode_overrides", return_value={"roles": sync_mode_override}): - airbyte_catalog = TetsBaseSinger().discover(LOGGER, ConfigContainer({}, "")) + airbyte_catalog = TetsBaseSinger().discover(logger, ConfigContainer({}, "")) _roles_stream = airbyte_catalog.streams[1] assert _roles_stream.supported_sync_modes == sync_mode_override.supported_sync_modes diff --git a/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py b/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py index b325733584e5b..61d8e0c69d596 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py @@ -29,18 +29,10 @@ def __init__(self, **kwargs): def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: return None - def path( - self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None - ) -> str: + def path(self, **kwargs) -> str: return "" - def parse_response( - self, - response: requests.Response, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, - ) -> Iterable[Mapping]: + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: stubResp = {"data": self.resp_counter} self.resp_counter += 1 yield stubResp @@ -364,10 +356,10 @@ class CacheHttpSubStream(HttpSubStream): def __init__(self, parent): super().__init__(parent=parent) - def parse_response(self, **kwargs) -> Iterable[Mapping]: + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: return [] - def 
next_page_token(self, **kwargs) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: return None def path(self, **kwargs) -> str: @@ -406,14 +398,14 @@ class CacheHttpStreamWithSlices(CacheHttpStream): paths = ["", "search"] def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: - return f'{stream_slice.get("path")}' + return f'{stream_slice["path"]}' if stream_slice else "" def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: for path in self.paths: yield {"path": path} def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - yield response + yield {"value": len(response.text)} @patch("airbyte_cdk.sources.streams.core.logging", MagicMock()) diff --git a/airbyte-cdk/python/unit_tests/sources/test_abstract_source.py b/airbyte-cdk/python/unit_tests/sources/test_abstract_source.py index 4c432b69e6500..703504598c61b 100644 --- a/airbyte-cdk/python/unit_tests/sources/test_abstract_source.py +++ b/airbyte-cdk/python/unit_tests/sources/test_abstract_source.py @@ -2,12 +2,11 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. # - +import logging from collections import defaultdict from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Union import pytest -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import ( AirbyteCatalog, AirbyteConnectionStatus, @@ -25,13 +24,15 @@ from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream +logger = logging.getLogger("airbyte") + class MockSource(AbstractSource): def __init__(self, check_lambda: Callable[[], Tuple[bool, Optional[Any]]] = None, streams: List[Stream] = None): self._streams = streams self.check_lambda = check_lambda - def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: + def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: if self.check_lambda: return self.check_lambda() return (False, "Missing callable.") @@ -42,11 +43,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: return self._streams -@pytest.fixture -def logger() -> AirbyteLogger: - return AirbyteLogger() - - def test_successful_check(): """Tests that if a source returns TRUE for the connection check the appropriate connectionStatus success message is returned""" expected = AirbyteConnectionStatus(status=Status.SUCCEEDED) @@ -112,7 +108,7 @@ def test_discover(mocker): assert expected == src.discover(logger, {}) -def test_read_nonexistent_stream_raises_exception(mocker, logger): +def test_read_nonexistent_stream_raises_exception(mocker): """Tests that attempting to sync a stream which the source does not return from the `streams` method raises an exception""" s1 = MockStream(name="s1") s2 = MockStream(name="this_stream_doesnt_exist_in_the_source") @@ -149,7 +145,7 @@ def _fix_emitted_at(messages: List[AirbyteMessage]) -> List[AirbyteMessage]: return messages -def test_valid_full_refresh_read_no_slices(logger, mocker): +def test_valid_full_refresh_read_no_slices(mocker): """Tests that running a full refresh sync on streams which don't specify slices produces the expected AirbyteMessages""" stream_output = [{"k1": "v1"}, {"k2": "v2"}] s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1") @@ -168,7 +164,7 @@ def test_valid_full_refresh_read_no_slices(logger, mocker): assert expected == messages -def 
test_valid_full_refresh_read_with_slices(mocker, logger): +def test_valid_full_refresh_read_with_slices(mocker): """Tests that running a full refresh sync on streams which use slices produces the expected AirbyteMessages""" slices = [{"1": "1"}, {"2": "2"}] # When attempting to sync a slice, just output that slice as a record @@ -194,7 +190,7 @@ def _state(state_data: Dict[str, Any]): return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state_data)) -def test_valid_incremental_read_with_checkpoint_interval(mocker, logger): +def test_valid_incremental_read_with_checkpoint_interval(mocker): """Tests that an incremental read which doesn't specify a checkpoint interval outputs a STATE message after reading N records within a stream""" stream_output = [{"k1": "v1"}, {"k2": "v2"}] s1 = MockStream([({"sync_mode": SyncMode.incremental, "stream_state": {}}, stream_output)], name="s1") @@ -226,7 +222,7 @@ def test_valid_incremental_read_with_checkpoint_interval(mocker, logger): assert expected == messages -def test_valid_incremental_read_with_no_interval(mocker, logger): +def test_valid_incremental_read_with_no_interval(mocker): """Tests that an incremental read which doesn't specify a checkpoint interval outputs a STATE message only after fully reading the stream and does not output any STATE messages during syncing the stream.""" stream_output = [{"k1": "v1"}, {"k2": "v2"}] @@ -252,7 +248,7 @@ def test_valid_incremental_read_with_no_interval(mocker, logger): assert expected == messages -def test_valid_incremental_read_with_slices(mocker, logger): +def test_valid_incremental_read_with_slices(mocker): """Tests that an incremental read which uses slices outputs each record in the slice followed by a STATE message, for each slice""" slices = [{"1": "1"}, {"2": "2"}] stream_output = [{"k1": "v1"}, {"k2": "v2"}, {"k3": "v3"}] @@ -291,7 +287,7 @@ def test_valid_incremental_read_with_slices(mocker, logger): assert expected == messages -def test_valid_incremental_read_with_slices_and_interval(mocker, logger): +def test_valid_incremental_read_with_slices_and_interval(mocker): """ Tests that an incremental read which uses slices and a checkpoint interval: 1. 
outputs all records diff --git a/airbyte-cdk/python/unit_tests/sources/test_source.py b/airbyte-cdk/python/unit_tests/sources/test_source.py index 3812cc6b57bec..141168494c236 100644 --- a/airbyte-cdk/python/unit_tests/sources/test_source.py +++ b/airbyte-cdk/python/unit_tests/sources/test_source.py @@ -4,12 +4,12 @@ import json +import logging import tempfile from typing import Any, Mapping, MutableMapping from unittest.mock import MagicMock import pytest -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode, Type from airbyte_cdk.sources import AbstractSource, Source from airbyte_cdk.sources.streams.core import Stream @@ -19,14 +19,14 @@ class MockSource(Source): def read( - self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None + self, logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None ): pass - def check(self, logger: AirbyteLogger, config: Mapping[str, Any]): + def check(self, logger: logging.Logger, config: Mapping[str, Any]): pass - def discover(self, logger: AirbyteLogger, config: Mapping[str, Any]): + def discover(self, logger: logging.Logger, config: Mapping[str, Any]): pass diff --git a/airbyte-cdk/python/unit_tests/test_connector.py b/airbyte-cdk/python/unit_tests/test_connector.py index d1ac03788030c..463b58290cba8 100644 --- a/airbyte-cdk/python/unit_tests/test_connector.py +++ b/airbyte-cdk/python/unit_tests/test_connector.py @@ -4,13 +4,13 @@ import json +import logging import tempfile from pathlib import Path from typing import Any, Mapping import pytest from airbyte_cdk import AirbyteSpec, Connector -from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import AirbyteConnectionStatus @@ -39,7 +39,7 @@ def test_from_file_nonexistent(self): class MockConnector(Connector): - def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: pass @@ -68,6 +68,6 @@ def test_read_config(nonempty_file, integration: Connector, mock_config): def test_write_config(integration, mock_config): config_path = Path(tempfile.gettempdir()) / "config.json" - integration.write_config(mock_config, config_path) + integration.write_config(mock_config, str(config_path)) with open(config_path, "r") as actual: assert mock_config == json.loads(actual.read()) diff --git a/airbyte-cdk/python/unit_tests/test_logger.py b/airbyte-cdk/python/unit_tests/test_logger.py index 9a4b2b9469ec6..96b5be2052089 100644 --- a/airbyte-cdk/python/unit_tests/test_logger.py +++ b/airbyte-cdk/python/unit_tests/test_logger.py @@ -54,7 +54,7 @@ def test_level_transform(logger, caplog): def test_trace(logger, caplog): - logger.trace("Test trace 1") + logger.log(logging.getLevelName("TRACE"), "Test trace 1") record = caplog.records[0] assert record.levelname == "TRACE" assert record.message == "Test trace 1" diff --git a/airbyte-cdk/python/unit_tests/test_secure_logger.py b/airbyte-cdk/python/unit_tests/test_secure_logger.py index 5bd5d79ccfde3..ed1e86cfc4dbb 100644 --- a/airbyte-cdk/python/unit_tests/test_secure_logger.py +++ b/airbyte-cdk/python/unit_tests/test_secure_logger.py @@ -8,7 +8,7 @@ from typing import Any, Iterable, Mapping, MutableMapping import pytest -from airbyte_cdk import AirbyteEntrypoint, AirbyteLogger +from airbyte_cdk import AirbyteEntrypoint from 
airbyte_cdk.logger import AirbyteLogFormatter from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog, ConnectorSpecification, Type from airbyte_cdk.sources import Source @@ -29,7 +29,7 @@ class MockSource(Source): def read( self, - logger: AirbyteLogger, + logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None, @@ -152,7 +152,7 @@ def test_airbyte_secrets_are_masked_on_uncaught_exceptions(mocker, caplog): class BrokenSource(MockSource): def read( self, - logger: AirbyteLogger, + logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None, @@ -198,7 +198,7 @@ def test_non_airbyte_secrets_are_not_masked_on_uncaught_exceptions(mocker, caplo class BrokenSource(MockSource): def read( self, - logger: AirbyteLogger, + logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None, From 15cd088e8a8ddf977bb92446f6880c0858626bdf Mon Sep 17 00:00:00 2001 From: Abhi Vaidyanatha Date: Fri, 14 Jan 2022 00:29:52 -0800 Subject: [PATCH 123/215] Clarify Connector Spec Caching. (#7664) --- docs/contributing-to-airbyte/developing-locally.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/contributing-to-airbyte/developing-locally.md b/docs/contributing-to-airbyte/developing-locally.md index 0ccabcda3dc6b..13c6412384978 100644 --- a/docs/contributing-to-airbyte/developing-locally.md +++ b/docs/contributing-to-airbyte/developing-locally.md @@ -136,7 +136,10 @@ npm start ### Connector Specification Caching -The Configuration API caches connector specifications. This is done to avoid needing to run docker everytime one is needed in the UI. Without this caching, the UI crawls. If you update the specification of a connector and you need to clear this cache so the API / UI pick up the change. You have two options: 1. Go to the Admin page in the UI and update the version of the connector. Updating to the same version will for the cache to clear for that connector. 1. Restart the server +The Configuration API caches connector specifications. This is done to avoid needing to run Docker everytime one is needed in the UI. Without this caching, the UI crawls. If you update the specification of a connector and need to clear this cache so the API / UI picks up the change, you have two options: + +1. Go to the Admin page in the UI and update the version of the connector. Updating to any version, including the one you're already on, will trigger clearing the cache. +2. Restart the server by running the following commands: ```bash VERSION=dev docker-compose down -v From 6a3cb920eeab1834d3c930c35486ac79dd25e636 Mon Sep 17 00:00:00 2001 From: Abhi Vaidyanatha Date: Fri, 14 Jan 2022 00:30:12 -0800 Subject: [PATCH 124/215] Improve clarity on namespaces documentation. (#7461) --- docs/understanding-airbyte/namespaces.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/understanding-airbyte/namespaces.md b/docs/understanding-airbyte/namespaces.md index 9c2ff5cd64ba1..3fe323cc6e1a3 100644 --- a/docs/understanding-airbyte/namespaces.md +++ b/docs/understanding-airbyte/namespaces.md @@ -6,16 +6,16 @@ The high-level overview contains all the information you need to use Namespaces when pulling from APIs. Information past that can be read for advanced or educational purposes. 
{% endhint %} -When looking through our connector docs, you'll notice that some sources and destinations support "Namespaces." These allow you to organize and separate your data into groups in the destination if the destination supports it. For example, in a database, a namespace could be a schema in the database. If your desired destination doesn't support it, you can ignore this feature. +When looking through our connector docs, you'll notice that some sources and destinations support "Namespaces." These allow you to organize and separate your data into groups in the destination if the destination supports it. In most cases, namespaces are schemas in the database you're replicating to. If your desired destination doesn't support it, you can ignore this feature. Note that this is the location that both your normalized and raw data will get written to. Your raw data will show up with the prefix `_airbyte_raw_` in the namespace you define. If you don't enable basic normalization, you will only receive the raw tables. If only your destination supports namespaces, you have two simple options. **This is the most likely case**, as all HTTP APIs currently don't support Namespaces. -1. Replicate to the default namespace in the destination, which will differ based on your destination. -2. Create a "Custom Format" to rename the namespace that your data will be replicated into. +1. Mirror Destination Settings - Replicate to the default namespace in the destination, which will differ based on your destination. +2. Custom Format - Create a "Custom Format" to rename the namespace that your data will be replicated into. -If both your desired source and destination support namespaces, your use case is probably replication-focused and more advanced, so continue reading. +If both your desired source and destination support namespaces, you're likely using a more advanced use case with a database as a source, so continue reading. ## What is a Namespace? From f552539619aa6c4b397e7def32279bd3d63a9c5f Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 14 Jan 2022 17:30:45 +0800 Subject: [PATCH 125/215] Upgrade to patched version. 
(#9506) --- .github/actions/start-aws-runner/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/start-aws-runner/action.yml b/.github/actions/start-aws-runner/action.yml index c9fab572b7171..ae3ba61873e50 100644 --- a/.github/actions/start-aws-runner/action.yml +++ b/.github/actions/start-aws-runner/action.yml @@ -40,7 +40,7 @@ runs: aws-region: us-east-2 - name: Start EC2 runner id: start-ec2-runner - uses: skyzh/ec2-github-runner@ba2298a67875dfdd29a88fafbc1ba27f4f94af39 + uses: machulav/ec2-github-runner@v2.3.2 with: mode: start github-token: ${{ inputs.github-token }} From 6fc9889460903d66d30d351b6dc3fb8f44f35c43 Mon Sep 17 00:00:00 2001 From: Baz Date: Fri, 14 Jan 2022 14:08:11 +0200 Subject: [PATCH 126/215] =?UTF-8?q?=F0=9F=90=9B=20=20Source=20Bing-Ads:=20?= =?UTF-8?q?fixed=20broken=20dependency=20(#9510)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../47f25999-dd5e-4636-8c39-e7cea2453331.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/connectors/source-bing-ads/Dockerfile | 2 +- airbyte-integrations/connectors/source-bing-ads/setup.py | 2 +- docs/integrations/sources/bing-ads.md | 1 + 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/47f25999-dd5e-4636-8c39-e7cea2453331.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/47f25999-dd5e-4636-8c39-e7cea2453331.json index 6b06a621e092b..cc586a8c03d31 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/47f25999-dd5e-4636-8c39-e7cea2453331.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/47f25999-dd5e-4636-8c39-e7cea2453331.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "47f25999-dd5e-4636-8c39-e7cea2453331", "name": "Bing Ads", "dockerRepository": "airbyte/source-bing-ads", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.3", "documentationUrl": "https://docs.airbyte.io/integrations/sources/bing-ads", "icon": "bingads.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 17551d21c8e43..1302890f6e688 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -91,7 +91,7 @@ - name: Bing Ads sourceDefinitionId: 47f25999-dd5e-4636-8c39-e7cea2453331 dockerRepository: airbyte/source-bing-ads - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/sources/bing-ads icon: bingads.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index a95b9511c84db..ed9459b0c5bbc 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -736,7 +736,7 @@ - "overwrite" - "append" - "append_dedup" -- dockerImage: "airbyte/source-bing-ads:0.1.2" +- dockerImage: "airbyte/source-bing-ads:0.1.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/bing-ads" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-bing-ads/Dockerfile b/airbyte-integrations/connectors/source-bing-ads/Dockerfile index 
57d8f148428a6..235027d466fc7 100644 --- a/airbyte-integrations/connectors/source-bing-ads/Dockerfile +++ b/airbyte-integrations/connectors/source-bing-ads/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-bing-ads diff --git a/airbyte-integrations/connectors/source-bing-ads/setup.py b/airbyte-integrations/connectors/source-bing-ads/setup.py index 1ab26067c25d3..1c86a5ade1b37 100644 --- a/airbyte-integrations/connectors/source-bing-ads/setup.py +++ b/airbyte-integrations/connectors/source-bing-ads/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk", "bingads==13.0.10", "vcrpy==4.1.1", "backoff==1.10.0", "pendulum==2.1.2"] +MAIN_REQUIREMENTS = ["airbyte-cdk", "bingads~=13.0.11", "vcrpy==4.1.1", "backoff==1.10.0", "pendulum==2.1.2"] TEST_REQUIREMENTS = [ "pytest~=6.1", diff --git a/docs/integrations/sources/bing-ads.md b/docs/integrations/sources/bing-ads.md index 4631f0970b073..d85234ad8c570 100644 --- a/docs/integrations/sources/bing-ads.md +++ b/docs/integrations/sources/bing-ads.md @@ -77,6 +77,7 @@ Be aware that `refresh token` will expire in 90 days. You need to repeat auth pr | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2022-01-14 | [9510](https://github.com/airbytehq/airbyte/pull/9510) | Fixed broken dependency that blocked connector's operations | | 0.1.2 | 2021-12-14 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update titles and descriptions | | 0.1.1 | 2021-08-31 | [5750](https://github.com/airbytehq/airbyte/pull/5750) | Added reporting streams\) | | 0.1.0 | 2021-07-22 | [4911](https://github.com/airbytehq/airbyte/pull/4911) | Initial release supported core streams \(Accounts, Campaigns, Ads, AdGroups\) | From dbddd7b3c587f132d4e83f998031e3c81a450273 Mon Sep 17 00:00:00 2001 From: Christophe Duong Date: Fri, 14 Jan 2022 13:37:52 +0100 Subject: [PATCH 127/215] Add metadata to segment tracking (#8872) * Add metadata to segment tracking * Add sync start time * Fix test and format code --- .../analytics/SegmentTrackingClient.java | 3 +++ .../analytics/SegmentTrackingClientTest.java | 18 ++++++++++++++++-- .../AzureBlobStorageDestinationConfig.java | 4 ++-- .../csv/AzureBlobStorageCsvWriter.java | 1 - .../jsonl/AzureBlobStorageJsonlWriter.java | 1 - .../job_tracker/TrackingMetadata.java | 3 +++ .../scheduler/persistence/JobNotifierTest.java | 2 ++ .../job_factory/OAuthConfigSupplierTest.java | 3 +++ .../job_tracker/JobTrackerTest.java | 16 +++++++++++++++- 9 files changed, 44 insertions(+), 7 deletions(-) diff --git a/airbyte-analytics/src/main/java/io/airbyte/analytics/SegmentTrackingClient.java b/airbyte-analytics/src/main/java/io/airbyte/analytics/SegmentTrackingClient.java index 3599fb91fb708..575b101a471ab 100644 --- a/airbyte-analytics/src/main/java/io/airbyte/analytics/SegmentTrackingClient.java +++ b/airbyte-analytics/src/main/java/io/airbyte/analytics/SegmentTrackingClient.java @@ -11,6 +11,7 @@ import com.segment.analytics.messages.IdentifyMessage; import com.segment.analytics.messages.TrackMessage; import io.airbyte.config.StandardWorkspace; +import java.time.Instant; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -43,6 +44,7 @@ public class SegmentTrackingClient implements TrackingClient { private static 
final String SEGMENT_WRITE_KEY = "7UDdp5K55CyiGgsauOr2pNNujGvmhaeu"; private static final String AIRBYTE_VERSION_KEY = "airbyte_version"; private static final String AIRBYTE_ROLE = "airbyte_role"; + private static final String AIRBYTE_TRACKED_AT = "tracked_at"; // Analytics is threadsafe. private final Analytics analytics; @@ -116,6 +118,7 @@ public void track(final UUID workspaceId, final String action, final Map mapCopy.put("email", email)); } diff --git a/airbyte-analytics/src/test/java/io/airbyte/analytics/SegmentTrackingClientTest.java b/airbyte-analytics/src/test/java/io/airbyte/analytics/SegmentTrackingClientTest.java index a3164108fc35b..d6d0bb9eb0e94 100644 --- a/airbyte-analytics/src/test/java/io/airbyte/analytics/SegmentTrackingClientTest.java +++ b/airbyte-analytics/src/test/java/io/airbyte/analytics/SegmentTrackingClientTest.java @@ -5,11 +5,13 @@ package io.airbyte.analytics; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableMap.Builder; import com.segment.analytics.Analytics; import com.segment.analytics.messages.IdentifyMessage; import com.segment.analytics.messages.TrackMessage; @@ -17,6 +19,7 @@ import io.airbyte.config.Configs; import io.airbyte.config.Configs.WorkerEnvironment; import java.util.Map; +import java.util.Objects; import java.util.UUID; import java.util.function.Function; import java.util.function.Supplier; @@ -109,7 +112,7 @@ void testTrack() { final TrackMessage actual = mockBuilder.getValue().build(); assertEquals("jump", actual.event()); assertEquals(IDENTITY.getCustomerId().toString(), actual.userId()); - assertEquals(metadata, actual.properties()); + assertEquals(metadata, filterTrackedAtProperty(Objects.requireNonNull(actual.properties()))); } @Test @@ -127,7 +130,18 @@ void testTrackWithMetadata() { final TrackMessage actual = mockBuilder.getValue().build(); assertEquals("jump", actual.event()); assertEquals(IDENTITY.getCustomerId().toString(), actual.userId()); - assertEquals(metadata, actual.properties()); + assertEquals(metadata, filterTrackedAtProperty(Objects.requireNonNull(actual.properties()))); + } + + private static ImmutableMap filterTrackedAtProperty(final Map properties) { + assertTrue(properties.containsKey("tracked_at")); + final Builder builder = ImmutableMap.builder(); + properties.forEach((key, value) -> { + if (!key.equals("tracked_at")) { + builder.put(key, value); + } + }); + return builder.build(); } } diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java index 8d575214b678c..1cebcd4b600cd 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationConfig.java @@ -64,11 +64,11 @@ public static AzureBlobStorageDestinationConfig getAzureBlobStorageConfig(final final 
JsonNode endpointFromConfig = config .get("azure_blob_storage_endpoint_domain_name"); final JsonNode containerName = config.get("azure_blob_storage_container_name"); - final int outputStreamBufferSizeFromConfig = + final int outputStreamBufferSizeFromConfig = config.get("azure_blob_storage_output_buffer_size") != null ? config.get("azure_blob_storage_output_buffer_size").asInt(DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE) : DEFAULT_STORAGE_OUTPUT_BUFFER_SIZE; - + final JsonNode blobName = config.get("azure_blob_storage_blob_name"); // streamId final String endpointComputed = String.format(Locale.ROOT, DEFAULT_STORAGE_ENDPOINT_FORMAT, diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java index 23e31bbf4d9ce..f31aaab64fa02 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java @@ -5,7 +5,6 @@ package io.airbyte.integrations.destination.azure_blob_storage.csv; import com.azure.storage.blob.specialized.AppendBlobClient; -import com.azure.storage.blob.specialized.BlobOutputStream; import io.airbyte.integrations.destination.azure_blob_storage.AzureBlobStorageDestinationConfig; import io.airbyte.integrations.destination.azure_blob_storage.writer.AzureBlobStorageWriter; import io.airbyte.integrations.destination.azure_blob_storage.writer.BaseAzureBlobStorageWriter; diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java index 6a0406be7a7e2..77ed63b7dee63 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java @@ -5,7 +5,6 @@ package io.airbyte.integrations.destination.azure_blob_storage.jsonl; import com.azure.storage.blob.specialized.AppendBlobClient; -import com.azure.storage.blob.specialized.BlobOutputStream; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import com.fasterxml.jackson.databind.node.ObjectNode; diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/TrackingMetadata.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/TrackingMetadata.java index 73c951c08ae1f..4de7b58b3c549 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/TrackingMetadata.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/TrackingMetadata.java @@ -66,6 +66,7 @@ public static ImmutableMap 
generateDestinationDefinitionMetadata final Builder metadata = ImmutableMap.builder(); metadata.put("connector_destination", destinationDefinition.getName()); metadata.put("connector_destination_definition_id", destinationDefinition.getDestinationDefinitionId()); + metadata.put("connector_destination_docker_repository", destinationDefinition.getDockerRepository()); final String imageTag = destinationDefinition.getDockerImageTag(); if (!Strings.isEmpty(imageTag)) { metadata.put("connector_destination_version", imageTag); @@ -77,6 +78,7 @@ public static ImmutableMap generateSourceDefinitionMetadata(fina final Builder metadata = ImmutableMap.builder(); metadata.put("connector_source", sourceDefinition.getName()); metadata.put("connector_source_definition_id", sourceDefinition.getSourceDefinitionId()); + metadata.put("connector_source_docker_repository", sourceDefinition.getDockerRepository()); final String imageTag = sourceDefinition.getDockerImageTag(); if (!Strings.isEmpty(imageTag)) { metadata.put("connector_source_version", imageTag); @@ -94,6 +96,7 @@ public static ImmutableMap generateJobAttemptMetadata(final Job final JobOutput jobOutput = lastAttempt.getOutput().get(); if (jobOutput.getSync() != null) { final StandardSyncSummary syncSummary = jobOutput.getSync().getStandardSyncSummary(); + metadata.put("sync_start_time", syncSummary.getStartTime()); metadata.put("duration", Math.round((syncSummary.getEndTime() - syncSummary.getStartTime()) / 1000.0)); metadata.put("volume_mb", syncSummary.getBytesSynced()); metadata.put("volume_rows", syncSummary.getRecordsSynced()); diff --git a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobNotifierTest.java b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobNotifierTest.java index f55a8d6a5876b..83385d404ffbe 100644 --- a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobNotifierTest.java +++ b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/JobNotifierTest.java @@ -99,9 +99,11 @@ void testFailJob() throws IOException, InterruptedException, JsonValidationExcep metadata.put("connector_source_definition_id", sourceDefinition.getSourceDefinitionId()); metadata.put("connector_source", "source-test"); metadata.put("connector_source_version", TEST_DOCKER_TAG); + metadata.put("connector_source_docker_repository", sourceDefinition.getDockerRepository()); metadata.put("connector_destination_definition_id", destinationDefinition.getDestinationDefinitionId()); metadata.put("connector_destination", "destination-test"); metadata.put("connector_destination_version", TEST_DOCKER_TAG); + metadata.put("connector_destination_docker_repository", destinationDefinition.getDockerRepository()); metadata.put("notification_type", NotificationType.SLACK); verify(trackingClient).track(WORKSPACE_ID, JobNotifier.FAILURE_NOTIFICATION, metadata.build()); } diff --git a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_factory/OAuthConfigSupplierTest.java b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_factory/OAuthConfigSupplierTest.java index 3c375a217cfc3..9a08190f0c562 100644 --- a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_factory/OAuthConfigSupplierTest.java +++ b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_factory/OAuthConfigSupplierTest.java @@ -141,6 +141,7 @@ public void 
testOAuthFullInjectionBecauseNoOAuthSpec() throws JsonValidationExce .thenReturn(new StandardSourceDefinition() .withSourceDefinitionId(sourceDefinitionId) .withName("test") + .withDockerRepository("test/test") .withDockerImageTag("dev") .withSpec(null)); setupOAuthParamMocks(oauthParameters); @@ -222,6 +223,7 @@ private void setupStandardDefinitionMock(final AdvancedAuth advancedAuth) throws when(configRepository.getStandardSourceDefinition(any())).thenReturn(new StandardSourceDefinition() .withSourceDefinitionId(sourceDefinitionId) .withName("test") + .withDockerRepository("test/test") .withDockerImageTag("dev") .withSpec(new ConnectorSpecification().withAdvancedAuth(advancedAuth))); } @@ -277,6 +279,7 @@ private void assertTracking(final UUID workspaceId) { verify(trackingClient, times(1)).track(workspaceId, "OAuth Injection - Backend", Map.of( "connector_source", "test", "connector_source_definition_id", sourceDefinitionId, + "connector_source_docker_repository", "test/test", "connector_source_version", "dev")); } diff --git a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_tracker/JobTrackerTest.java b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_tracker/JobTrackerTest.java index a6096f950c8c4..3e5bf9a0b0cd6 100644 --- a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_tracker/JobTrackerTest.java +++ b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_tracker/JobTrackerTest.java @@ -65,7 +65,8 @@ class JobTrackerTest { private static final UUID CONNECTION_ID = UUID.randomUUID(); private static final String SOURCE_DEF_NAME = "postgres"; private static final String DESTINATION_DEF_NAME = "bigquery"; - public static final String CONNECTOR_VERSION = "test"; + private static final String CONNECTOR_REPOSITORY = "test/test"; + private static final String CONNECTOR_VERSION = "test"; private static final long SYNC_START_TIME = 1000L; private static final long SYNC_END_TIME = 10000L; private static final long SYNC_DURATION = 9L; // in sync between end and start time @@ -84,6 +85,7 @@ class JobTrackerTest { .put("attempt_completion_status", JobState.FAILED) .build(); private static final ImmutableMap ATTEMPT_METADATA = ImmutableMap.builder() + .put("sync_start_time", SYNC_START_TIME) .put("duration", SYNC_DURATION) .put("volume_rows", SYNC_RECORDS_SYNC) .put("volume_mb", SYNC_BYTES_SYNC) @@ -122,6 +124,7 @@ void testTrackCheckConnectionSource() throws ConfigNotFoundException, IOExceptio .put("attempt_id", 0) .put("connector_source", SOURCE_DEF_NAME) .put("connector_source_definition_id", UUID1) + .put("connector_source_docker_repository", CONNECTOR_REPOSITORY) .put("connector_source_version", CONNECTOR_VERSION) .build(); @@ -129,6 +132,7 @@ void testTrackCheckConnectionSource() throws ConfigNotFoundException, IOExceptio .thenReturn(new StandardSourceDefinition() .withSourceDefinitionId(UUID1) .withName(SOURCE_DEF_NAME) + .withDockerRepository(CONNECTOR_REPOSITORY) .withDockerImageTag(CONNECTOR_VERSION)); when(configRepository.getStandardWorkspace(WORKSPACE_ID, true)) .thenReturn(new StandardWorkspace().withWorkspaceId(WORKSPACE_ID).withName(WORKSPACE_NAME)); @@ -150,6 +154,7 @@ void testTrackCheckConnectionDestination() throws ConfigNotFoundException, IOExc .put("attempt_id", 0) .put("connector_destination", DESTINATION_DEF_NAME) .put("connector_destination_definition_id", UUID2) + .put("connector_destination_docker_repository", CONNECTOR_REPOSITORY) 
.put("connector_destination_version", CONNECTOR_VERSION) .build(); @@ -157,6 +162,7 @@ void testTrackCheckConnectionDestination() throws ConfigNotFoundException, IOExc .thenReturn(new StandardDestinationDefinition() .withDestinationDefinitionId(UUID2) .withName(DESTINATION_DEF_NAME) + .withDockerRepository(CONNECTOR_REPOSITORY) .withDockerImageTag(CONNECTOR_VERSION)); when(configRepository.getStandardWorkspace(WORKSPACE_ID, true)) .thenReturn(new StandardWorkspace().withWorkspaceId(WORKSPACE_ID).withName(WORKSPACE_NAME)); @@ -178,6 +184,7 @@ void testTrackDiscover() throws ConfigNotFoundException, IOException, JsonValida .put("attempt_id", 0) .put("connector_source", SOURCE_DEF_NAME) .put("connector_source_definition_id", UUID1) + .put("connector_source_docker_repository", CONNECTOR_REPOSITORY) .put("connector_source_version", CONNECTOR_VERSION) .build(); @@ -185,6 +192,7 @@ void testTrackDiscover() throws ConfigNotFoundException, IOException, JsonValida .thenReturn(new StandardSourceDefinition() .withSourceDefinitionId(UUID1) .withName(SOURCE_DEF_NAME) + .withDockerRepository(CONNECTOR_REPOSITORY) .withDockerImageTag(CONNECTOR_VERSION)); when(configRepository.getStandardWorkspace(WORKSPACE_ID, true)) .thenReturn(new StandardWorkspace().withWorkspaceId(WORKSPACE_ID).withName(WORKSPACE_NAME)); @@ -296,22 +304,26 @@ private Job getJobMock(final ConfigType configType, final long jobId) throws Con .thenReturn(new StandardSourceDefinition() .withSourceDefinitionId(UUID1) .withName(SOURCE_DEF_NAME) + .withDockerRepository(CONNECTOR_REPOSITORY) .withDockerImageTag(CONNECTOR_VERSION)); when(configRepository.getDestinationDefinitionFromConnection(CONNECTION_ID)) .thenReturn(new StandardDestinationDefinition() .withDestinationDefinitionId(UUID2) .withName(DESTINATION_DEF_NAME) + .withDockerRepository(CONNECTOR_REPOSITORY) .withDockerImageTag(CONNECTOR_VERSION)); when(configRepository.getStandardSourceDefinition(UUID1)) .thenReturn(new StandardSourceDefinition() .withSourceDefinitionId(UUID1) .withName(SOURCE_DEF_NAME) + .withDockerRepository(CONNECTOR_REPOSITORY) .withDockerImageTag(CONNECTOR_VERSION)); when(configRepository.getStandardDestinationDefinition(UUID2)) .thenReturn(new StandardDestinationDefinition() .withDestinationDefinitionId(UUID2) .withName(DESTINATION_DEF_NAME) + .withDockerRepository(CONNECTOR_REPOSITORY) .withDockerImageTag(CONNECTOR_VERSION)); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( @@ -368,9 +380,11 @@ private ImmutableMap getJobMetadata(final ConfigType configType, .put("connection_id", CONNECTION_ID) .put("connector_source", SOURCE_DEF_NAME) .put("connector_source_definition_id", UUID1) + .put("connector_source_docker_repository", CONNECTOR_REPOSITORY) .put("connector_source_version", CONNECTOR_VERSION) .put("connector_destination", DESTINATION_DEF_NAME) .put("connector_destination_definition_id", UUID2) + .put("connector_destination_docker_repository", CONNECTOR_REPOSITORY) .put("connector_destination_version", CONNECTOR_VERSION) .put("namespace_definition", NamespaceDefinitionType.SOURCE) .put("table_prefix", false) From bca9499495e3604bb5a78a280dd6a4412ce358d2 Mon Sep 17 00:00:00 2001 From: VitaliiMaltsev <39538064+VitaliiMaltsev@users.noreply.github.com> Date: Fri, 14 Jan 2022 18:17:37 +0200 Subject: [PATCH 128/215] Destination Snowflake add config file to test resources (#9518) * fix for jdk 17 * add config file * add config file Co-authored-by: vmaltsev --- .../snowflake/SnowflakeDestinationTest.java | 12 +++--------- 
.../src/test/resources/insert_config.json | 9 +++++++++ 2 files changed, 12 insertions(+), 9 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/test/resources/insert_config.json diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java index 021fa9ed88feb..bfa451bbd0c9b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java @@ -14,16 +14,11 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.AirbyteMessageConsumer; import io.airbyte.integrations.base.Destination; -import io.airbyte.protocol.models.AirbyteMessage; -import io.airbyte.protocol.models.AirbyteRecordMessage; -import io.airbyte.protocol.models.CatalogHelpers; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.DestinationSyncMode; -import io.airbyte.protocol.models.Field; -import io.airbyte.protocol.models.JsonSchemaPrimitive; +import io.airbyte.protocol.models.*; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; @@ -81,8 +76,7 @@ public void testCleanupStageOnFailure() throws Exception { JdbcDatabase mockDb = mock(JdbcDatabase.class); SnowflakeStagingSqlOperations sqlOperations = mock(SnowflakeStagingSqlOperations.class); final var testMessages = generateTestMessages(); - final JsonNode config = Jsons.deserialize(IOs.readFile(Path.of("secrets/insert_config.json"))); - + final JsonNode config = Jsons.deserialize(MoreResources.readResource("insert_config.json"), JsonNode.class); AirbyteMessageConsumer airbyteMessageConsumer = new SnowflakeInternalStagingConsumerFactory() .create(Destination::defaultOutputRecordCollector, mockDb, sqlOperations, new SnowflakeSQLNameTransformer(), config, getCatalog()); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/resources/insert_config.json b/airbyte-integrations/connectors/destination-snowflake/src/test/resources/insert_config.json new file mode 100644 index 0000000000000..44d984364cb48 --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/resources/insert_config.json @@ -0,0 +1,9 @@ +{ + "host": "testhost.snowflakecomputing.com", + "role": "AIRBYTE_ROLE", + "warehouse": "AIRBYTE_WAREHOUSE", + "database": "AIRBYTE_DATABASE", + "schema": "AIRBYTE_SCHEMA", + "username": "AIRBYTE_INTEGRATION_TEST", + "password": "testPass" +} \ No newline at end of file From 01d64861b1bc01d67a3283ef039a7e9244d3464d Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Sat, 15 Jan 2022 01:25:25 +0800 Subject: [PATCH 129/215] Limit release workflow to run one at a time. (#9507) Limit the release workflow to only one with a queue of capacity one. This should help reduce the chance of release clashing. 
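For reference, GitHub Actions enforces this with a concurrency group: workflow runs that
share the same group key are serialized, so at most one run executes at a time and at most
one more waits in the queue (a newly queued run replaces the previously pending one). A
minimal sketch of the patched workflow header, matching the one-line change in the diff below:

    name: Release Open Source Airbyte
    # Every run of this workflow joins the same concurrency group, so only
    # one release can be in progress and at most one more can sit queued.
    concurrency: release-airbyte
    on:
      workflow_dispatch: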
--- .github/workflows/release-airbyte-os.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/release-airbyte-os.yml b/.github/workflows/release-airbyte-os.yml index 3c1f4e58024d6..c7ccc2d7c6ece 100644 --- a/.github/workflows/release-airbyte-os.yml +++ b/.github/workflows/release-airbyte-os.yml @@ -1,4 +1,6 @@ name: Release Open Source Airbyte +concurrency: release-airbyte + on: workflow_dispatch: inputs: From e3313875a3a61028b0748fe7de9f619821894777 Mon Sep 17 00:00:00 2001 From: dluftspring Date: Fri, 14 Jan 2022 13:21:45 -0500 Subject: [PATCH 130/215] =?UTF-8?q?=F0=9F=8E=89=20=20New=20Source:=20Hello?= =?UTF-8?q?baton=20(#8461)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Connector working through read method for hellobaton source * Check connection method fixes * Cleaning up directories and files +activity stream * Changing webapp port for testing * More testing ports * Ports ports ports * porto * Reverting port changes * Updating activity stream schema and config catalog * Fixing flake lint failures * fix schemas * correct schemas and acceptance tests * run format * bump version in config files * Docs page for hellobaton * add doc * run format Co-authored-by: Marcos Marx --- .../492b56d1-937c-462e-8076-21ad2031e784.json | 7 + .../resources/seed/source_definitions.yaml | 6 + .../src/main/resources/seed/source_specs.yaml | 26 + .../snowflake/SnowflakeDestinationTest.java | 50 +- .../source-hellobaton/.dockerignore | 7 + .../connectors/source-hellobaton/Dockerfile | 38 + .../connectors/source-hellobaton/README.md | 132 +++ .../acceptance-test-config.yml | 20 + .../acceptance-test-docker.sh | 16 + .../connectors/source-hellobaton/build.gradle | 9 + .../integration_tests/__init__.py | 3 + .../integration_tests/abnormal_state.json | 5 + .../integration_tests/acceptance.py | 16 + .../integration_tests/catalog.json | 9 + .../integration_tests/configured_catalog.json | 103 +++ .../integration_tests/invalid_config.json | 4 + .../integration_tests/sample_config.json | 4 + .../integration_tests/sample_state.json | 5 + .../connectors/source-hellobaton/main.py | 13 + .../source-hellobaton/requirements.txt | 2 + .../sample_files/configured_catalog.json | 815 ++++++++++++++++++ .../sample_files/sample_config.json | 4 + .../connectors/source-hellobaton/setup.py | 29 + .../source_hellobaton/__init__.py | 8 + .../source_hellobaton/schemas/activity.json | 48 ++ .../source_hellobaton/schemas/companies.json | 32 + .../source_hellobaton/schemas/milestones.json | 95 ++ .../source_hellobaton/schemas/phases.json | 27 + .../schemas/project_attachments.json | 42 + .../source_hellobaton/schemas/projects.json | 137 +++ .../schemas/task_attachments.json | 49 ++ .../source_hellobaton/schemas/tasks.json | 116 +++ .../source_hellobaton/schemas/templates.json | 84 ++ .../schemas/time_entries.json | 60 ++ .../source_hellobaton/schemas/users.json | 49 ++ .../source_hellobaton/source.py | 59 ++ .../source_hellobaton/spec.json | 22 + .../source_hellobaton/streams.py | 204 +++++ docs/integrations/sources/hellobaton.md | 50 ++ 39 files changed, 2386 insertions(+), 19 deletions(-) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/492b56d1-937c-462e-8076-21ad2031e784.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/.dockerignore create mode 100644 airbyte-integrations/connectors/source-hellobaton/Dockerfile create mode 100644 airbyte-integrations/connectors/source-hellobaton/README.md create 
mode 100644 airbyte-integrations/connectors/source-hellobaton/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-hellobaton/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-hellobaton/build.gradle create mode 100644 airbyte-integrations/connectors/source-hellobaton/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-hellobaton/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-hellobaton/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/main.py create mode 100644 airbyte-integrations/connectors/source-hellobaton/requirements.txt create mode 100644 airbyte-integrations/connectors/source-hellobaton/sample_files/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/sample_files/sample_config.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/setup.py create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/__init__.py create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/activity.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/companies.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/milestones.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/phases.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/project_attachments.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/projects.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/task_attachments.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/tasks.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/templates.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/time_entries.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/users.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/source.py create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/spec.json create mode 100644 airbyte-integrations/connectors/source-hellobaton/source_hellobaton/streams.py create mode 100644 docs/integrations/sources/hellobaton.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/492b56d1-937c-462e-8076-21ad2031e784.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/492b56d1-937c-462e-8076-21ad2031e784.json new file mode 100644 index 0000000000000..6f5f3febef56b --- /dev/null +++ 
b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/492b56d1-937c-462e-8076-21ad2031e784.json @@ -0,0 +1,7 @@ +{ + "sourceDefinitionId": "492b56d1-937c-462e-8076-21ad2031e784", + "name": "Hellobaton", + "dockerRepository": "airbyte/source-hellobaton", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/hellobaton" +} diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1302890f6e688..7df59d480d51e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -284,6 +284,12 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/harvest icon: harvest.svg sourceType: api +- name: Hellobaton + sourceDefinitionId: 492b56d1-937c-462e-8076-21ad2031e784 + dockerRepository: airbyte/source-hellobaton + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/hellobaton + sourceType: api - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index ed9459b0c5bbc..4909346bbea47 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2846,6 +2846,32 @@ path_in_connector_config: - "credentials" - "client_secret" +- dockerImage: "airbyte/source-hellobaton:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Hellobaton Spec" + type: "object" + required: + - "api_key" + - "company" + additionalProperties: false + properties: + api_key: + type: "string" + description: "authentication key required to access the api endpoints" + airbyte_secret: true + company: + type: "string" + description: "Company name that generates your base api url" + examples: + - "google" + - "facebook" + - "microsoft" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-hubspot:0.1.32" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java index bfa451bbd0c9b..8125613834c00 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java @@ -18,16 +18,28 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.AirbyteMessageConsumer; import io.airbyte.integrations.base.Destination; +<<<<<<< HEAD +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.DestinationSyncMode; +import io.airbyte.protocol.models.Field; 
+import io.airbyte.protocol.models.JsonSchemaPrimitive; +======= import io.airbyte.protocol.models.*; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; +>>>>>>> master import java.nio.file.Path; import java.sql.SQLException; import java.time.Instant; import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; public class SnowflakeDestinationTest { @@ -78,40 +90,40 @@ public void testCleanupStageOnFailure() throws Exception { final var testMessages = generateTestMessages(); final JsonNode config = Jsons.deserialize(MoreResources.readResource("insert_config.json"), JsonNode.class); AirbyteMessageConsumer airbyteMessageConsumer = new SnowflakeInternalStagingConsumerFactory() - .create(Destination::defaultOutputRecordCollector, mockDb, + .create(Destination::defaultOutputRecordCollector, mockDb, sqlOperations, new SnowflakeSQLNameTransformer(), config, getCatalog()); - doThrow(SQLException.class).when(sqlOperations).copyIntoTmpTableFromStage(any(),anyString(),anyString(),anyString()); + doThrow(SQLException.class).when(sqlOperations).copyIntoTmpTableFromStage(any(), anyString(), anyString(), anyString()); airbyteMessageConsumer.start(); for (AirbyteMessage m : testMessages) { - airbyteMessageConsumer.accept(m); + airbyteMessageConsumer.accept(m); } assertThrows(RuntimeException.class, airbyteMessageConsumer::close); - verify(sqlOperations, times(1)).cleanUpStage(any(),anyString()); + verify(sqlOperations, times(1)).cleanUpStage(any(), anyString()); } private List generateTestMessages() { return IntStream.range(0, 3) - .boxed() - .map(i -> new AirbyteMessage() - .withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream("test") - .withNamespace("test_staging") - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "human " + i))))) - .collect(Collectors.toList()); + .boxed() + .map(i -> new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream("test") + .withNamespace("test_staging") + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "human " + i))))) + .collect(Collectors.toList()); } ConfiguredAirbyteCatalog getCatalog() { return new ConfiguredAirbyteCatalog().withStreams(List.of( - CatalogHelpers.createConfiguredAirbyteStream( - "test", - "test_staging", - Field.of("id", JsonSchemaPrimitive.NUMBER), - Field.of("name", JsonSchemaPrimitive.STRING)) - .withDestinationSyncMode(DestinationSyncMode.OVERWRITE))); + CatalogHelpers.createConfiguredAirbyteStream( + "test", + "test_staging", + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)) + .withDestinationSyncMode(DestinationSyncMode.OVERWRITE))); } } diff --git a/airbyte-integrations/connectors/source-hellobaton/.dockerignore b/airbyte-integrations/connectors/source-hellobaton/.dockerignore new file mode 100644 index 0000000000000..2e018bfa07087 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_hellobaton +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-hellobaton/Dockerfile b/airbyte-integrations/connectors/source-hellobaton/Dockerfile new file mode 100644 index 0000000000000..f8bf6c037c6e1 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-hellobaton/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_hellobaton ./source_hellobaton + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-hellobaton diff --git a/airbyte-integrations/connectors/source-hellobaton/README.md b/airbyte-integrations/connectors/source-hellobaton/README.md new file mode 100644 index 0000000000000..1b3bc774802a2 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/README.md @@ -0,0 +1,132 @@ +# Hellobaton Source + +This is the repository for the Hellobaton source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/hellobaton). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-hellobaton:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/hellobaton) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_hellobaton/spec.json` file. 
+Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source hellobaton test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-hellobaton:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-hellobaton:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-hellobaton:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-hellobaton:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-hellobaton:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-hellobaton:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-hellobaton:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-hellobaton:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. 
The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-hellobaton/acceptance-test-config.yml b/airbyte-integrations/connectors/source-hellobaton/acceptance-test-config.yml new file mode 100644 index 0000000000000..79787e5b96207 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-hellobaton:dev +tests: + spec: + - spec_path: "source_hellobaton/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: ["templates", "time_entries"] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-hellobaton/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-hellobaton/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . 
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-hellobaton/build.gradle b/airbyte-integrations/connectors/source-hellobaton/build.gradle new file mode 100644 index 0000000000000..55c27d3615676 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_hellobaton' +} diff --git a/airbyte-integrations/connectors/source-hellobaton/integration_tests/__init__.py b/airbyte-integrations/connectors/source-hellobaton/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-hellobaton/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-hellobaton/integration_tests/abnormal_state.json new file mode 100644 index 0000000000000..52b0f2c2118f4 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-hellobaton/integration_tests/acceptance.py new file mode 100644 index 0000000000000..056971f954502 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. 
otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-hellobaton/integration_tests/catalog.json b/airbyte-integrations/connectors/source-hellobaton/integration_tests/catalog.json new file mode 100644 index 0000000000000..db86d334a63d8 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/integration_tests/catalog.json @@ -0,0 +1,9 @@ +{ + "streams": [ + { + "name": "tasks", + "supported_sync_modes": ["full_refresh"], + "json_schema": {} + } + ] +} diff --git a/airbyte-integrations/connectors/source-hellobaton/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-hellobaton/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..8261b74756911 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/integration_tests/configured_catalog.json @@ -0,0 +1,103 @@ +{ + "streams": [ + { + "stream": { + "name": "activity", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "companies", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "milestones", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "phases", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "project_attachments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "projects", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "task_attachments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "tasks", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "templates", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "time_entries", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "users", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-hellobaton/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-hellobaton/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..792b2d6ed172b --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/integration_tests/invalid_config.json @@ -0,0 +1,4 @@ +{ + "api_key": "invalid-api-key", + "company": "non-valid" +} diff --git a/airbyte-integrations/connectors/source-hellobaton/integration_tests/sample_config.json 
b/airbyte-integrations/connectors/source-hellobaton/integration_tests/sample_config.json new file mode 100644 index 0000000000000..792b2d6ed172b --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/integration_tests/sample_config.json @@ -0,0 +1,4 @@ +{ + "api_key": "invalid-api-key", + "company": "non-valid" +} diff --git a/airbyte-integrations/connectors/source-hellobaton/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-hellobaton/integration_tests/sample_state.json new file mode 100644 index 0000000000000..3587e579822d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/main.py b/airbyte-integrations/connectors/source-hellobaton/main.py new file mode 100644 index 0000000000000..978a73078c1b6 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_hellobaton import SourceHellobaton + +if __name__ == "__main__": + source = SourceHellobaton() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-hellobaton/requirements.txt b/airbyte-integrations/connectors/source-hellobaton/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-hellobaton/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-hellobaton/sample_files/configured_catalog.json new file mode 100644 index 0000000000000..0a6393a2c4ecf --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/sample_files/configured_catalog.json @@ -0,0 +1,815 @@ +{ + "streams": [ + { + "stream": { + "name": "activity", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "type": { + "type": ["string", "null"] + }, + "group": { + "type": "string" + }, + "parent": { + "type": ["string", "null"] + }, + "child": { + "type": ["string", "null"] + }, + "actor": { + "type": "string" + }, + "project": { + "type": "string" + }, + "parent_type": { + "type": "string" + }, + "child_type": { + "type": "string" + }, + "meta": { + "type": ["object", "null"] + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "companies", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "name": { + "type": "string" + }, + "type": { + "type": "string" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "milestones", + "json_schema": { + 
"properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": ["string", "null"] + }, + "project": { + "type": "string" + }, + "task_list": { + "type": "string" + }, + "phase": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "name": { + "type": "string" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + }, + "deadline_fixed": { + "type": "boolean" + }, + "deadline_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "risk_profiles": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "risk_level": { + "type": "string" + }, + "formula": { + "type": "string" + }, + "over_run": { + "type": "integer" + } + } + } + }, + "start_datetime": { + "type": "string" + }, + "finish_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "created_from": { + "type": ["string", "null"] + }, + "duration": { + "type": "integer" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "projects", + "json_schema": { + "properties": { + "_self": { + "type": "string" + }, + "annual_contract_value": { + "type": "string" + }, + "attachment_list": { + "type": "string" + }, + "client_systems": { + "type": ["string", "null"] + }, + "companies": { + "type": "array", + "items": { + "type": "string" + } + }, + "completed_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "cost": { + "type": ["integer", "null"] + }, + "created": { + "type": "string", + "format": "date-time" + }, + "created_from": { + "type": ["string", "null"] + }, + "created_from_template": { + "type": ["string", "null"] + }, + "creator": { + "type": ["string", "null"] + }, + "deadline_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "estimated_duration": { + "type": ["integer", "null"] + }, + "id": { + "type": "integer" + }, + "implementation_budget": { + "type": "string" + }, + "milestone_list": { + "type": "string" + }, + "modified": { + "type": ["string", "null"], + "format": "date-time" + }, + "phase": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "name": { + "type": ["string", "null"] + }, + "order": { + "type": "integer" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + }, + "risk_profiles": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "risk_score": { + "type": "number" + }, + "level": { + "type": "string" + }, + "variance": { + "type": "integer" + }, + "formula": { + "type": "string" + }, + "projected_golive_datetime": { + "type": ["string", "null"], + "format": "date-time" + } + } + } + }, + "start_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "started_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "status": { + "type": "string" + }, + "task_list": { + "type": 
"string" + }, + "time_entry_list": { + "type": "string" + }, + "title": { + "type": "string" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "phases", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "name": { + "type": ["string", "null"] + }, + "order": { + "type": "integer" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "project_attachments", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "project": { + "type": "string" + }, + "url": { + "type": "string" + }, + "label": { + "type": ["string", "null"] + }, + "created_by": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_sow": { + "type": "boolean" + }, + "original_filename": { + "type": ["string", "null"] + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "datetime" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "tasks", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": ["string", "null"] + }, + "project": { + "type": "string" + }, + "status": { + "type": "string" + }, + "dependency": { + "type": ["string", "null"] + }, + "start_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "due_datetime": { + "type": "string", + "format": "date-time" + }, + "started_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "finished_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "started_overridden_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "finished_overridden_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "estimated_duration": { + "type": ["integer", "null"] + }, + "milestone": { + "type": "string" + }, + "created_by": { + "type": ["string", "null"] + }, + "assigned_to": { + "type": ["string", "null"] + }, + "created_from": { + "type": ["string", "null"] + }, + "risk_profiles": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "risk_level": { + "type": "string" + }, + "formula": { + "type": "string" + }, + "over_run": { + "type": "integer" + }, + "task_variance": { + "type": "integer" + }, + "cool_down": { + "type": "integer" + }, + "reason": { + "type": "integer" + }, + "duration": { + "type": "integer" + }, + "estimated_duration": { + "type": "integer" + } + } + } + }, + "time_entry_list": { + "type": "string" + }, + "attachment_list": { + "type": "string" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + } + }, + "supported_sync_modes": 
["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "task_attachments", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "task": { + "type": "string" + }, + "url": { + "type": "string" + }, + "type": { + "type": "string" + }, + "label": { + "type": ["string", "null"] + }, + "deliverable": { + "type": "boolean" + }, + "requires_approval": { + "type": "boolean" + }, + "approved": { + "type": ["boolean", "null"] + }, + "revision_task": { + "type": ["string", "null"] + }, + "original_filename": { + "type": ["string", "null"] + }, + "created_by": { + "type": ["string", "null"] + }, + "created": { + "type": "string" + }, + "modified": { + "type": "string" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "templates", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "title": { + "type": "string" + }, + "status": { + "type": "string" + }, + "cost": { + "type": ["string", "null"] + }, + "annual_contract_value": { + "type": ["string", "null"] + }, + "implementation_budget": { + "type": ["string", "null"] + }, + "estimated_duration": { + "type": ["integer", "null"] + }, + "created_from_template": { + "type": ["string", "null"] + }, + "created_from": { + "type": ["string", "null"] + }, + "start_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "started_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "deadline_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "completed_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "client_systems": { + "type": ["string", "null"] + }, + "phase": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "order": { + "type": "integer" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + }, + "creator": { + "type": ["string", "null"] + }, + "task_list": { + "type": "string" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "time_entries", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "user": { + "type": "string" + }, + "created_by": { + "type": "string" + }, + "project": { + "type": "string" + }, + "task": { + "type": ["string", "null"] + }, + "rate": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "hourly_rate": { + "type": "string" + } + } + }, + "started_at": { + "type": ["string", "null"], + "format": "date-time" + }, + "ended_at": { + "type": ["string", "null"], + "format": "date-time" + }, + "reference_date": { + "type": "string", + "format": "date-time" + }, + "billable": { + "type": "boolean" + }, + "calculated_duration": { + "type": ["integer", "null"] + }, + "inputted_duration": { + "type": ["integer", "null"] + }, + "notes": { + "type": "string" + } + } + } + }, + 
"supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "users", + "json_schema": { + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "first_name": { + "type": "string" + }, + "last_name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "account_type": { + "type": "string" + }, + "job_title": { + "type": "string" + }, + "company": { + "type": "string" + }, + "avatar_url": { + "type": ["string", "null"] + }, + "created_by": { + "type": ["string", "null"] + }, + "signed_up_at": { + "type": ["string", "null"], + "format": "date-time" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "user_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-hellobaton/sample_files/sample_config.json b/airbyte-integrations/connectors/source-hellobaton/sample_files/sample_config.json new file mode 100644 index 0000000000000..cc58f6f0f835c --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/sample_files/sample_config.json @@ -0,0 +1,4 @@ +{ + "company": "google", + "api_key": "" +} diff --git a/airbyte-integrations/connectors/source-hellobaton/setup.py b/airbyte-integrations/connectors/source-hellobaton/setup.py new file mode 100644 index 0000000000000..eda15c9efb4b2 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_hellobaton", + description="Source implementation for Hellobaton.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/__init__.py b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/__init__.py new file mode 100644 index 0000000000000..9137ca503a673 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceHellobaton + +__all__ = ["SourceHellobaton"] diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/activity.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/activity.json new file mode 100644 index 0000000000000..cc4a631b4d81f --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/activity.json @@ -0,0 +1,48 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "type": { + "type": ["string", "null"] + }, + "group": { + "type": "string" + }, + "parent": { + "type": ["string", "null"] + }, + "child": { + "type": ["string", "null"] + }, + "actor": { + "type": ["string", "null"] + }, + "project": { + "type": ["string", "null"] + }, + "parent_type": { + "type": ["string", "null"] + }, + "child_type": { + "type": ["string", "null"] + }, + "meta": { + "type": ["object", "null"] + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/companies.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/companies.json new file mode 100644 index 0000000000000..ebbb25699ee35 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/companies.json @@ -0,0 +1,32 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + + "_self": { + "type": "string" + }, + + "name": { + "type": "string" + }, + + "type": { + "type": "string" + }, + + "created": { + "type": "string", + "format": "date-time" + }, + + "modified": { + "type": "string", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/milestones.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/milestones.json new file mode 100644 index 0000000000000..16e0f0060b470 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/milestones.json @@ -0,0 +1,95 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": ["string", "null"] + }, + "project": { + "type": "string" + }, + "task_list": { + "type": "string" + }, + "phase": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "name": { + "type": "string" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + }, + "deadline_fixed": { + "type": "boolean" + }, + "deadline_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "risk_profiles": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "risk_level": { + "type": "string" + }, + "formula": { + "type": "string" + }, + "over_run": { + "type": "integer" + } + } + } + }, + "start_datetime": { + "type": "string" + }, + "finish_datetime": { + "type": 
["string", "null"], + "format": "date-time" + }, + "created_from": { + "type": ["string", "null"] + }, + "duration": { + "type": "integer" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/phases.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/phases.json new file mode 100644 index 0000000000000..476437ea6774b --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/phases.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "name": { + "type": ["string", "null"] + }, + "order": { + "type": "integer" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/project_attachments.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/project_attachments.json new file mode 100644 index 0000000000000..7d90c8628a27d --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/project_attachments.json @@ -0,0 +1,42 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "project": { + "type": "string" + }, + "url": { + "type": "string" + }, + "label": { + "type": ["string", "null"] + }, + "created_by": { + "type": "string" + }, + "type": { + "type": "string" + }, + "is_sow": { + "type": "boolean" + }, + "original_filename": { + "type": ["string", "null"] + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "datetime" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/projects.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/projects.json new file mode 100644 index 0000000000000..e52493d79752b --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/projects.json @@ -0,0 +1,137 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "_self": { + "type": "string" + }, + "annual_contract_value": { + "type": "string" + }, + "attachment_list": { + "type": "string" + }, + "client_systems": { + "type": ["string", "null"] + }, + "companies": { + "type": "array", + "items": { + "type": "string" + } + }, + "completed_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "cost": { + "type": ["integer", "null"] + }, + "created": { + "type": "string", + "format": "date-time" + }, + "created_from": { + "type": ["string", "null"] + }, + "created_from_template": { + "type": ["string", "null"] + }, + "creator": { + "type": ["string", "null"] + }, + "deadline_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "estimated_duration": { + "type": ["integer", "null"] + }, + "id": { + "type": "integer" + }, + "implementation_budget": { + "type": "string" + }, + "milestone_list": { + "type": "string" + }, + "modified": { + 
"type": ["string", "null"], + "format": "date-time" + }, + "phase": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "name": { + "type": ["string", "null"] + }, + "order": { + "type": "integer" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + }, + "risk_profiles": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "risk_score": { + "type": "number" + }, + "level": { + "type": "string" + }, + "variance": { + "type": "integer" + }, + "formula": { + "type": "string" + }, + "projected_golive_datetime": { + "type": ["string", "null"], + "format": "date-time" + } + } + } + }, + "start_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "started_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "status": { + "type": "string" + }, + "task_list": { + "type": "string" + }, + "time_entry_list": { + "type": "string" + }, + "title": { + "type": "string" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/task_attachments.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/task_attachments.json new file mode 100644 index 0000000000000..90da99ec9d51f --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/task_attachments.json @@ -0,0 +1,49 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "task": { + "type": "string" + }, + "url": { + "type": "string" + }, + "type": { + "type": "string" + }, + "label": { + "type": ["string", "null"] + }, + "deliverable": { + "type": "boolean" + }, + "requires_approval": { + "type": "boolean" + }, + "approved": { + "type": ["boolean", "null"] + }, + "revision_task": { + "type": ["string", "null"] + }, + "original_filename": { + "type": ["string", "null"] + }, + "created_by": { + "type": ["string", "null"] + }, + "created": { + "type": "string" + }, + "modified": { + "type": "string" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/tasks.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/tasks.json new file mode 100644 index 0000000000000..980c82d5f5ced --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/tasks.json @@ -0,0 +1,116 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": ["string", "null"] + }, + "project": { + "type": "string" + }, + "status": { + "type": "string" + }, + "dependency": { + "type": ["string", "null"] + }, + "start_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "due_datetime": { + "type": "string", + "format": "date-time" + }, + "started_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "finished_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "started_overridden_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "finished_overridden_datetime": { + "type": ["string", "null"], + 
"format": "date-time" + }, + "estimated_duration": { + "type": ["integer", "null"] + }, + "milestone": { + "type": "string" + }, + "created_by": { + "type": ["string", "null"] + }, + "assigned_to": { + "type": ["string", "null"] + }, + "created_from": { + "type": ["string", "null"] + }, + "risk_profiles": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "risk_level": { + "type": "string" + }, + "formula": { + "type": "string" + }, + "over_run": { + "type": "integer" + }, + "task_variance": { + "type": "integer" + }, + "cool_down": { + "type": "integer" + }, + "reason": { + "type": "integer" + }, + "duration": { + "type": "integer" + }, + "estimated_duration": { + "type": "integer" + } + } + } + }, + "time_entry_list": { + "type": "string" + }, + "attachment_list": { + "type": "string" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/templates.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/templates.json new file mode 100644 index 0000000000000..e2d02743d883a --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/templates.json @@ -0,0 +1,84 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "title": { + "type": "string" + }, + "status": { + "type": "string" + }, + "cost": { + "type": ["string", "null"] + }, + "annual_contract_value": { + "type": ["string", "null"] + }, + "implementation_budget": { + "type": ["string", "null"] + }, + "estimated_duration": { + "type": ["integer", "null"] + }, + "created_from_template": { + "type": ["string", "null"] + }, + "created_from": { + "type": ["string", "null"] + }, + "start_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "started_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "deadline_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "completed_datetime": { + "type": ["string", "null"], + "format": "date-time" + }, + "client_systems": { + "type": ["string", "null"] + }, + "phase": { + "type": ["object", "null"], + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "order": { + "type": "integer" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } + }, + "creator": { + "type": ["string", "null"] + }, + "task_list": { + "type": "string" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/time_entries.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/time_entries.json new file mode 100644 index 0000000000000..7ea2f5553be25 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/time_entries.json @@ -0,0 +1,60 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "user": { + "type": "string" + }, + "created_by": { + "type": "string" + }, + "project": { + "type": "string" + }, + "task": { + "type": ["string", "null"] + }, + 
"rate": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "hourly_rate": { + "type": "string" + } + } + }, + "started_at": { + "type": ["string", "null"], + "format": "date-time" + }, + "ended_at": { + "type": ["string", "null"], + "format": "date-time" + }, + "reference_date": { + "type": "string", + "format": "date-time" + }, + "billable": { + "type": "boolean" + }, + "calculated_duration": { + "type": ["integer", "null"] + }, + "inputted_duration": { + "type": ["integer", "null"] + }, + "notes": { + "type": "string" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/users.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/users.json new file mode 100644 index 0000000000000..3e6d3d73a6037 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/schemas/users.json @@ -0,0 +1,49 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": "integer" + }, + "_self": { + "type": "string" + }, + "first_name": { + "type": "string" + }, + "last_name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "account_type": { + "type": "string" + }, + "job_title": { + "type": "string" + }, + "company": { + "type": "string" + }, + "avatar_url": { + "type": ["string", "null"] + }, + "created_by": { + "type": ["string", "null"] + }, + "signed_up_at": { + "type": ["string", "null"], + "format": "date-time" + }, + "created": { + "type": "string", + "format": "date-time" + }, + "modified": { + "type": "string", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/source.py b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/source.py new file mode 100644 index 0000000000000..1c6edc6479c45 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/source.py @@ -0,0 +1,59 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from typing import Any, List, Mapping, Tuple + +import requests +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream + +from .streams import ( + Activity, + Companies, + Milestones, + Phases, + ProjectAttachments, + Projects, + TaskAttachments, + Tasks, + Templates, + TimeEntries, + Users, +) + +STREAMS = [Activity, Companies, Milestones, Projects, Phases, ProjectAttachments, Tasks, TaskAttachments, Templates, TimeEntries, Users] + + +# Source +class SourceHellobaton(AbstractSource): + def check_connection(self, logger: AirbyteLogger, config: Mapping[str, any]) -> Tuple[bool, any]: + """ + :param config: the user-input config object conforming to the connector's spec.json + :param logger: logger object + :return Tuple[bool, any]: (True, None) if the input config can be used to connect to the API successfully, (False, error) otherwise. 
+ """ + url_template = "https://{company}.hellobaton.com/api/" + try: + params = { + "api_key": config["api_key"], + } + base_url = url_template.format(company=config["company"]) + # This is just going to return a mapping of available endpoints + response = requests.get(base_url, params=params) + status_code = response.status_code + logger.info(f"Status code: {status_code}") + if status_code == 200: + return True, None + + except Exception as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + """ + + :param config: A Mapping of the user input configuration as defined in the connector spec. + """ + return [stream(company=config["company"], api_key=config["api_key"]) for stream in STREAMS] diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/spec.json b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/spec.json new file mode 100644 index 0000000000000..50793f64fae2e --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/spec.json @@ -0,0 +1,22 @@ +{ + "documentationUrl": "https://docsurl.com", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Hellobaton Spec", + "type": "object", + "required": ["api_key", "company"], + "additionalProperties": false, + "properties": { + "api_key": { + "type": "string", + "description": "authentication key required to access the api endpoints", + "airbyte_secret": true + }, + "company": { + "type": "string", + "description": "Company name that generates your base api url", + "examples": ["google", "facebook", "microsoft"] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/streams.py b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/streams.py new file mode 100644 index 0000000000000..9b36eab622041 --- /dev/null +++ b/airbyte-integrations/connectors/source-hellobaton/source_hellobaton/streams.py @@ -0,0 +1,204 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from abc import ABC +from typing import Any, Iterable, Mapping, MutableMapping, Optional +from urllib.parse import parse_qs, urlparse + +import requests +from airbyte_cdk.sources.streams.http import HttpStream + + +# Basic full refresh stream +class HellobatonStream(HttpStream, ABC): + """ + This class represents a stream output by the connector. + This is an abstract base class meant to contain all the common functionality at the API level e.g: the API base URL, pagination strategy, + parsing responses etc.. 
+ """ + + page_size: int = 100 + primary_key: str = "id" + + def __init__(self, company: str, api_key: str, **kwargs): + super().__init__(**kwargs) + self.api_key = api_key + self.company = company + + @property + def url_base(self) -> str: + """ + Using this method instead of class init to dynamically generate base url based on config + """ + company = self.company + return f"https://{company}.hellobaton.com/api/" + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """ + Logic to generate next page token based on the response + """ + + payload = response.json() + result_count = payload["count"] + + if result_count > self.page_size: + query_string = urlparse(payload["next"]).query + next_page_token = parse_qs(query_string).get("page", None) + + else: + next_page_token = None + + return next_page_token + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + """ + API request params which expect an api key for auth and any pagination is done using defined in the next_page_token method + """ + + params = {"api_key": self.api_key, "page_size": self.page_size, "page": next_page_token} + + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + """ + May want to add logic here to unpack foreign keys from urls but tbd + For now each response record is accessed through the results key in the JSON payload + """ + for results in response.json()["results"]: + yield results + + +class Activity(HellobatonStream): + """ + Activity stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "activity" + + +class Companies(HellobatonStream): + """ + Companies stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "companies" + + +class Milestones(HellobatonStream): + """ + Milestones stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "milestones" + + +class Phases(HellobatonStream): + """ + Phases stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "phases" + + +class ProjectAttachments(HellobatonStream): + """ + Project attachments stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "project_attachments" + + +class Projects(HellobatonStream): + """ + Projects stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "projects" + + +class Tasks(HellobatonStream): + """ + Tasks stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "tasks" + + +class TaskAttachments(HellobatonStream): + """ + Task attachments stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, 
stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "task_attachments" + + +class Templates(HellobatonStream): + """ + Templates stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "templates" + + +class TimeEntries(HellobatonStream): + """ + Time entries stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "time_entries" + + +class Users(HellobatonStream): + """ + Users stream class + """ + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + + return "users" diff --git a/docs/integrations/sources/hellobaton.md b/docs/integrations/sources/hellobaton.md new file mode 100644 index 0000000000000..16d8d401d884f --- /dev/null +++ b/docs/integrations/sources/hellobaton.md @@ -0,0 +1,50 @@ +# Baton + +## Sync overview + +This source can sync data from the [Baton API](https://app.hellobaton.com/api/redoc/). At present this connector only supports full refresh syncs, meaning that each time you use the connector it will sync all available records from scratch. Please use it cautiously if you expect your account to have a lot of records. + +## This Source Supports the Following Streams + +* activity +* companies +* milestones +* phases +* project_attachments +* projects +* task_attachments +* tasks +* templates +* time_entries +* users + +Baton adds new streams fairly regularly. Please submit an issue or PR if this project doesn't support the streams required for your use case. + +### Data type mapping + +| Integration Type | Airbyte Type | Notes | +| :--- | :--- | :--- | +| `string` | `string` | | +| `integer` | `integer` | | +| `number` | `number` | | +| `array` | `array` | | +| `object` | `object` | | + +### Features + +| Feature | Supported?\(Yes/No\) | Notes | +| :--- | :--- | :--- | +| Full Refresh Sync | Yes | | +| Incremental Sync | No | | +| Namespaces | No | | + +### Performance considerations + +The connector is rate limited at 1000 requests per minute per API key. If you find yourself receiving errors, contact your customer success manager and request a rate limit increase. + +## Getting started + +### Requirements + +* Baton account +* Baton API key \ No newline at end of file From 103c224eccc47685cece315fde8ce890108e9144 Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Fri, 14 Jan 2022 16:33:18 -0800 Subject: [PATCH 131/215] Add template for updating source connector type (#9430) --- .../ISSUE_TEMPLATE/source-connector-type.md | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/source-connector-type.md diff --git a/.github/ISSUE_TEMPLATE/source-connector-type.md b/.github/ISSUE_TEMPLATE/source-connector-type.md new file mode 100644 index 0000000000000..e3345ea03b476 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/source-connector-type.md @@ -0,0 +1,26 @@ +--- + +name: Source Connector Type +about: Add a new type or update an existing type in source connector +title: '[EPIC] Add new type / update in source connector ' +labels: area/connectors, needs-triage +assignees: '' + +--- + +## Summary +(Choose one of the two below.)
+- [ ] Support new type +- [ ] Update existing type + +## TODOs +(Complete the TODOs based on the instructions, and convert each bullet point with the `[Issue]` tag into an issue.) +- [ ] [Issue] Add a new destination acceptance test (DAT) test case for this type. +- List every destination below, either update the destination to support this type, or override its DAT to bypass the new test case. + - [ ] Example: [Issue] support in destination bigquery +- [ ] [Issue] Make sure every destination can pass the new DAT test case. + - Even if a destination does not need to support this type, its DAT should not break. +- List every source that should support this type below. + - [ ] Example: [Issue] support in source github + +## Desired Timeline From 64f127f33cd692a30bed910ad13f52587cfe2021 Mon Sep 17 00:00:00 2001 From: Marcos Marx Date: Sat, 15 Jan 2022 16:24:38 -0300 Subject: [PATCH 132/215] Destination Snowflake: correct files and format (#9529) --- .../destination/s3/S3DestinationConfig.java | 14 +++++++------- .../snowflake/SnowflakeDestinationTest.java | 9 --------- .../src/test/resources/insert_config.json | 2 +- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java index 79ea47bee1853..626aa9eb63869 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java @@ -4,11 +4,11 @@ package io.airbyte.integrations.destination.s3; -import com.amazonaws.auth.InstanceProfileCredentialsProvider; import com.amazonaws.ClientConfiguration; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.InstanceProfileCredentialsProvider; import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; @@ -129,17 +129,17 @@ public S3FormatConfig getFormatConfig() { public AmazonS3 getS3Client() { final AWSCredentials awsCreds = new BasicAWSCredentials(accessKeyId, secretAccessKey); - if (accessKeyId.isEmpty() && !secretAccessKey.isEmpty() - || !accessKeyId.isEmpty() && secretAccessKey.isEmpty()) { + if (accessKeyId.isEmpty() && !secretAccessKey.isEmpty() + || !accessKeyId.isEmpty() && secretAccessKey.isEmpty()) { throw new RuntimeException("Either both accessKeyId and secretAccessKey should be provided, or neither"); } if (accessKeyId.isEmpty() && secretAccessKey.isEmpty()) { return AmazonS3ClientBuilder.standard() - .withCredentials(new InstanceProfileCredentialsProvider(false)) - .build(); - } - + .withCredentials(new InstanceProfileCredentialsProvider(false)) + .build(); + } + else if (endpoint == null || endpoint.isEmpty()) { return AmazonS3ClientBuilder.standard() .withCredentials(new AWSStaticCredentialsProvider(awsCreds)) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java index
8125613834c00..74cb73490d5ba 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java @@ -11,14 +11,12 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableMap; -import io.airbyte.commons.io.IOs; import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.AirbyteMessageConsumer; import io.airbyte.integrations.base.Destination; -<<<<<<< HEAD import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.CatalogHelpers; @@ -26,13 +24,6 @@ import io.airbyte.protocol.models.DestinationSyncMode; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaPrimitive; -======= -import io.airbyte.protocol.models.*; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; - ->>>>>>> master -import java.nio.file.Path; import java.sql.SQLException; import java.time.Instant; import java.util.List; diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/resources/insert_config.json b/airbyte-integrations/connectors/destination-snowflake/src/test/resources/insert_config.json index 44d984364cb48..6f5ec2e7c8a80 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/resources/insert_config.json +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/resources/insert_config.json @@ -6,4 +6,4 @@ "schema": "AIRBYTE_SCHEMA", "username": "AIRBYTE_INTEGRATION_TEST", "password": "testPass" -} \ No newline at end of file +} From 2bad2cf3cf119e21d017477bbc609e890451519e Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Sat, 15 Jan 2022 20:38:56 -0800 Subject: [PATCH 133/215] Use new github action runner ami with 100 gb disk (#9450) * Use new ami with 100 gb for action runner * Revert slash command ami Co-authored-by: Serhii Chvaliuk --- .github/actions/start-aws-runner/action.yml | 3 ++- .github/workflows/gradle.yml | 1 + .github/workflows/publish-command.yml | 1 + .github/workflows/test-command.yml | 1 + .github/workflows/test-performance-command.yml | 1 + 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/actions/start-aws-runner/action.yml b/.github/actions/start-aws-runner/action.yml index ae3ba61873e50..635bb7f5cbc01 100644 --- a/.github/actions/start-aws-runner/action.yml +++ b/.github/actions/start-aws-runner/action.yml @@ -8,7 +8,8 @@ inputs: github-token: required: true ec2-image-id: - default: "ami-04bd6e81239f4f3fb" + # github-self-hosted-runner-ubuntu-20-100g-disk + default: "ami-0ccd67e0abd945eec" required: true ec2-instance-type: default: "c5.2xlarge" diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index adc1ad47b14d9..c5c53c331a1b9 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -482,6 +482,7 @@ jobs: id: start-ec2-runner uses: ./.github/actions/start-aws-runner with: + # github-self-hosted-runner-ubuntu-20-with-120gdisk-docker-20.10.7 ec2-image-id: ami-0d4083c04fde515c4 aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ 
secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index f73a4dacd4161..2770eaa8b35f7 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -34,6 +34,7 @@ jobs: aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} github-token: ${{ secrets.SELF_RUNNER_GITHUB_ACCESS_TOKEN }} + # 80 gb disk ec2-image-id: ami-0d648081937c75a73 publish-image: needs: start-publish-image-runner diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 8c747eaf76451..363e72851248d 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -33,6 +33,7 @@ jobs: aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} github-token: ${{ secrets.SELF_RUNNER_GITHUB_ACCESS_TOKEN }} + # 80 gb disk ec2-image-id: ami-0d648081937c75a73 integration-test: timeout-minutes: 240 diff --git a/.github/workflows/test-performance-command.yml b/.github/workflows/test-performance-command.yml index 1cf83a0b24a64..a3e0dfbce070c 100644 --- a/.github/workflows/test-performance-command.yml +++ b/.github/workflows/test-performance-command.yml @@ -39,6 +39,7 @@ jobs: aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} github-token: ${{ secrets.SELF_RUNNER_GITHUB_ACCESS_TOKEN }} + # 80 gb disk ec2-image-id: ami-0d648081937c75a73 performance-test: timeout-minutes: 240 From d65b8f986b29e7ca9232eebc29495f34479a9948 Mon Sep 17 00:00:00 2001 From: Augustin Date: Mon, 17 Jan 2022 09:27:14 +0100 Subject: [PATCH 134/215] =?UTF-8?q?=F0=9F=90=99=20octavia-cli:=20generate?= =?UTF-8?q?=20open=20api=20client=20(#9277)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 12 +++---- build.gradle | 1 + octavia-cli/.dockerignore | 1 + octavia-cli/README.md | 7 ++-- octavia-cli/build.gradle | 16 +++++++++ octavia-cli/octavia_cli/entrypoint.py | 43 +++++++++++++++-------- octavia-cli/setup.py | 5 +-- octavia-cli/unit_tests/test_entrypoint.py | 28 +++++++++++++-- tools/python/.flake8 | 1 + 9 files changed, 86 insertions(+), 28 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5bf2849642590..e62b52145420f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,19 +7,16 @@ repos: hooks: - id: licenseheaders args: ["--tmpl=LICENSE_SHORT", "--ext=py", "-f"] - - repo: https://github.com/ambv/black rev: 21.11b1 hooks: - id: black - - repo: https://github.com/timothycrosley/isort rev: 5.10.1 hooks: - id: isort args: ["--dont-follow-links", "--jobs=-1"] additional_dependencies: ["colorama"] - - repo: https://github.com/pre-commit/mirrors-prettier rev: v2.5.0 hooks: @@ -29,7 +26,8 @@ repos: (?x)^.*( .github/| source_specs.yaml| - destination_specs.yaml + destination_specs.yaml| + .gitlab-ci.yml ).?$ - repo: https://github.com/csachs/pyproject-flake8 @@ -38,12 +36,14 @@ repos: - id: pyproject-flake8 additional_dependencies: ["mccabe"] alias: flake8 - - repo: https://github.com/pre-commit/mirrors-mypy rev: v0.910-1 hooks: - id: mypy - + exclude: | + (?x)^.*( + octavia-cli/unit_tests/| + ).?$ - repo: local hooks: - id: spec-linter diff --git a/build.gradle b/build.gradle index ad8535bbc29f6..0c86b69da1881 
100644 --- a/build.gradle +++ b/build.gradle @@ -340,6 +340,7 @@ subprojects { source = fileTree(dir: projectDir) .include("**/*.py") .exclude(".venv/**/*.py") + .exclude("**/airbyte_api_client/**/*.py") .exclude("**/__init__.py") strictCheck = true } diff --git a/octavia-cli/.dockerignore b/octavia-cli/.dockerignore index 89b498ce9934e..1a4780ba9ba3d 100644 --- a/octavia-cli/.dockerignore +++ b/octavia-cli/.dockerignore @@ -1,3 +1,4 @@ build +!build/airbyte_api_client .venv octavia_cli.egg-info diff --git a/octavia-cli/README.md b/octavia-cli/README.md index 4bcc11dac808c..3589ea1512f85 100644 --- a/octavia-cli/README.md +++ b/octavia-cli/README.md @@ -14,9 +14,9 @@ The project is under development: readers can refer to our [tech spec deck](http We encourage users to use the CLI with docker to avoid the hassle of setting up a Python installation. The project is under development: we have not yet published any docker image to our Docker registry. -1. Build the image locally: +1. Build the project locally (from the root of the repo): ```bash -docker build -t octavia-cli:dev --rm . +SUB_BUILD=OCTAVIA_CLI ./gradlew build #from the root of the repo ``` 2. Run the CLI from docker: ```bash @@ -34,10 +34,11 @@ Octavia is currently under development. You can find a detailed and updated execution plan [here](https://docs.google.com/spreadsheets/d/1weB9nf0Zx3IR_QvpkxtjBAzyfGb7B0PWpsVt6iMB5Us/edit#gid=0). We welcome community contributions! -Summary of achievements: +**Summary of achievements**: | Date | Milestone | |------------|-------------------------------------| +| 2022-01-06 | Generate an API Python client from our Open API spec | | 2021-12-22 | Bootstrapping the project's code base | # Developing locally diff --git a/octavia-cli/build.gradle b/octavia-cli/build.gradle index 5ebd064d689b0..ea29a91ffa369 100644 --- a/octavia-cli/build.gradle +++ b/octavia-cli/build.gradle @@ -1,4 +1,7 @@ +import org.openapitools.generator.gradle.plugin.tasks.GenerateTask + plugins { + id "org.openapi.generator" version "5.3.1" id 'airbyte-python' id 'airbyte-docker' } @@ -7,3 +10,16 @@ airbytePython { moduleDirectory 'octavia_cli' } + +task generateApiClient(type: GenerateTask) { + inputSpec = "$rootDir.absolutePath/airbyte-api/src/main/openapi/config.yaml" + outputDir = "$buildDir/airbyte_api_client" + + generatorName = "python" + packageName = "airbyte_api_client" +} + +blackFormat.dependsOn generateApiClient +isortFormat.dependsOn generateApiClient +flakeCheck.dependsOn generateApiClient +installReqs.dependsOn generateApiClient diff --git a/octavia-cli/octavia_cli/entrypoint.py b/octavia-cli/octavia_cli/entrypoint.py index 3d82bc32f5ea3..b0de9afb55171 100644 --- a/octavia-cli/octavia_cli/entrypoint.py +++ b/octavia-cli/octavia_cli/entrypoint.py @@ -2,41 +2,56 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
# +import airbyte_api_client import click +from airbyte_api_client.api import workspace_api @click.group() @click.option("--airbyte-url", envvar="AIRBYTE_URL", default="http://localhost:8000", help="The URL of your Airbyte instance.") -def octavia(airbyte_url): - # TODO: check if the airbyte_url is reachable - click.secho(f"🐙 - Octavia is targetting your Airbyte instance running at {airbyte_url}") +@click.pass_context +def octavia(ctx: click.Context, airbyte_url: str) -> None: + ctx.ensure_object(dict) + client_configuration = airbyte_api_client.Configuration(host=f"{airbyte_url}/api") + api_client = airbyte_api_client.ApiClient(client_configuration) + # TODO alafanechere workspace check might deserve its own function + api_instance = workspace_api.WorkspaceApi(api_client) + # open-api-generator consider non-required field as not nullable + # This will break validation of WorkspaceRead object for firstCompletedSync and feedbackDone fields + # This is why we bypass _check_return_type + api_response = api_instance.list_workspaces(_check_return_type=False) + # TODO alafanechere prompt user to chose a workspace if multiple workspaces exist + workspace_id = api_response.workspaces[0]["workspaceId"] + click.echo(f"🐙 - Octavia is targetting your Airbyte instance running at {airbyte_url} on workspace {workspace_id}") + ctx.obj["API_CLIENT"] = api_client + ctx.obj["WORKSPACE_ID"] = workspace_id @octavia.command(help="Scaffolds a local project directories.") -def init(): +def init() -> None: raise click.ClickException("The init command is not yet implemented.") @octavia.command(name="list", help="List existing resources on the Airbyte instance.") -def _list(): - raise click.ClickException("The init command is not yet implemented.") +def _list() -> None: + raise click.ClickException("The list command is not yet implemented.") @octavia.command(name="import", help="Import an existing resources from the Airbyte instance.") -def _import(): - raise click.ClickException("The init command is not yet implemented.") +def _import() -> None: + raise click.ClickException("The import command is not yet implemented.") @octavia.command(help="Generate a YAML configuration file to manage a resource.") -def create(): - raise click.ClickException("The init command is not yet implemented.") +def create() -> None: + raise click.ClickException("The create command is not yet implemented.") @octavia.command(help="Create or update resources according to YAML configurations.") -def apply(): - raise click.ClickException("The init command is not yet implemented.") +def apply() -> None: + raise click.ClickException("The apply command is not yet implemented.") @octavia.command(help="Delete resources") -def delete(): - raise click.ClickException("The init command is not yet implemented.") +def delete() -> None: + raise click.ClickException("The delete command is not yet implemented.") diff --git a/octavia-cli/setup.py b/octavia-cli/setup.py index e5ad552eb3d9d..b7056db32bfbc 100644 --- a/octavia-cli/setup.py +++ b/octavia-cli/setup.py @@ -2,6 +2,7 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
# +import os import pathlib from setuptools import find_packages, setup @@ -39,8 +40,8 @@ "Source": "https://github.com/airbytehq/airbyte", "Tracker": "https://github.com/airbytehq/airbyte/issues", }, - packages=find_packages(exclude=("tests", "docs")), - install_requires=["click~=8.0.3"], + packages=find_packages(exclude=("unit_tests", "docs")), + install_requires=["click~=8.0.3", f"airbyte_api_client @ file://{os.getcwd()}/build/airbyte_api_client"], python_requires=">=3.8.12", extras_require={ "dev": ["MyPy~=0.812", "pytest~=6.2.5", "pytest-cov", "pytest-mock", "requests-mock", "pre-commit"], diff --git a/octavia-cli/unit_tests/test_entrypoint.py b/octavia-cli/unit_tests/test_entrypoint.py index c8effc674c65f..50d5ad68af118 100644 --- a/octavia-cli/unit_tests/test_entrypoint.py +++ b/octavia-cli/unit_tests/test_entrypoint.py @@ -2,16 +2,38 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. # +from unittest import mock + +import click import pytest from click.testing import CliRunner from octavia_cli import entrypoint -def test_octavia(): +@click.command() +@click.pass_context +def dumb(ctx): + pass + + +def test_octavia(mocker): + mocker.patch.object(entrypoint, "workspace_api") + mocker.patch.object(entrypoint, "airbyte_api_client") + + context_object = {} + mock_api_instance = entrypoint.workspace_api.WorkspaceApi.return_value + mock_api_instance.list_workspaces.return_value = mock.MagicMock(workspaces=[{"workspaceId": "expected_workspace_id"}]) + + entrypoint.octavia.add_command(dumb) runner = CliRunner() - result = runner.invoke(entrypoint.octavia) + result = runner.invoke(entrypoint.octavia, ["--airbyte-url", "test-airbyte-url", "dumb"], obj=context_object) + entrypoint.airbyte_api_client.Configuration.assert_called_with(host="test-airbyte-url/api") + entrypoint.airbyte_api_client.ApiClient.assert_called_with(entrypoint.airbyte_api_client.Configuration.return_value) + entrypoint.workspace_api.WorkspaceApi.assert_called_with(entrypoint.airbyte_api_client.ApiClient.return_value) + mock_api_instance.list_workspaces.assert_called_once() + assert context_object["API_CLIENT"] == entrypoint.airbyte_api_client.ApiClient.return_value + assert context_object["WORKSPACE_ID"] == "expected_workspace_id" assert result.exit_code == 0 - assert result.output.startswith("Usage: octavia [OPTIONS] COMMAND [ARGS]...") @pytest.mark.parametrize( diff --git a/tools/python/.flake8 b/tools/python/.flake8 index a270aeef08638..b07e01f847baa 100644 --- a/tools/python/.flake8 +++ b/tools/python/.flake8 @@ -5,6 +5,7 @@ exclude = .eggs # python libraries" .tox build + airbyte_api_client # generated api client in octavia-cli extend-ignore = E203, # whitespace before ':' (conflicts with Black) E231, # Bad trailing comma (conflicts with Black) From d09829b21c00ff7ba75758052f4c296af4a9cf89 Mon Sep 17 00:00:00 2001 From: Anna Lvova <37615075+annalvova05@users.noreply.github.com> Date: Mon, 17 Jan 2022 11:20:58 +0100 Subject: [PATCH 135/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20Mailchimp:=20su?= =?UTF-8?q?pport=20oauth=20flow=20(#7159)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add mailchimp oauth support * add PR * fix creds * upd spec * format * upd creds * upd auth for different creds * rename creds * rename creds * change ref in campaigns.json * upd timeout_seconds * merge * add oauth java part * add java test * bump version * update spec * add anotation * upd spec * upd spec * upd * upd tests * format * upd * upd * add state * add invalid_config * bump 
version * format --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 108 ++++++++++++++--- .../connectors/source-mailchimp/Dockerfile | 2 +- .../acceptance-test-config.yml | 27 +++++ .../integration_tests/integration_test.py | 7 -- .../invalid_config_apikey.json | 6 + .../invalid_config_oauth.json | 8 ++ .../connectors/source-mailchimp/setup.py | 1 - .../source_mailchimp/source.py | 55 ++++++--- .../source_mailchimp/spec.json | 112 ++++++++++++++++-- .../oauth/OAuthImplementationFactory.java | 1 + .../oauth/flows/MailchimpOAuthFlow.java | 89 ++++++++++++++ .../oauth/flows/MailchimpOAuthFlowTest.java | 44 +++++++ docs/integrations/sources/mailchimp.md | 11 ++ 14 files changed, 422 insertions(+), 51 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-mailchimp/integration_tests/integration_test.py create mode 100644 airbyte-integrations/connectors/source-mailchimp/integration_tests/invalid_config_apikey.json create mode 100644 airbyte-integrations/connectors/source-mailchimp/integration_tests/invalid_config_oauth.json create mode 100644 airbyte-oauth/src/main/java/io/airbyte/oauth/flows/MailchimpOAuthFlow.java create mode 100644 airbyte-oauth/src/test/java/io/airbyte/oauth/flows/MailchimpOAuthFlowTest.java diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7df59d480d51e..452238a59e0af 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -383,7 +383,7 @@ - name: Mailchimp sourceDefinitionId: b03a9f3e-22a5-11eb-adc1-0242ac120002 dockerRepository: airbyte/source-mailchimp - dockerImageTag: 0.2.10 + dockerImageTag: 0.2.11 documentationUrl: https://docs.airbyte.io/integrations/sources/mailchimp icon: mailchimp.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 4909346bbea47..294e4080a640d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3764,31 +3764,109 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mailchimp:0.2.10" +- dockerImage: "airbyte/source-mailchimp:0.2.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/mailchimp" connectionSpecification: $schema: "http://json-schema.org/draft-07/schema#" title: "Mailchimp Spec" type: "object" - required: - - "username" - - "apikey" - additionalProperties: false + required: [] + additionalProperties: true properties: - username: - type: "string" - title: "Username" - description: "The Username or email you use to sign into Mailchimp." - apikey: - type: "string" - airbyte_secret: true - title: "API Key" - description: "Mailchimp API Key. See the docs for information on how to generate this key." + credentials: + type: "object" + title: "Authentication Method" + oneOf: + - title: "OAuth2.0" + type: "object" + required: + - "auth_type" + - "access_token" + properties: + auth_type: + type: "string" + const: "oauth2.0" + enum: + - "oauth2.0" + default: "oauth2.0" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your OAuth application." 
+ airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The Client Secret of your OAuth application." + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "An access token generated using the above client ID\ + \ and secret." + airbyte_secret: true + - type: "object" + title: "API Key" + required: + - "auth_type" + - "apikey" + properties: + auth_type: + type: "string" + const: "apikey" + enum: + - "apikey" + default: "apikey" + order: 1 + apikey: + type: "string" + title: "API Key" + description: "Mailchimp API Key. See the docs for information on how to generate this key." + airbyte_secret: true supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] + advanced_auth: + auth_flow_type: "oauth2.0" + predicate_key: + - "credentials" + - "auth_type" + predicate_value: "oauth2.0" + oauth_config_specification: + complete_oauth_output_specification: + type: "object" + additionalProperties: false + properties: + access_token: + type: "string" + path_in_connector_config: + - "credentials" + - "access_token" + complete_oauth_server_input_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + client_secret: + type: "string" + complete_oauth_server_output_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + path_in_connector_config: + - "credentials" + - "client_id" + client_secret: + type: "string" + path_in_connector_config: + - "credentials" + - "client_secret" - dockerImage: "airbyte/source-mailgun:0.1.0" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/mailgun" diff --git a/airbyte-integrations/connectors/source-mailchimp/Dockerfile b/airbyte-integrations/connectors/source-mailchimp/Dockerfile index 3968856ba0bb9..5ec742f2978b1 100644 --- a/airbyte-integrations/connectors/source-mailchimp/Dockerfile +++ b/airbyte-integrations/connectors/source-mailchimp/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.10 +LABEL io.airbyte.version=0.2.11 LABEL io.airbyte.name=airbyte/source-mailchimp diff --git a/airbyte-integrations/connectors/source-mailchimp/acceptance-test-config.yml b/airbyte-integrations/connectors/source-mailchimp/acceptance-test-config.yml index 160e8caa190f6..76b211bf8aa38 100644 --- a/airbyte-integrations/connectors/source-mailchimp/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-mailchimp/acceptance-test-config.yml @@ -2,16 +2,41 @@ connector_image: airbyte/source-mailchimp:dev tests: spec: - spec_path: "source_mailchimp/spec.json" + timeout_seconds: 60 connection: + # for old spec config (without oneOf) - config_path: "secrets/config.json" status: "succeed" + timeout_seconds: 180 + # for auth with API token + - config_path: "secrets/config_apikey.json" + status: "succeed" + timeout_seconds: 180 + # for auth with oauth2 token + - config_path: "secrets/config_oauth.json" + status: "succeed" + timeout_seconds: 180 - config_path: "integration_tests/invalid_config.json" status: "failed" + timeout_seconds: 180 + - config_path: "integration_tests/invalid_config_apikey.json" + status: "failed" + timeout_seconds: 180 + - config_path: "integration_tests/invalid_config_oauth.json" + status: "failed" + timeout_seconds: 180 discovery: + # for old spec config (without oneOf) - config_path: "secrets/config.json" + # for auth with API token + - config_path: "secrets/config_apikey.json" + # for auth with oauth2 token + - config_path: "secrets/config_oauth.json" basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + - config_path: "secrets/config_oauth.json" + configured_catalog_path: "integration_tests/configured_catalog.json" # THIS TEST IS COMMENTED OUT. Tests are supposed to accept # `state = {cursor_field: value}`. When we have dependent endpoint path # `path_begin/{some_id}/path_end` we need a complex state like below: @@ -30,3 +55,5 @@ tests: full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + - config_path: "secrets/config_oauth.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-mailchimp/integration_tests/integration_test.py b/airbyte-integrations/connectors/source-mailchimp/integration_tests/integration_test.py deleted file mode 100644 index e1814314fc3b0..0000000000000 --- a/airbyte-integrations/connectors/source-mailchimp/integration_tests/integration_test.py +++ /dev/null @@ -1,7 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
-# - - -def test_example_method(): - assert True diff --git a/airbyte-integrations/connectors/source-mailchimp/integration_tests/invalid_config_apikey.json b/airbyte-integrations/connectors/source-mailchimp/integration_tests/invalid_config_apikey.json new file mode 100644 index 0000000000000..f2fd16517bba4 --- /dev/null +++ b/airbyte-integrations/connectors/source-mailchimp/integration_tests/invalid_config_apikey.json @@ -0,0 +1,6 @@ +{ + "credentials": { + "auth_type": "apikey", + "apikey": "api-key-awesome" + } +} diff --git a/airbyte-integrations/connectors/source-mailchimp/integration_tests/invalid_config_oauth.json b/airbyte-integrations/connectors/source-mailchimp/integration_tests/invalid_config_oauth.json new file mode 100644 index 0000000000000..ef7ef97ee2413 --- /dev/null +++ b/airbyte-integrations/connectors/source-mailchimp/integration_tests/invalid_config_oauth.json @@ -0,0 +1,8 @@ +{ + "credentials": { + "auth_type": "oauth2.0", + "client_id": "client_id", + "client_secret": "client_secret", + "access_token": "access_token" + } +} diff --git a/airbyte-integrations/connectors/source-mailchimp/setup.py b/airbyte-integrations/connectors/source-mailchimp/setup.py index 7b8d35bf6ad3c..aa117a3256bd9 100644 --- a/airbyte-integrations/connectors/source-mailchimp/setup.py +++ b/airbyte-integrations/connectors/source-mailchimp/setup.py @@ -13,7 +13,6 @@ packages=find_packages(), install_requires=[ "airbyte-cdk~=0.1.35", - "mailchimp3==3.0.14", "pytest~=6.1", ], package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py index aaea7ec9c3b39..822f7d3e5d018 100644 --- a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py @@ -6,36 +6,61 @@ import base64 from typing import Any, List, Mapping, Tuple +import requests from airbyte_cdk import AirbyteLogger from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator -from mailchimp3 import MailChimp +from requests.auth import AuthBase from .streams import Campaigns, EmailActivity, Lists -class HttpBasicAuthenticator(TokenAuthenticator): - def __init__(self, auth: Tuple[str, str], auth_method: str = "Basic", **kwargs): - # API keys have the format -. - # See https://mailchimp.com/developer/marketing/docs/fundamentals/#api-structure - self.data_center = auth[1].split("-").pop() - auth_string = f"{auth[0]}:{auth[1]}".encode("utf8") - b64_encoded = base64.b64encode(auth_string).decode("utf8") - super().__init__(token=b64_encoded, auth_method=auth_method, **kwargs) +class MailChimpAuthenticator: + @staticmethod + def get_server_prefix(access_token: str) -> str: + try: + response = requests.get( + "https://login.mailchimp.com/oauth2/metadata", headers={"Authorization": "OAuth {}".format(access_token)} + ) + return response.json()["dc"] + except Exception as e: + raise Exception(f"Cannot retrieve server_prefix for you account. \n {repr(e)}") + + def get_auth(self, config: Mapping[str, Any]) -> AuthBase: + authorization = config.get("credentials", {}) + auth_type = authorization.get("auth_type") + if auth_type == "apikey" or not authorization: + # API keys have the format -. 
+ # See https://mailchimp.com/developer/marketing/docs/fundamentals/#api-structure + apikey = authorization.get("apikey") or config.get("apikey") + if not apikey: + raise Exception("No apikey in creds") + auth_string = f"anystring:{apikey}".encode("utf8") + b64_encoded = base64.b64encode(auth_string).decode("utf8") + auth = TokenAuthenticator(token=b64_encoded, auth_method="Basic") + auth.data_center = apikey.split("-").pop() + + elif auth_type == "oauth2.0": + access_token = authorization["access_token"] + auth = TokenAuthenticator(token=access_token, auth_method="Bearer") + auth.data_center = self.get_server_prefix(access_token) + + else: + raise Exception(f"Invalid auth type: {auth_type}") + + return auth class SourceMailchimp(AbstractSource): def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]: try: - client = MailChimp(mc_api=config["apikey"], mc_user=config["username"]) - client.ping.get() + authenticator = MailChimpAuthenticator().get_auth(config) + requests.get(f"https://{authenticator.data_center}.api.mailchimp.com/3.0/ping", headers=authenticator.get_auth_header()) return True, None except Exception as e: return False, repr(e) def streams(self, config: Mapping[str, Any]) -> List[Stream]: - authenticator = HttpBasicAuthenticator(auth=("anystring", config["apikey"])) - streams_ = [Lists(authenticator=authenticator), Campaigns(authenticator=authenticator), EmailActivity(authenticator=authenticator)] - - return streams_ + authenticator = MailChimpAuthenticator().get_auth(config) + return [Lists(authenticator=authenticator), Campaigns(authenticator=authenticator), EmailActivity(authenticator=authenticator)] diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/spec.json b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/spec.json index 3aee31fff53b5..98de089a30c97 100644 --- a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/spec.json +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/spec.json @@ -4,19 +4,109 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Mailchimp Spec", "type": "object", - "required": ["username", "apikey"], - "additionalProperties": false, + "required": [], + "additionalProperties": true, "properties": { - "username": { - "type": "string", - "title": "Username", - "description": "The Username or email you use to sign into Mailchimp." 
+ "credentials": { + "type": "object", + "title": "Authentication Method", + "oneOf": [ + { + "title": "OAuth2.0", + "type": "object", + "required": ["auth_type", "access_token"], + "properties": { + "auth_type": { + "type": "string", + "const": "oauth2.0", + "enum": ["oauth2.0"], + "default": "oauth2.0", + "order": 0 + }, + "client_id": { + "title": "Client ID", + "type": "string", + "description": "The Client ID of your OAuth application.", + "airbyte_secret": true + }, + "client_secret": { + "title": "Client Secret", + "type": "string", + "description": "The Client Secret of your OAuth application.", + "airbyte_secret": true + }, + "access_token": { + "title": "Access Token", + "type": "string", + "description": "An access token generated using the above client ID and secret.", + "airbyte_secret": true + } + } + }, + { + "type": "object", + "title": "API Key", + "required": ["auth_type", "apikey"], + "properties": { + "auth_type": { + "type": "string", + "const": "apikey", + "enum": ["apikey"], + "default": "apikey", + "order": 1 + }, + "apikey": { + "type": "string", + "title": "API Key", + "description": "Mailchimp API Key. See the docs for information on how to generate this key.", + "airbyte_secret": true + } + } + } + ] + } + } + }, + "advanced_auth": { + "auth_flow_type": "oauth2.0", + "predicate_key": ["credentials", "auth_type"], + "predicate_value": "oauth2.0", + "oauth_config_specification": { + "complete_oauth_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "access_token": { + "type": "string", + "path_in_connector_config": ["credentials", "access_token"] + } + } + }, + "complete_oauth_server_input_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string" + }, + "client_secret": { + "type": "string" + } + } }, - "apikey": { - "type": "string", - "airbyte_secret": true, - "title": "API Key", - "description": "Mailchimp API Key. See the docs for information on how to generate this key." 
+ "complete_oauth_server_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string", + "path_in_connector_config": ["credentials", "client_id"] + }, + "client_secret": { + "type": "string", + "path_in_connector_config": ["credentials", "client_secret"] + } + } } } } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index 0894acbf4efc1..d9de82bf9eccb 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -55,6 +55,7 @@ public OAuthImplementationFactory(final ConfigRepository configRepository, final .put("airbyte/source-zendesk-chat", new ZendeskChatOAuthFlow(configRepository, httpClient)) .put("airbyte/source-monday", new MondayOAuthFlow(configRepository, httpClient)) .put("airbyte/source-zendesk-sunshine", new ZendeskSunshineOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-mailchimp", new MailchimpOAuthFlow(configRepository, httpClient)) .build(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/MailchimpOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/MailchimpOAuthFlow.java new file mode 100644 index 0000000000000..84b87c91efb7c --- /dev/null +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/MailchimpOAuthFlow.java @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.BaseOAuth2Flow; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import org.apache.http.client.utils.URIBuilder; + +/** + * Following docs from https://mailchimp.com/developer/marketing/guides/access-user-data-oauth-2/ + */ +public class MailchimpOAuthFlow extends BaseOAuth2Flow { + + private static final String ACCESS_TOKEN_URL = "https://login.mailchimp.com/oauth2/token"; + private static final String AUTHORIZE_URL = "https://login.mailchimp.com/oauth2/authorize"; + + public MailchimpOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient) { + super(configRepository, httpClient); + } + + @VisibleForTesting + public MailchimpOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient, final Supplier stateSupplier) { + super(configRepository, httpClient, stateSupplier); + } + + @Override + protected String formatConsentUrl(final UUID definitionId, + final String clientId, + final String redirectUrl, + final JsonNode inputOAuthConfiguration) + throws IOException { + + try { + return new URIBuilder(AUTHORIZE_URL) + .addParameter("client_id", clientId) + .addParameter("response_type", "code") + .addParameter("redirect_uri", redirectUrl) + .addParameter("state", getState()) + .build().toString(); + } catch (URISyntaxException e) { + throw new IOException("Failed to format Consent URL for OAuth flow", e); + } + } + + @Override + protected Map getAccessTokenQueryParameters(String clientId, + String clientSecret, + String authCode, + String redirectUrl) { + return 
ImmutableMap.builder() + // required + .put("grant_type", "authorization_code") + .put("code", authCode) + .put("client_id", clientId) + .put("client_secret", clientSecret) + .put("redirect_uri", redirectUrl) + .build(); + } + + @Override + protected String getAccessTokenUrl(final JsonNode inputOAuthConfiguration) { + return ACCESS_TOKEN_URL; + } + + @Override + protected Map extractOAuthOutput(final JsonNode data, final String accessTokenUrl) throws IOException { + final Map result = new HashMap<>(); + // getting out access_token + if (data.has("access_token")) { + result.put("access_token", data.get("access_token").asText()); + } else { + throw new IOException(String.format("Missing 'access_token' in query params from %s", accessTokenUrl)); + } + return result; + } + +} diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/MailchimpOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/MailchimpOAuthFlowTest.java new file mode 100644 index 0000000000000..138f93bba448d --- /dev/null +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/MailchimpOAuthFlowTest.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.oauth.BaseOAuthFlow; +import io.airbyte.oauth.MoreOAuthParameters; +import java.util.Map; + +public class MailchimpOAuthFlowTest extends BaseOAuthFlowTest { + + @Override + protected BaseOAuthFlow getOAuthFlow() { + return new MailchimpOAuthFlow(getConfigRepository(), getHttpClient(), this::getConstantState); + } + + @Override + protected String getExpectedConsentUrl() { + return "https://login.mailchimp.com/oauth2/authorize?client_id=test_client_id&response_type=code&redirect_uri=https%3A%2F%2Fairbyte.io&state=state"; + } + + @Override + protected Map getExpectedOutput() { + return Map.of( + "access_token", "access_token_response", + "client_id", MoreOAuthParameters.SECRET_MASK, + "client_secret", MoreOAuthParameters.SECRET_MASK); + } + + @Override + protected JsonNode getCompleteOAuthOutputSpecification() { + return getJsonSchema(Map.of("access_token", Map.of("type", "string"))); + } + + @Override + protected Map getExpectedFilteredOutput() { + return Map.of( + "access_token", "access_token_response", + "client_id", MoreOAuthParameters.SECRET_MASK); + } + +} diff --git a/docs/integrations/sources/mailchimp.md b/docs/integrations/sources/mailchimp.md index 26aea681ad825..de9e8e250ff2f 100644 --- a/docs/integrations/sources/mailchimp.md +++ b/docs/integrations/sources/mailchimp.md @@ -36,9 +36,15 @@ At the time of this writing, [Mailchimp does not impose rate limits](https://mai ### Requirements +For Apikey authorithation: * Mailchimp account * Mailchimp API key +For OAuth authorization: +* Mailchimp registered app +* Mailchimp client_id +* Mailchimp client_secret + ### Setup guide To start syncing Mailchimp data with Airbyte, you'll need two things: @@ -46,10 +52,15 @@ To start syncing Mailchimp data with Airbyte, you'll need two things: 1. Your Mailchimp username. Often this is just the email address or username you use to sign into Mailchimp. 2. A Mailchimp API Key. Follow the [Mailchimp documentation for generating an API key](https://mailchimp.com/help/about-api-keys/). +OR +1. Register an app in [Mailchimp](https://us2.admin.mailchimp.com/account/oauth2/). +2. Specify client_id and client_secret. 
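After registering the app, the access token it yields also determines which Mailchimp data center the connector must call. The minimal sketch below mirrors the lookup this patch adds in `source_mailchimp/source.py` (the `https://login.mailchimp.com/oauth2/metadata` endpoint, the `OAuth <token>` authorization header, the `dc` field, and the per-data-center `/3.0/ping` check); the token value here is a placeholder, not a real credential.

```python
import requests


def resolve_server_prefix(access_token: str) -> str:
    # Ask Mailchimp's OAuth metadata endpoint which data center ("dc")
    # hosts the account behind this access token.
    response = requests.get(
        "https://login.mailchimp.com/oauth2/metadata",
        headers={"Authorization": f"OAuth {access_token}"},
    )
    response.raise_for_status()
    return response.json()["dc"]


# Placeholder token for illustration only.
token = "YOUR_ACCESS_TOKEN"
dc = resolve_server_prefix(token)

# The connector then targets the regional API host, e.g. for a connection check:
ping = requests.get(
    f"https://{dc}.api.mailchimp.com/3.0/ping",
    headers={"Authorization": f"Bearer {token}"},
)
print(ping.status_code)
```

For API-key setups the same regional host applies, except the prefix is taken from the suffix of the key itself and the request is sent with Basic authentication, as the connector code above does.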
+ ## Changelog | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.11 | 2021-12-24| [7159](https://github.com/airbytehq/airbyte/pull/7159) | Add oauth2.0 support | | 0.2.10 | 2021-12-21 | [9000](https://github.com/airbytehq/airbyte/pull/9000) | Update connector fields title/description | | 0.2.9 | 2021-12-13 | [7975](https://github.com/airbytehq/airbyte/pull/7975) | Updated JSON schemas | | 0.2.8 | 2021-08-17 | [5481](https://github.com/airbytehq/airbyte/pull/5481) | Remove date-time type from some fields | From 25fb7e7fd744f3852ebe8152db5514513f8a2c9a Mon Sep 17 00:00:00 2001 From: Luis Gomez <781929+lgomezm@users.noreply.github.com> Date: Mon, 17 Jan 2022 05:54:31 -0500 Subject: [PATCH 136/215] Source Hubspot: Some incremental CRM objects and engagements (#8887) --- .../36c891d9-4bd9-43ac-bad2-10e12756272c.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-hubspot/Dockerfile | 2 +- .../integration_tests/abnormal_state.json | 3 + .../sample_files/configured_catalog.json | 9 +- .../sample_files/sample_state.json | 3 + .../source-hubspot/source_hubspot/api.py | 235 ++++++++++++++---- .../source-hubspot/source_hubspot/client.py | 11 +- docs/integrations/sources/hubspot.md | 2 + 10 files changed, 212 insertions(+), 59 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json index b307efc884257..28595536e48b4 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "36c891d9-4bd9-43ac-bad2-10e12756272c", "name": "HubSpot", "dockerRepository": "airbyte/source-hubspot", - "dockerImageTag": "0.1.32", + "dockerImageTag": "0.1.33", "documentationUrl": "https://docs.airbyte.io/integrations/sources/hubspot", "icon": "hubspot.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 452238a59e0af..9347a5dc1119d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -293,7 +293,7 @@ - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.32 + dockerImageTag: 0.1.33 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 294e4080a640d..723f59f8fdd60 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2872,7 +2872,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-hubspot:0.1.32" +- dockerImage: "airbyte/source-hubspot:0.1.33" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile 
b/airbyte-integrations/connectors/source-hubspot/Dockerfile index e51182b850dd4..2f786fa761296 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.32 +LABEL io.airbyte.version=0.1.33 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json index 5944b5d50c7a9..5cf26f89b23dd 100644 --- a/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json @@ -14,6 +14,9 @@ "email_events": { "timestamp": "2221-10-12T13:37:56.412000+00:00" }, + "engagements": { + "lastUpdated": 7945393076412 + }, "line_items": { "updatedAt": "2221-10-12T13:37:56.412000+00:00" }, diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json index b6157948c2968..c3cbea5bfa4e5 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json @@ -82,10 +82,13 @@ "stream": { "name": "engagements", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["lastUpdated"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["lastUpdated"], + "destination_sync_mode": "append" }, { "stream": { diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/sample_state.json b/airbyte-integrations/connectors/source-hubspot/sample_files/sample_state.json index a652b9bb23011..a78d97590de20 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/sample_state.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/sample_state.json @@ -14,6 +14,9 @@ "email_events": { "timestamp": "2021-02-23T00:00:00Z" }, + "engagements": { + "lastUpdated": 1614038400000 + }, "line_items": { "updatedAt": "2021-02-23T00:00:00Z" }, diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index 8dedd91d82004..4f12dbc2a9882 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -93,14 +93,14 @@ def giveup_handler(exc): ) -def retry_after_handler(**kwargs): +def retry_after_handler(fixed_retry_after=None, **kwargs): """Retry helper when we hit the call limit, sleeps for specific duration""" def sleep_on_ratelimit(_details): _, exc, _ = sys.exc_info() if isinstance(exc, HubspotRateLimited): # HubSpot API does not always return Retry-After value for 429 HTTP error - retry_after = int(exc.response.headers.get("Retry-After", 3)) + retry_after = fixed_retry_after if fixed_retry_after else int(exc.response.headers.get("Retry-After", 3)) logger.info(f"Rate limit reached. 
Sleeping for {retry_after} seconds") time.sleep(retry_after + 1) # extra second to cover any fractions of second @@ -216,7 +216,7 @@ def name(self) -> str: stream_name = stream_name[: -len("Stream")] return stream_name - def list(self, fields) -> Iterable: + def list_records(self, fields) -> Iterable: yield from self.read(partial(self._api.get, url=self.url)) @staticmethod @@ -309,6 +309,27 @@ def _filter_old_records(self, records: Iterable) -> Iterable: continue yield record + def _read_stream_records( + self, getter: Callable, properties_list: List[str], params: MutableMapping[str, Any] = None + ) -> Tuple[dict, Any]: + # TODO: Additional processing was added due to the fact that users receive 414 errors while syncing their streams (issues #3977 and #5835). + # We will need to fix this code when the HubSpot developers add the ability to use a special parameter to get all properties for an entity. + # According to HubSpot Community (https://community.hubspot.com/t5/APIs-Integrations/Get-all-contact-properties-without-explicitly-listing-them/m-p/447950) + # and the official documentation, this does not exist at the moment. + stream_records = {} + response = None + + for properties in split_properties(properties_list): + params.update({"properties": ",".join(properties)}) + response = getter(params=params) + for record in self._transform(self.parse_response(response)): + if record["id"] not in stream_records: + stream_records[record["id"]] = record + elif stream_records[record["id"]].get("properties"): + stream_records[record["id"]]["properties"].update(record.get("properties", {})) + + return stream_records, response + def _read(self, getter: Callable, params: MutableMapping[str, Any] = None) -> Iterator: next_page_token = None while True: @@ -317,21 +338,7 @@ def _read(self, getter: Callable, params: MutableMapping[str, Any] = None) -> It properties_list = list(self.properties.keys()) if properties_list: - # TODO: Additional processing was added due to the fact that users receive 414 errors while syncing their streams (issues #3977 and #5835). - # We will need to fix this code when the HubSpot developers add the ability to use a special parameter to get all properties for an entity. - # According to HubSpot Community (https://community.hubspot.com/t5/APIs-Integrations/Get-all-contact-properties-without-explicitly-listing-them/m-p/447950) - # and the official documentation, this does not exist at the moment. 
- stream_records = {} - - for properties in split_properties(properties_list): - params.update({"properties": ",".join(properties)}) - response = getter(params=params) - for record in self._transform(self.parse_response(response)): - if record["id"] not in stream_records: - stream_records[record["id"]] = record - elif stream_records[record["id"]].get("properties"): - stream_records[record["id"]]["properties"].update(record.get("properties", {})) - + stream_records, response = self._read_stream_records(getter=getter, params=params, properties_list=properties_list) yield from [value for key, value in stream_records.items()] else: response = getter(params=params) @@ -427,6 +434,26 @@ def properties(self) -> Mapping[str, Any]: return props + def _flat_associations(self, records: Iterable[MutableMapping]) -> Iterable[MutableMapping]: + """When result has associations we prefer to have it flat, so we transform this: + + "associations": { + "contacts": { + "results": [{"id": "201", "type": "company_to_contact"}, {"id": "251", "type": "company_to_contact"}]} + } + } + + to this: + + "contacts": [201, 251] + """ + for record in records: + if "associations" in record: + associations = record.pop("associations") + for name, association in associations.items(): + record[name] = [row["id"] for row in association.get("results", [])] + yield record + class IncrementalStream(Stream, ABC): """Stream that supports state and incremental read""" @@ -472,6 +499,9 @@ def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator: cursor = self._field_to_datetime(record[self.updated_at_field]) latest_cursor = max(cursor, latest_cursor) if latest_cursor else cursor + self._update_state(latest_cursor=latest_cursor) + + def _update_state(self, latest_cursor): if latest_cursor: new_state = max(latest_cursor, self._state) if self._state else latest_cursor if new_state != self._state: @@ -498,6 +528,92 @@ def read_chunked( yield from super().read(getter, params) +class CRMSearchStream(IncrementalStream, ABC): + + limit = 100 # This value is used only when state is None. + state_pk = "updatedAt" + updated_at_field = "updatedAt" + + @property + def url(self): + return f"/crm/v3/objects/{self.entity}/search" if self.state else f"/crm/v3/objects/{self.entity}" + + def __init__( + self, + entity: Optional[str] = None, + last_modified_field: Optional[str] = None, + associations: Optional[List[str]] = None, + include_archived_only: bool = False, + **kwargs, + ): + super().__init__(**kwargs) + self._state = None + self.entity = entity + self.last_modified_field = last_modified_field + self.associations = associations + self._include_archived_only = include_archived_only + + @retry_connection_handler(max_tries=5, factor=5) + @retry_after_handler(fixed_retry_after=1, max_tries=3) + def search( + self, url: str, data: Mapping[str, Any], params: MutableMapping[str, Any] = None + ) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]: + # We can safely retry this POST call, because it's a search operation. + # Given Hubspot does not return any Retry-After header (https://developers.hubspot.com/docs/api/crm/search) + # from the search endpoint, it waits one second after trying again. + # As per their docs: `These search endpoints are rate limited to four requests per second per authentication token`. 
+ return self._api.post(url=url, data=data, params=params) + + def list_records(self, fields) -> Iterable: + params = { + "archived": str(self._include_archived_only).lower(), + "associations": self.associations, + } + if self.state: + generator = self.read(partial(self.search, url=self.url), params) + else: + generator = self.read(partial(self._api.get, url=self.url), params) + yield from self._flat_associations(self._filter_old_records(generator)) + + def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator: + """Apply state filter to set of records, update cursor(state) if necessary in the end""" + latest_cursor = None + default_params = {"limit": self.limit} + params = {**default_params, **params} if params else {**default_params} + properties_list = list(self.properties.keys()) + + payload = ( + { + "filters": [{"value": int(self._state.timestamp() * 1000), "propertyName": self.last_modified_field, "operator": "GTE"}], + "properties": properties_list, + "limit": 100, + } + if self.state + else {} + ) + + while True: + stream_records = {} + if self.state: + response = getter(data=payload) + for record in self._transform(self.parse_response(response)): + stream_records[record["id"]] = record + else: + stream_records, response = self._read_stream_records(getter=getter, params=params, properties_list=properties_list) + + for _, record in stream_records.items(): + yield record + cursor = self._field_to_datetime(record[self.updated_at_field]) + latest_cursor = max(cursor, latest_cursor) if latest_cursor else cursor + if "paging" in response and "next" in response["paging"] and "after" in response["paging"]["next"]: + params["after"] = response["paging"]["next"]["after"] + payload["after"] = response["paging"]["next"]["after"] + else: + break + + self._update_state(latest_cursor=latest_cursor) + + class CRMObjectStream(Stream): """Unified stream interface for CRM objects. 
You need to provide `entity` parameter to read concrete stream, possible values are: @@ -528,7 +644,7 @@ def __init__( if not self.entity: raise ValueError("Entity must be set either on class or instance level") - def list(self, fields) -> Iterable: + def list_records(self, fields) -> Iterable: params = { "archived": str(self._include_archived_only).lower(), "associations": self.associations, @@ -536,26 +652,6 @@ def list(self, fields) -> Iterable: generator = self.read(partial(self._api.get, url=self.url), params) yield from self._flat_associations(generator) - def _flat_associations(self, records: Iterable[MutableMapping]) -> Iterable[MutableMapping]: - """When result has associations we prefer to have it flat, so we transform this: - - "associations": { - "contacts": { - "results": [{"id": "201", "type": "company_to_contact"}, {"id": "251", "type": "company_to_contact"}]} - } - } - - to this: - - "contacts": [201, 251] - """ - for record in records: - if "associations" in record: - associations = record.pop("associations") - for name, association in associations.items(): - record[name] = [row["id"] for row in association.get("results", [])] - yield record - class CRMObjectIncrementalStream(CRMObjectStream, IncrementalStream): state_pk = "updatedAt" @@ -575,7 +671,7 @@ class CampaignStream(Stream): limit = 500 updated_at_field = "lastUpdatedTime" - def list(self, fields) -> Iterable: + def list_records(self, fields) -> Iterable: for row in self.read(getter=partial(self._api.get, url=self.url)): record = self._api.get(f"/email/public/v1/campaigns/{row['id']}") yield {**row, **record} @@ -623,7 +719,7 @@ def _transform(self, records: Iterable) -> Iterable: for item in record.get("list-memberships", []): yield {"canonical-vid": canonical_vid, **item} - def list(self, fields) -> Iterable: + def list_records(self, fields) -> Iterable: """Receiving all contacts with list memberships""" params = {"showListMemberships": True} yield from self.read(partial(self._api.get, url=self.url), params) @@ -648,24 +744,24 @@ def _transform(self, records: Iterable) -> Iterable: if updated_at: yield {"id": record.get("dealId"), "dealstage": dealstage, self.updated_at_field: updated_at} - def list(self, fields) -> Iterable: + def list_records(self, fields) -> Iterable: params = {"propertiesWithHistory": "dealstage"} yield from self.read(partial(self._api.get, url=self.url), params) -class DealStream(CRMObjectIncrementalStream): +class DealStream(CRMSearchStream): """Deals, API v3""" def __init__(self, **kwargs): - super().__init__(entity="deal", **kwargs) + super().__init__(entity="deal", last_modified_field="hs_lastmodifieddate", **kwargs) self._stage_history = DealStageHistoryStream(**kwargs) - def list(self, fields) -> Iterable: + def list_records(self, fields) -> Iterable: history_by_id = {} - for record in self._stage_history.list(fields): + for record in self._stage_history.list_records(fields): if all(field in record for field in ("id", "dealstage")): history_by_id[record["id"]] = record["dealstage"] - for record in super().list(fields): + for record in super().list_records(fields): if record.get("id") and int(record["id"]) in history_by_id: record["dealstage"] = history_by_id[int(record["id"])] yield record @@ -705,9 +801,10 @@ class EmailEventStream(IncrementalStream): created_at_field = "created" -class EngagementStream(Stream): +class EngagementStream(IncrementalStream): """Engagements, API v1 Docs: https://legacydocs.hubspot.com/docs/methods/engagements/get-all-engagements + 
https://legacydocs.hubspot.com/docs/methods/engagements/get-recent-engagements """ url = "/engagements/v1/engagements/paged" @@ -715,10 +812,50 @@ class EngagementStream(Stream): limit = 250 updated_at_field = "lastUpdated" created_at_field = "createdAt" + state_pk = "lastUpdated" + + @property + def url(self): + if self.state: + return "/engagements/v1/engagements/recent/modified" + return "/engagements/v1/engagements/paged" + + @property + def state(self) -> Optional[Mapping[str, Any]]: + """Current state, if wasn't set return None""" + return {self.state_pk: self._state} if self._state else None + + @state.setter + def state(self, value): + state = value[self.state_pk] + self._state = state + self._start_date = max(self._field_to_datetime(self._state), self._start_date) def _transform(self, records: Iterable) -> Iterable: yield from super()._transform({**record.pop("engagement"), **record} for record in records) + def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator: + max_last_updated_at = None + default_params = {self.limit_field: self.limit} + params = {**default_params, **params} if params else {**default_params} + if self.state: + params["since"] = self._state + count = 0 + for record in self._filter_old_records(self._read(getter, params)): + yield record + count += 1 + cursor = record[self.updated_at_field] + max_last_updated_at = max(cursor, max_last_updated_at) if max_last_updated_at else cursor + + logger.info(f"Processed {count} records") + + if max_last_updated_at: + new_state = max(max_last_updated_at, self._state) if self._state else max_last_updated_at + if new_state != self._state: + logger.info(f"Advancing bookmark for engagement stream from {self._state} to {max_last_updated_at}") + self._state = new_state + self._start_date = self._state + class FormStream(Stream): """Marketing Forms, API v3 @@ -753,7 +890,7 @@ def _transform(self, records: Iterable) -> Iterable: yield record - def list(self, fields) -> Iterable: + def list_records(self, fields) -> Iterable: for form in self.read(getter=partial(self._api.get, url="/marketing/v3/forms")): for submission in self.read(getter=partial(self._api.get, url=f"{self.url}/{form['id']}")): submission["formId"] = form["id"] diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py index 7768755fc387d..80e97632e26e2 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py @@ -14,6 +14,7 @@ ContactListStream, ContactsListMembershipsStream, CRMObjectIncrementalStream, + CRMSearchStream, DealPipelineStream, DealStream, EmailEventStream, @@ -37,9 +38,13 @@ def __init__(self, start_date, credentials, **kwargs): common_params = dict(api=self._api, start_date=self._start_date) self._apis = { "campaigns": CampaignStream(**common_params), - "companies": CRMObjectIncrementalStream(entity="company", associations=["contacts"], **common_params), + "companies": CRMSearchStream( + entity="company", last_modified_field="hs_lastmodifieddate", associations=["contacts"], **common_params + ), "contact_lists": ContactListStream(**common_params), - "contacts": CRMObjectIncrementalStream(entity="contact", **common_params), + "contacts": CRMSearchStream( + entity="contact", last_modified_field="lastmodifieddate", associations=["contacts"], **common_params + ), "contacts_list_memberships": 
ContactsListMembershipsStream(**common_params), "deal_pipelines": DealPipelineStream(**common_params), "deals": DealStream(associations=["contacts"], **common_params), @@ -63,7 +68,7 @@ def __init__(self, start_date, credentials, **kwargs): super().__init__(**kwargs) def _enumerate_methods(self) -> Mapping[str, Callable]: - return {name: api.list for name, api in self._apis.items()} + return {name: api.list_records for name, api in self._apis.items()} @property def streams(self) -> Iterator[AirbyteStream]: diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index 79e463b79434e..ffa18c68000ce 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -110,8 +110,10 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------| +| 0.1.33 | 2021-01-14 | [8887](https://github.com/airbytehq/airbyte/pull/8887) | More efficient support for incremental updates on Companies, Contact, Deals and Engagement streams | | 0.1.32 | 2022-01-13 | [8011](https://github.com/airbytehq/airbyte/pull/8011) | Add new stream form_submissions | | 0.1.31 | 2022-01-11 | [9385](https://github.com/airbytehq/airbyte/pull/9385) | Remove auto-generated `properties` from `Engagements` stream | + | 0.1.30 | 2021-01-10 | [9129](https://github.com/airbytehq/airbyte/pull/9129) | Created Contacts list memberships streams | | 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | | 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update fields and descriptions | From cb6d9abcab0a14a881d002a60f4337e305b8dd45 Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Mon, 17 Jan 2022 15:21:35 +0200 Subject: [PATCH 137/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20Github:=20Remov?= =?UTF-8?q?e=20optional=20parameter=20`Accept`=20for=20reaction's=20stream?= =?UTF-8?q?s=20to=20fix=20error=20with=20`502`=20HTTP=20status=20code=20(#?= =?UTF-8?q?9492)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Source Github: Remove optional parameter Accept for reaction's streams to fix error with 502 HTTP status code --- .../main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-github/Dockerfile | 2 +- .../source-github/source_github/streams.py | 10 ++++++---- .../source-github/unit_tests/test_stream.py | 14 ++++++++++++-- docs/integrations/sources/github.md | 1 + 6 files changed, 22 insertions(+), 9 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 9347a5dc1119d..fe8fb0d204504 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -217,7 +217,7 @@ - name: GitHub sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.2.10 + dockerImageTag: 0.2.11 documentationUrl: 
https://docs.airbyte.io/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 723f59f8fdd60..b1c3d5005fb4c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1985,7 +1985,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.2.10" +- dockerImage: "airbyte/source-github:0.2.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/github" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index 3a7e115d64896..3508845d3b6ad 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.10 +LABEL io.airbyte.version=0.2.11 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 62c65a6956888..a9499124ac303 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -52,10 +52,15 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, def should_retry(self, response: requests.Response) -> bool: # We don't call `super()` here because we have custom error handling and GitHub API sometimes returns strange # errors. So in `read_records()` we have custom error handling which don't require to call `super()` here. - return response.headers.get("X-RateLimit-Remaining") == "0" or response.status_code in ( + retry_flag = response.headers.get("X-RateLimit-Remaining") == "0" or response.status_code in ( requests.codes.SERVER_ERROR, requests.codes.BAD_GATEWAY, ) + if retry_flag: + self.logger.info( + f"Rate limit handling for the response with {response.status_code} status code with message: {response.json()}" + ) + return retry_flag def backoff_time(self, response: requests.Response) -> Union[int, float]: # This method is called if we run into the rate limit. 
GitHub limits requests to 5000 per hour and provides @@ -765,9 +770,6 @@ def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: for parent_record in self._parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice): yield {self.parent_key: parent_record[self.parent_key], "repository": stream_slice["repository"]} - def request_headers(self, **kwargs) -> Mapping[str, Any]: - return {"Accept": "application/vnd.github.squirrel-girl-preview+json"} - class CommitCommentReactions(ReactionStream): """ diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 1f06233fd45a2..9bc04e8db2729 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -20,7 +20,12 @@ def test_bad_gateway_retry(time_mock): stream = PullRequestCommentReactions(**args) stream_slice = {"repository": "test_repo", "id": "id"} - responses.add("GET", "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", status=HTTPStatus.BAD_GATEWAY) + responses.add( + "GET", + "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", + status=HTTPStatus.BAD_GATEWAY, + json={"message": "Bad request"}, + ) with pytest.raises(BaseBackoffException): list(stream.read_records(sync_mode="full_refresh", stream_slice=stream_slice)) @@ -28,7 +33,12 @@ def test_bad_gateway_retry(time_mock): assert sleep_delays == DEFAULT_BACKOFF_DELAYS time_mock.reset_mock() - responses.add("GET", "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", status=HTTPStatus.INTERNAL_SERVER_ERROR) + responses.add( + "GET", + "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", + status=HTTPStatus.INTERNAL_SERVER_ERROR, + json={"message": "Server Error"}, + ) with pytest.raises(BaseBackoffException): list(stream.read_records(sync_mode="full_refresh", stream_slice=stream_slice)) diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 7e6b0972a08eb..76ec536519b2d 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -92,6 +92,7 @@ Your token should have at least the `repo` scope. 
Depending on which streams you | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | | 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | | 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | | 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | From edc0925ecaadf581c8c546f0f82cef77e9f2ff54 Mon Sep 17 00:00:00 2001 From: Anna Lvova <37615075+annalvova05@users.noreply.github.com> Date: Mon, 17 Jan 2022 17:17:52 +0100 Subject: [PATCH 138/215] =?UTF-8?q?=F0=9F=8E=89=20SurveyMonkey=20source:?= =?UTF-8?q?=20scopes=20change=20(#9508)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * change list of scopes * upd changelog * bump version --- .../main/resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-surveymonkey/Dockerfile | 2 +- .../source_surveymonkey/source.py | 16 +--------------- docs/integrations/sources/surveymonkey.md | 17 +++++++++-------- 5 files changed, 13 insertions(+), 26 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index fe8fb0d204504..7160cb6ecc2d3 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -696,7 +696,7 @@ - name: SurveyMonkey sourceDefinitionId: badc5925-0485-42be-8caa-b34096cb71b5 dockerRepository: airbyte/source-surveymonkey - dockerImageTag: 0.1.5 + dockerImageTag: 0.1.6 documentationUrl: https://docs.airbyte.io/integrations/sources/surveymonkey icon: surveymonkey.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index b1c3d5005fb4c..e8cafd8576377 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -7236,7 +7236,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-surveymonkey:0.1.5" +- dockerImage: "airbyte/source-surveymonkey:0.1.6" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/surveymonkey" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-surveymonkey/Dockerfile b/airbyte-integrations/connectors/source-surveymonkey/Dockerfile index 734ce959d6a54..3afe32aaffdba 100644 --- a/airbyte-integrations/connectors/source-surveymonkey/Dockerfile +++ b/airbyte-integrations/connectors/source-surveymonkey/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.5 +LABEL io.airbyte.version=0.1.6 LABEL io.airbyte.name=airbyte/source-surveymonkey diff --git a/airbyte-integrations/connectors/source-surveymonkey/source_surveymonkey/source.py b/airbyte-integrations/connectors/source-surveymonkey/source_surveymonkey/source.py index 48a6fbbca0adf..23effdc7bc752 100644 --- a/airbyte-integrations/connectors/source-surveymonkey/source_surveymonkey/source.py +++ b/airbyte-integrations/connectors/source-surveymonkey/source_surveymonkey/source.py @@ -16,21 +16,7 @@ class SourceSurveymonkey(AbstractSource): - SCOPES = { - "collectors_read", - "contacts_read", - "groups_read", - "library_read", - "responses_read", - "responses_read_detail", - "roles_read", - "surveys_read", - "users_read", - "webhooks_read", - "workgroups_members_read", - "workgroups_read", - "workgroups_shares_read", - } + SCOPES = {"responses_read_detail", "surveys_read", "users_read"} def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]: url = "https://api.surveymonkey.com/v3/users/me" diff --git a/docs/integrations/sources/surveymonkey.md b/docs/integrations/sources/surveymonkey.md index 08b65ce2ccd7b..950a1be7cea02 100644 --- a/docs/integrations/sources/surveymonkey.md +++ b/docs/integrations/sources/surveymonkey.md @@ -53,12 +53,13 @@ Please read this [docs](https://developer.surveymonkey.com/api/v3/#getting-start ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.5 | 2021-12-28 | [8628](https://github.com/airbytehq/airbyte/pull/8628) | Update fields in source-connectors specifications | -| 0.1.4 | 2021-11-11 | [7868](https://github.com/airbytehq/airbyte/pull/7868) | Improve 'check' using '/users/me' API call | -| 0.1.3 | 2021-11-01 | [7433](https://github.com/airbytehq/airbyte/pull/7433) | Remove unsused oAuth flow parameters | -| 0.1.2 | 2021-10-27 | [7433](https://github.com/airbytehq/airbyte/pull/7433) | Add OAuth support | -| 0.1.1 | 2021-09-10 | [5983](https://github.com/airbytehq/airbyte/pull/5983) | Fix caching for gzip compressed http response | -| 0.1.0 | 2021-07-06 | [4097](https://github.com/airbytehq/airbyte/pull/4097) | Initial Release | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:-------------------------------------------------------|:--------------------------------------------------| +| 0.1.6 | 2022-01-14 | [9508](https://github.com/airbytehq/airbyte/pull/9508) | Scopes change | +| 0.1.5 | 2021-12-28 | [8628](https://github.com/airbytehq/airbyte/pull/8628) | Update fields in source-connectors specifications | +| 0.1.4 | 2021-11-11 | [7868](https://github.com/airbytehq/airbyte/pull/7868) | Improve 'check' using '/users/me' API call | +| 0.1.3 | 2021-11-01 | [7433](https://github.com/airbytehq/airbyte/pull/7433) | Remove unsused oAuth flow parameters | +| 0.1.2 | 2021-10-27 | [7433](https://github.com/airbytehq/airbyte/pull/7433) | Add OAuth support | +| 0.1.1 | 2021-09-10 | [5983](https://github.com/airbytehq/airbyte/pull/5983) | Fix caching for gzip compressed http response | +| 0.1.0 | 2021-07-06 | [4097](https://github.com/airbytehq/airbyte/pull/4097) | Initial Release | From 032b06d2e2062da5f29f12543d621eee5a539b02 Mon Sep 17 00:00:00 2001 From: "Sherif A. 
Nada" Date: Mon, 17 Jan 2022 16:53:35 -0800 Subject: [PATCH 139/215] Add section to UX handbook about pointing to page anchors --- docs/connector-development/ux-handbook.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/connector-development/ux-handbook.md b/docs/connector-development/ux-handbook.md index e640ea7dda77b..f0c1fcad400e0 100644 --- a/docs/connector-development/ux-handbook.md +++ b/docs/connector-development/ux-handbook.md @@ -187,6 +187,10 @@ Adding a parameter “attribution\_lookback\_window” with no explanation might If a user needs to obtain an API key or host name, tell them exactly where to find it. Ideally you would show them screenshots, though include a date and API version in those if possible, so it’s clear when they’ve aged out of date. +**Links should point to page anchors where applicable**. + +Often, you are trying to redirect the user to a specific part of the page. For example, if you wanted to point someone to the "Input Configuration" section of this doc, it is better to point them to `https://docs.airbyte.com/connector-development/ux-handbook#input-configuration` instead of `https://docs.airbyte.com/connector-development/ux-handbook`. + **Fail fast & actionably** A user should not be able to configure something that will not work. If a user’s configuration is invalid, we should inform them as precisely as possible about what they need to do to fix the issue. From 6b502d8c326bcde791128f5711a21cc03bde19f3 Mon Sep 17 00:00:00 2001 From: midavadim Date: Tue, 18 Jan 2022 09:11:27 +0200 Subject: [PATCH 140/215] =?UTF-8?q?=F0=9F=8E=89=20square:=20added=20oauth?= =?UTF-8?q?=20support=20(#6842)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fixed test which check incorrect cred config * Added oauth2 authentication * Added oauth creds * fixed formatting * added oauth2 spec section, added missing type hints * Added java part of Square OAuth * fixed checkstyle * removed commented code * added support for old format of spec.json files, updated change logs docs * renamed spec property 'authentication' to default 'credentials'. fixed changes in java part * recovered empty files * updated OAuthImplementationFactory.java * fixed issue with autheticator for sub streams, added config catalog with all streams, updated docs * use advanced_auth * added advanced_auth * moved scopes to private property * updated source version * Revert "updated source version" This reverts commit ce3d06165c4bbbe1592e22203d6b6c545deec9a9. 
* updated source version * added new version for airbyte index Co-authored-by: ievgeniit --- .../77225a51-cd15-4a13-af02-65816bd0ecf4.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 118 ++++++++++++++-- .../connectors/source-square/Dockerfile | 2 +- .../source-square/acceptance-test-config.yml | 6 + .../configured_catalog_oauth.json | 40 ++++++ .../source-square/source_square/source.py | 87 ++++++++++-- .../source-square/source_square/spec.json | 131 ++++++++++++++++-- .../unit_tests/connection_test.py | 8 +- .../oauth/OAuthImplementationFactory.java | 1 + .../airbyte/oauth/flows/SquareOAuthFlow.java | 106 ++++++++++++++ .../SquareOAuthFlowIntegrationTest.java | 90 ++++++++++++ .../oauth/flows/SquareOAuthFlowTest.java | 23 +++ docs/integrations/sources/square.md | 13 +- 14 files changed, 588 insertions(+), 41 deletions(-) create mode 100644 airbyte-integrations/connectors/source-square/integration_tests/configured_catalog_oauth.json create mode 100644 airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SquareOAuthFlow.java create mode 100644 airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SquareOAuthFlowIntegrationTest.java create mode 100644 airbyte-oauth/src/test/java/io/airbyte/oauth/flows/SquareOAuthFlowTest.java diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/77225a51-cd15-4a13-af02-65816bd0ecf4.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/77225a51-cd15-4a13-af02-65816bd0ecf4.json index 7daaf44dedab3..e5fb4122ccebc 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/77225a51-cd15-4a13-af02-65816bd0ecf4.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/77225a51-cd15-4a13-af02-65816bd0ecf4.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "77225a51-cd15-4a13-af02-65816bd0ecf4", "name": "Square", "dockerRepository": "airbyte/source-square", - "dockerImageTag": "0.1.3", + "dockerImageTag": "0.1.4", "documentationUrl": "https://docs.airbyte.io/integrations/sources/square", "icon": "square.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7160cb6ecc2d3..372fbe1101018 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -676,7 +676,7 @@ - name: Square sourceDefinitionId: 77225a51-cd15-4a13-af02-65816bd0ecf4 dockerRepository: airbyte/source-square - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/square icon: square.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index e8cafd8576377..1a6fc9b152605 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -7049,7 +7049,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-square:0.1.3" +- dockerImage: "airbyte/source-square:0.1.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/square" connectionSpecification: @@ -7057,15 +7057,9 @@ title: "Square Source CDK Specifications" type: "object" required: - - "api_key" - "is_sandbox" - additionalProperties: false + additionalProperties: 
true properties: - api_key: - type: "string" - description: "The API key for a Square application." - title: "API Key" - airbyte_secret: true is_sandbox: type: "boolean" description: "Determines whether to use the sandbox or production environment." @@ -7073,7 +7067,7 @@ examples: - true - false - default: true + default: false start_date: type: "string" description: "UTC date in the format YYYY-MM-DD. Any data before this date\ @@ -7081,20 +7075,122 @@ title: "Start Date" examples: - "2021-01-01" - default: "1970-01-01" + default: "2021-01-01" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" include_deleted_objects: type: "boolean" description: "In some streams there is an option to include deleted objects\ \ (Items, Categories, Discounts, Taxes)" - title: "Include Deleded Objects" + title: "Include Deleted Objects" examples: - true - false default: false + credentials: + type: "object" + title: "Credential Type" + oneOf: + - title: "Oauth authentication" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Oauth" + enum: + - "Oauth" + default: "Oauth" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Square-issued ID of your application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The Square-issued application secret for your application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - type: "object" + title: "API Key" + required: + - "auth_type" + - "api_key" + properties: + auth_type: + type: "string" + const: "Apikey" + enum: + - "Apikey" + default: "Apikey" + order: 1 + api_key: + title: "API key token" + type: "string" + description: "The API key for a Square application" + airbyte_secret: true supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" + advanced_auth: + auth_flow_type: "oauth2.0" + predicate_key: + - "credentials" + - "auth_type" + predicate_value: "Oauth" + oauth_config_specification: + complete_oauth_output_specification: + type: "object" + additionalProperties: false + properties: + refresh_token: + type: "string" + path_in_connector_config: + - "credentials" + - "refresh_token" + complete_oauth_server_input_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + client_secret: + type: "string" + complete_oauth_server_output_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + path_in_connector_config: + - "credentials" + - "client_id" + client_secret: + type: "string" + path_in_connector_config: + - "credentials" + - "client_secret" - dockerImage: "airbyte/source-strava:0.1.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/strava" diff --git a/airbyte-integrations/connectors/source-square/Dockerfile b/airbyte-integrations/connectors/source-square/Dockerfile index ec5e91b1622fd..ef116771ded83 100644 --- a/airbyte-integrations/connectors/source-square/Dockerfile +++ b/airbyte-integrations/connectors/source-square/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-square diff --git a/airbyte-integrations/connectors/source-square/acceptance-test-config.yml b/airbyte-integrations/connectors/source-square/acceptance-test-config.yml index 68414755e1ea8..e3cfdf9227838 100644 --- a/airbyte-integrations/connectors/source-square/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-square/acceptance-test-config.yml @@ -5,13 +5,18 @@ tests: connection: - config_path: "secrets/config.json" status: "succeed" + - config_path: "secrets/config_oauth.json" + status: "succeed" - config_path: "integration_tests/invalid_config.json" status: "failed" discovery: - config_path: "secrets/config.json" + - config_path: "secrets/config_oauth.json" basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + - config_path: "secrets/config_oauth.json" + configured_catalog_path: "integration_tests/configured_catalog_oauth.json" incremental: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" @@ -19,3 +24,4 @@ tests: full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + diff --git a/airbyte-integrations/connectors/source-square/integration_tests/configured_catalog_oauth.json b/airbyte-integrations/connectors/source-square/integration_tests/configured_catalog_oauth.json new file mode 100644 index 0000000000000..316537a5a67fe --- /dev/null +++ b/airbyte-integrations/connectors/source-square/integration_tests/configured_catalog_oauth.json @@ -0,0 +1,40 @@ +{ + "streams": [ + { + "stream": { + "name": "locations", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": ["id"] + }, + "sync_mode": "full_refresh", + "cursor_field": ["id"], + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "team_members", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": ["id"] + }, + "sync_mode": "full_refresh", + "cursor_field": ["id"], + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "team_member_wages", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": ["id"] + }, + "sync_mode": "full_refresh", + "cursor_field": ["id"], + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-square/source_square/source.py b/airbyte-integrations/connectors/source-square/source_square/source.py index 1c697cfce1d70..6d0bd64c638f3 100644 --- a/airbyte-integrations/connectors/source-square/source_square/source.py +++ b/airbyte-integrations/connectors/source-square/source_square/source.py @@ -4,15 +4,18 @@ import json from abc import ABC, abstractmethod -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union import pendulum import requests +from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import SyncMode from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.http import HttpStream -from 
airbyte_cdk.sources.streams.http.auth import TokenAuthenticator +from airbyte_cdk.sources.streams.http.auth.core import HttpAuthenticator +from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator, TokenAuthenticator +from requests.auth import AuthBase from source_square.utils import separate_items_by_count @@ -35,8 +38,16 @@ def parse_square_error_response(error: requests.exceptions.HTTPError) -> SquareE class SquareStream(HttpStream, ABC): - def __init__(self, is_sandbox: bool, api_version: str, start_date: str, include_deleted_objects: bool, **kwargs): - super().__init__(**kwargs) + def __init__( + self, + is_sandbox: bool, + api_version: str, + start_date: str, + include_deleted_objects: bool, + authenticator: Union[AuthBase, HttpAuthenticator], + ): + super().__init__(authenticator) + self._authenticator = authenticator self.is_sandbox = is_sandbox self.api_version = api_version # Converting users ISO 8601 format (YYYY-MM-DD) to RFC 3339 (2021-06-14T13:47:56.799Z) @@ -358,16 +369,75 @@ def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: yield {"location_ids": location} +class Oauth2AuthenticatorSquare(Oauth2Authenticator): + def refresh_access_token(self) -> Tuple[str, int]: + """Handle differences in expiration attr: + from API: "expires_at": "2021-11-05T14:26:57Z" + expected: "expires_in": number of seconds + """ + token, expires_at = super().refresh_access_token() + expires_in = pendulum.parse(expires_at) - pendulum.now() + return token, expires_in.seconds + + class SourceSquare(AbstractSource): - api_version = "2021-06-16" # Latest Stable Release + api_version = "2021-09-15" # Latest Stable Release + + @staticmethod + def get_auth(config: Mapping[str, Any]) -> AuthBase: + + credential = config.get("credentials", {}) + auth_type = credential.get("auth_type") + if auth_type == "Oauth": + # scopes needed for all currently supported streams: + scopes = [ + "CUSTOMERS_READ", + "EMPLOYEES_READ", + "ITEMS_READ", + "MERCHANT_PROFILE_READ", + "ORDERS_READ", + "PAYMENTS_READ", + "TIMECARDS_READ", + # OAuth Permissions: + # https://developer.squareup.com/docs/oauth-api/square-permissions + # https://developer.squareup.com/reference/square/enums/OAuthPermission + # "DISPUTES_READ", + # "GIFTCARDS_READ", + # "INVENTORY_READ", + # "INVOICES_READ", + # "TIMECARDS_SETTINGS_READ", + # "LOYALTY_READ", + # "ONLINE_STORE_SITE_READ", + # "ONLINE_STORE_SNIPPETS_READ", + # "SUBSCRIPTIONS_READ", + ] + + auth = Oauth2AuthenticatorSquare( + token_refresh_endpoint="https://connect.squareup.com/oauth2/token", + client_secret=credential.get("client_secret"), + client_id=credential.get("client_id"), + refresh_token=credential.get("refresh_token"), + scopes=scopes, + expires_in_name="expires_at", + ) + elif auth_type == "Apikey": + auth = TokenAuthenticator(token=credential.get("api_key")) + elif not auth_type and config.get("api_key"): + auth = TokenAuthenticator(token=config.get("api_key")) + else: + raise Exception(f"Invalid auth type: {auth_type}") + + return auth - def check_connection(self, logger, config) -> Tuple[bool, any]: + def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, any]: headers = { "Square-Version": self.api_version, - "Authorization": "Bearer {}".format(config["api_key"]), "Content-Type": "application/json", } + auth = self.get_auth(config) + headers.update(auth.get_auth_header()) + url = "https://connect.squareup{}.com/v2/catalog/info".format("sandbox" if config["is_sandbox"] else "") try: @@ 
-383,9 +453,8 @@ def check_connection(self, logger, config) -> Tuple[bool, any]: def streams(self, config: Mapping[str, Any]) -> List[Stream]: - auth = TokenAuthenticator(token=config["api_key"]) args = { - "authenticator": auth, + "authenticator": self.get_auth(config), "is_sandbox": config["is_sandbox"], "api_version": self.api_version, "start_date": config["start_date"], diff --git a/airbyte-integrations/connectors/source-square/source_square/spec.json b/airbyte-integrations/connectors/source-square/source_square/spec.json index 6a7acf383ddbb..23c77b753f8fe 100644 --- a/airbyte-integrations/connectors/source-square/source_square/spec.json +++ b/airbyte-integrations/connectors/source-square/source_square/spec.json @@ -4,37 +4,146 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Square Source CDK Specifications", "type": "object", - "required": ["api_key", "is_sandbox"], - "additionalProperties": false, + "required": ["is_sandbox"], + "additionalProperties": true, "properties": { - "api_key": { - "type": "string", - "description": "The API key for a Square application.", - "title": "API Key", - "airbyte_secret": true - }, "is_sandbox": { "type": "boolean", "description": "Determines whether to use the sandbox or production environment.", "title": "Sandbox", "examples": [true, false], - "default": true + "default": false }, "start_date": { "type": "string", "description": "UTC date in the format YYYY-MM-DD. Any data before this date will not be replicated. If not set, all data will be replicated.", "title": "Start Date", "examples": ["2021-01-01"], - "default": "1970-01-01", + "default": "2021-01-01", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" }, "include_deleted_objects": { "type": "boolean", "description": "In some streams there is an option to include deleted objects (Items, Categories, Discounts, Taxes)", - "title": "Include Deleded Objects", + "title": "Include Deleted Objects", "examples": [true, false], "default": false + }, + "credentials": { + "type": "object", + "title": "Credential Type", + "oneOf": [ + { + "title": "Oauth authentication", + "type": "object", + "required": [ + "auth_type", + "client_id", + "client_secret", + "refresh_token" + ], + "properties": { + "auth_type": { + "type": "string", + "const": "Oauth", + "enum": ["Oauth"], + "default": "Oauth", + "order": 0 + }, + "client_id": { + "title": "Client ID", + "type": "string", + "description": "The Square-issued ID of your application", + "airbyte_secret": true + }, + "client_secret": { + "title": "Client Secret", + "type": "string", + "description": "The Square-issued application secret for your application", + "airbyte_secret": true + }, + "refresh_token": { + "title": "Refresh Token", + "type": "string", + "description": "A refresh token generated using the above client ID and secret", + "airbyte_secret": true + } + } + }, + { + "type": "object", + "title": "API Key", + "required": ["auth_type", "api_key"], + "properties": { + "auth_type": { + "type": "string", + "const": "Apikey", + "enum": ["Apikey"], + "default": "Apikey", + "order": 1 + }, + "api_key": { + "title": "API key token", + "type": "string", + "description": "The API key for a Square application", + "airbyte_secret": true + } + } + } + ] + } + } + }, + "authSpecification": { + "auth_type": "oauth2.0", + "oauth2Specification": { + "rootObject": ["credentials", 0], + "oauthFlowInitParameters": [["client_id"], ["client_secret"]], + "oauthFlowOutputParameters": [["refresh_token"]] + } + }, + "advanced_auth": { + "auth_flow_type": 
"oauth2.0", + "predicate_key": ["credentials", "auth_type"], + "predicate_value": "Oauth", + "oauth_config_specification": { + "complete_oauth_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "refresh_token": { + "type": "string", + "path_in_connector_config": ["credentials", "refresh_token"] + } + } + }, + "complete_oauth_server_input_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string" + }, + "client_secret": { + "type": "string" + } + } + }, + "complete_oauth_server_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string", + "path_in_connector_config": ["credentials", "client_id"] + }, + "client_secret": { + "type": "string", + "path_in_connector_config": ["credentials", "client_secret"] + } + } } } } + } diff --git a/airbyte-integrations/connectors/source-square/unit_tests/connection_test.py b/airbyte-integrations/connectors/source-square/unit_tests/connection_test.py index f47cb7ca956b9..5e028777a7cf8 100644 --- a/airbyte-integrations/connectors/source-square/unit_tests/connection_test.py +++ b/airbyte-integrations/connectors/source-square/unit_tests/connection_test.py @@ -8,5 +8,11 @@ def test_source_wrong_credentials(): source = SourceSquare() - status, error = source.check_connection(logger=AirbyteLogger(), config={"api_key": "wrong.api.key", "is_sandbox": True}) + config = { + "credentials": {"auth_type": "Apikey", "api_key": "bla"}, + "is_sandbox": True, + "start_date": "2021-06-01", + "include_deleted_objects": False, + } + status, error = source.check_connection(logger=AirbyteLogger(), config=config) assert not status diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index d9de82bf9eccb..7a549d65a3650 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -47,6 +47,7 @@ public OAuthImplementationFactory(final ConfigRepository configRepository, final .put("airbyte/source-salesforce", new SalesforceOAuthFlow(configRepository, httpClient)) .put("airbyte/source-slack", new SlackOAuthFlow(configRepository, httpClient)) .put("airbyte/source-snapchat-marketing", new SnapchatMarketingOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-square", new SquareOAuthFlow(configRepository, httpClient)) .put("airbyte/source-strava", new StravaOAuthFlow(configRepository, httpClient)) .put("airbyte/source-surveymonkey", new SurveymonkeyOAuthFlow(configRepository, httpClient)) .put("airbyte/source-trello", new TrelloOAuthFlow(configRepository, httpClient)) diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SquareOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SquareOAuthFlow.java new file mode 100644 index 0000000000000..1e9d821a67a1b --- /dev/null +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SquareOAuthFlow.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.BaseOAuth2Flow; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URLDecoder; +import java.net.http.HttpClient; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import org.apache.http.client.utils.URIBuilder; + +public class SquareOAuthFlow extends BaseOAuth2Flow { + + private static final List SCOPES = Arrays.asList( + "CUSTOMERS_READ", + "EMPLOYEES_READ", + "ITEMS_READ", + "MERCHANT_PROFILE_READ", + "ORDERS_READ", + "PAYMENTS_READ", + "TIMECARDS_READ" + // OAuth Permissions: + // https://developer.squareup.com/docs/oauth-api/square-permissions + // https://developer.squareup.com/reference/square/enums/OAuthPermission + // "DISPUTES_READ", + // "GIFTCARDS_READ", + // "INVENTORY_READ", + // "INVOICES_READ", + // "TIMECARDS_SETTINGS_READ", + // "LOYALTY_READ", + // "ONLINE_STORE_SITE_READ", + // "ONLINE_STORE_SNIPPETS_READ", + // "SUBSCRIPTIONS_READ", + ); + private static final String AUTHORIZE_URL = "https://connect.squareup.com/oauth2/authorize"; + private static final String ACCESS_TOKEN_URL = "https://connect.squareup.com/oauth2/token"; + + public SquareOAuthFlow(ConfigRepository configRepository, final HttpClient httpClient) { + super(configRepository, httpClient); + } + + @VisibleForTesting + public SquareOAuthFlow(ConfigRepository configRepository, + HttpClient httpClient, + Supplier stateSupplier) { + super(configRepository, httpClient, stateSupplier); + } + + @Override + protected String formatConsentUrl(final UUID definitionId, + final String clientId, + final String redirectUrl, + final JsonNode inputOAuthConfiguration) + throws IOException { + try { + // Need to have decoded format, otherwise square fails saying that scope is incorrect + return URLDecoder.decode(new URIBuilder(AUTHORIZE_URL) + .addParameter("client_id", clientId) + .addParameter("scope", String.join("+", SCOPES)) + .addParameter("session", "False") + .addParameter("state", getState()) + .build().toString(), StandardCharsets.UTF_8); + } catch (URISyntaxException e) { + throw new IOException("Failed to format Consent URL for OAuth flow", e); + } + } + + @Override + protected String getAccessTokenUrl(final JsonNode inputOAuthConfiguration) { + return ACCESS_TOKEN_URL; + } + + @Override + protected Map getAccessTokenQueryParameters(String clientId, + String clientSecret, + String authCode, + String redirectUrl) { + String scopes = SCOPES.stream() + .map(name -> ('"' + name + '"')) + .collect(Collectors.joining(",")); + scopes = '[' + scopes + ']'; + + return ImmutableMap.builder() + // required + .put("client_id", clientId) + .put("client_secret", clientSecret) + .put("code", authCode) + .put("grant_type", "authorization_code") + .put("scopes", scopes) + .build(); + } + +} diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SquareOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SquareOAuthFlowIntegrationTest.java new file mode 100644 index 0000000000000..03da842f2471b --- /dev/null +++ 
b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SquareOAuthFlowIntegrationTest.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.SourceOAuthParameter; +import io.airbyte.config.persistence.ConfigNotFoundException; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.OAuthFlowImplementation; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.net.http.HttpClient; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class SquareOAuthFlowIntegrationTest extends OAuthFlowIntegrationTest { + + protected static final Path CREDENTIALS_PATH = Path.of("secrets/square.json"); + protected static final String REDIRECT_URL = "http://localhost:8000/code"; + protected static final int SERVER_LISTENING_PORT = 8000; + + @Override + protected Path getCredentialsPath() { + return CREDENTIALS_PATH; + } + + @Override + protected OAuthFlowImplementation getFlowImplementation(final ConfigRepository configRepository, final HttpClient httpClient) { + return new SquareOAuthFlow(configRepository, httpClient); + } + + @Override + protected int getServerListeningPort() { + return SERVER_LISTENING_PORT; + } + + @Override + @BeforeEach + public void setup() throws IOException { + super.setup(); + } + + @Test + public void testFullSquareOAuthFlow() throws InterruptedException, ConfigNotFoundException, IOException, JsonValidationException { + int limit = 20; + final UUID workspaceId = UUID.randomUUID(); + final UUID definitionId = UUID.randomUUID(); + final String fullConfigAsString = new String(Files.readAllBytes(CREDENTIALS_PATH)); + final JsonNode credentialsJson = Jsons.deserialize(fullConfigAsString); + when(configRepository.listSourceOAuthParam()).thenReturn(List.of(new SourceOAuthParameter() + .withOauthParameterId(UUID.randomUUID()) + .withSourceDefinitionId(definitionId) + .withWorkspaceId(workspaceId) + .withConfiguration(Jsons.jsonNode( + Map.of("authorization", + ImmutableMap.builder() + .put("client_id", credentialsJson.get("client_id").asText()) + .put("client_secret", credentialsJson.get("client_secret").asText()) + .build()))))); + + final String url = flow.getSourceConsentUrl(workspaceId, definitionId, REDIRECT_URL, Jsons.emptyObject(), null); + LOGGER.info("Waiting for user consent at: {}", url); + + // TODO: To automate, start a selenium job to navigate to the Consent URL and click on allowing + // access... 
+ while (!serverHandler.isSucceeded() && limit > 0) { + Thread.sleep(1000); + limit -= 1; + } + assertTrue(serverHandler.isSucceeded(), "Failed to get User consent on time"); + final Map params = flow.completeSourceOAuth(workspaceId, definitionId, + Map.of("code", serverHandler.getParamValue()), REDIRECT_URL); + LOGGER.info("Response from completing OAuth Flow is: {}", params.toString()); + assertTrue(params.containsKey("access_token")); + assertTrue(params.get("access_token").toString().length() > 0); + } + +} diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/SquareOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/SquareOAuthFlowTest.java new file mode 100644 index 0000000000000..5c54891086a59 --- /dev/null +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/SquareOAuthFlowTest.java @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import io.airbyte.oauth.BaseOAuthFlow; + +public class SquareOAuthFlowTest extends BaseOAuthFlowTest { + + @Override + protected BaseOAuthFlow getOAuthFlow() { + return new SquareOAuthFlow(getConfigRepository(), getHttpClient(), this::getConstantState); + } + + @Override + protected String getExpectedConsentUrl() { + return "https://connect.squareup.com/oauth2/authorize?client_id=test_client_id" + + "&scope=CUSTOMERS_READ+EMPLOYEES_READ+ITEMS_READ+MERCHANT_PROFILE_READ+ORDERS_READ+PAYMENTS_READ+TIMECARDS_READ" + + "&session=False&state=state"; + } + +} diff --git a/docs/integrations/sources/square.md b/docs/integrations/sources/square.md index 559b64b60ea9c..ee964435d3e52 100644 --- a/docs/integrations/sources/square.md +++ b/docs/integrations/sources/square.md @@ -75,10 +75,11 @@ Some Square API endpoints has different page size limitation ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.3 | 2021-12-06 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | -| 0.1.2 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | -| 0.1.1 | 2021-07-09 | [4645](https://github.com/airbytehq/airbyte/pull/4645) | Update \_send\_request method due to Airbyte CDK changes | -| 0.1.0 | 2021-06-30 | [4439](https://github.com/airbytehq/airbyte/pull/4439) | Initial release supporting the Square API | +| Version | Date | Pull Request | Subject | +|:--------|:-----------| :--- |:---------------------------------------------------------| +| 0.1.4 | 2021-12-02 | [6842](https://github.com/airbytehq/airbyte/pull/6842) | Added oauth support | +| 0.1.3 | 2021-12-06 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | +| 0.1.2 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | +| 0.1.1 | 2021-07-09 | [4645](https://github.com/airbytehq/airbyte/pull/4645) | Update \_send\_request method due to Airbyte CDK changes | +| 0.1.0 | 2021-06-30 | [4439](https://github.com/airbytehq/airbyte/pull/4439) | Initial release supporting the Square API | From c4b365ccc8fa54238046cbb3f735070a6a402b6e Mon Sep 17 00:00:00 2001 From: Subodh Kant Chaturvedi Date: Tue, 18 Jan 2022 17:38:38 +0530 Subject: [PATCH 141/215] fix master formatting (#9571) --- .../source-square/source_square/spec.json | 1 - .../airbyte/oauth/flows/SquareOAuthFlow.java | 42 +++++++++---------- .../oauth/flows/SquareOAuthFlowTest.java | 4 +- 3 files 
changed, 23 insertions(+), 24 deletions(-) diff --git a/airbyte-integrations/connectors/source-square/source_square/spec.json b/airbyte-integrations/connectors/source-square/source_square/spec.json index 23c77b753f8fe..f8719f27c0622 100644 --- a/airbyte-integrations/connectors/source-square/source_square/spec.json +++ b/airbyte-integrations/connectors/source-square/source_square/spec.json @@ -145,5 +145,4 @@ } } } - } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SquareOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SquareOAuthFlow.java index 1e9d821a67a1b..674efdd4d0f45 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SquareOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SquareOAuthFlow.java @@ -25,25 +25,25 @@ public class SquareOAuthFlow extends BaseOAuth2Flow { private static final List SCOPES = Arrays.asList( - "CUSTOMERS_READ", - "EMPLOYEES_READ", - "ITEMS_READ", - "MERCHANT_PROFILE_READ", - "ORDERS_READ", - "PAYMENTS_READ", - "TIMECARDS_READ" - // OAuth Permissions: - // https://developer.squareup.com/docs/oauth-api/square-permissions - // https://developer.squareup.com/reference/square/enums/OAuthPermission - // "DISPUTES_READ", - // "GIFTCARDS_READ", - // "INVENTORY_READ", - // "INVOICES_READ", - // "TIMECARDS_SETTINGS_READ", - // "LOYALTY_READ", - // "ONLINE_STORE_SITE_READ", - // "ONLINE_STORE_SNIPPETS_READ", - // "SUBSCRIPTIONS_READ", + "CUSTOMERS_READ", + "EMPLOYEES_READ", + "ITEMS_READ", + "MERCHANT_PROFILE_READ", + "ORDERS_READ", + "PAYMENTS_READ", + "TIMECARDS_READ" + // OAuth Permissions: + // https://developer.squareup.com/docs/oauth-api/square-permissions + // https://developer.squareup.com/reference/square/enums/OAuthPermission + // "DISPUTES_READ", + // "GIFTCARDS_READ", + // "INVENTORY_READ", + // "INVOICES_READ", + // "TIMECARDS_SETTINGS_READ", + // "LOYALTY_READ", + // "ONLINE_STORE_SITE_READ", + // "ONLINE_STORE_SNIPPETS_READ", + // "SUBSCRIPTIONS_READ", ); private static final String AUTHORIZE_URL = "https://connect.squareup.com/oauth2/authorize"; private static final String ACCESS_TOKEN_URL = "https://connect.squareup.com/oauth2/token"; @@ -89,8 +89,8 @@ protected Map getAccessTokenQueryParameters(String clientId, String authCode, String redirectUrl) { String scopes = SCOPES.stream() - .map(name -> ('"' + name + '"')) - .collect(Collectors.joining(",")); + .map(name -> ('"' + name + '"')) + .collect(Collectors.joining(",")); scopes = '[' + scopes + ']'; return ImmutableMap.builder() diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/SquareOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/SquareOAuthFlowTest.java index 5c54891086a59..54f981a303719 100644 --- a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/SquareOAuthFlowTest.java +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/SquareOAuthFlowTest.java @@ -16,8 +16,8 @@ protected BaseOAuthFlow getOAuthFlow() { @Override protected String getExpectedConsentUrl() { return "https://connect.squareup.com/oauth2/authorize?client_id=test_client_id" + - "&scope=CUSTOMERS_READ+EMPLOYEES_READ+ITEMS_READ+MERCHANT_PROFILE_READ+ORDERS_READ+PAYMENTS_READ+TIMECARDS_READ" + - "&session=False&state=state"; + "&scope=CUSTOMERS_READ+EMPLOYEES_READ+ITEMS_READ+MERCHANT_PROFILE_READ+ORDERS_READ+PAYMENTS_READ+TIMECARDS_READ" + + "&session=False&state=state"; } } From 3f9cbecbaf08ca66fe865d2d954ff9ae2d2feab5 Mon Sep 17 00:00:00 2001 From: Koji Matsumoto Date: Tue, 18 Jan 2022 21:41:12 +0900 Subject: [PATCH 142/215] 
Destination BigQuery: Accept Dataset ID field prefixed by Project ID (#8383) * add Dataset ID parse method * add BigQuery Destination unit test * update change log * fit to the latest code base * update change log * change var name to const name * change public method to private * add test cases for testGetDatasetIdFail * add integration test for dataset-id prefixed with project-id * fix getDatasetId * add comment to parameterized test provider * update docker image versions * update docker image versions again --- .../079d5540-f236-4294-ba7c-ade8fd918496.json | 2 +- .../22f6c74f-5699-40ff-833c-4a879ea40133.json | 2 +- .../seed/destination_definitions.yaml | 4 +- .../resources/seed/destination_specs.yaml | 4 +- .../Dockerfile | 2 +- .../destination-bigquery/Dockerfile | 2 +- .../bigquery/BigQueryDestination.java | 2 +- .../destination/bigquery/BigQueryUtils.java | 22 +++++- .../bigquery/BigQueryDestinationTest.java | 61 +++++++++++++--- .../bigquery/BigQueryUtilsTest.java | 69 +++++++++++++++++++ docs/integrations/destinations/bigquery.md | 2 + 11 files changed, 152 insertions(+), 20 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json index aee3fcbfd47a5..d433add5e8071 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "079d5540-f236-4294-ba7c-ade8fd918496", "name": "BigQuery (denormalized typed struct)", "dockerRepository": "airbyte/destination-bigquery-denormalized", - "dockerImageTag": "0.2.3", + "dockerImageTag": "0.2.4", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json index e8bfbb235d976..d076c305f7c2f 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "22f6c74f-5699-40ff-833c-4a879ea40133", "name": "BigQuery", "dockerRepository": "airbyte/destination-bigquery", - "dockerImageTag": "0.6.3", + "dockerImageTag": "0.6.4", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 1e561ba8289d8..d1584307abe4b 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -13,13 +13,13 @@ - name: BigQuery destinationDefinitionId: 22f6c74f-5699-40ff-833c-4a879ea40133 dockerRepository: airbyte/destination-bigquery 
- dockerImageTag: 0.6.3 + dockerImageTag: 0.6.4 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: BigQuery (denormalized typed struct) destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496 dockerRepository: airbyte/destination-bigquery-denormalized - dockerImageTag: 0.2.3 + dockerImageTag: 0.2.4 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: Cassandra diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 9276dac6c07b6..235c7a9ebb711 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -188,7 +188,7 @@ supportsDBT: false supported_destination_sync_modes: - "append" -- dockerImage: "airbyte/destination-bigquery:0.6.3" +- dockerImage: "airbyte/destination-bigquery:0.6.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: @@ -378,7 +378,7 @@ - "overwrite" - "append" - "append_dedup" -- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.3" +- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile index 45801e0f01138..11d11d4387ad0 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-bigquery-denormalized COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.3 +LABEL io.airbyte.version=0.2.4 LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized diff --git a/airbyte-integrations/connectors/destination-bigquery/Dockerfile b/airbyte-integrations/connectors/destination-bigquery/Dockerfile index 605329e28e0bc..ef038a9654030 100644 --- a/airbyte-integrations/connectors/destination-bigquery/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-bigquery COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.6.3 +LABEL io.airbyte.version=0.6.4 LABEL io.airbyte.name=airbyte/destination-bigquery diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java index 4f4c3502afd65..e243a59ef9b73 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java @@ -56,7 +56,7 @@ public BigQueryDestination() { @Override public AirbyteConnectionStatus check(final JsonNode config) { try { - final String datasetId = config.get(BigQueryConsts.CONFIG_DATASET_ID).asText(); + final String datasetId = BigQueryUtils.getDatasetId(config); final String datasetLocation = BigQueryUtils.getDatasetLocation(config); final BigQuery bigquery = 
getBigQuery(config); final UploadingMethod uploadingMethod = BigQueryUtils.getLoadingMethod(config); diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java index fd19e8861e197..545e8aff58969 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java @@ -5,6 +5,7 @@ package io.airbyte.integrations.destination.bigquery; import static io.airbyte.integrations.destination.bigquery.helpers.LoggerHelper.getJobErrorMessage; +import static java.util.Objects.isNull; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -162,6 +163,25 @@ public static JsonNode getGcsAvroJsonNodeConfig(final JsonNode config) { return gcsJsonNode; } + public static String getDatasetId(final JsonNode config) { + String datasetId = config.get(BigQueryConsts.CONFIG_DATASET_ID).asText(); + + int colonIndex = datasetId.indexOf(":"); + if (colonIndex != -1) { + String projectIdPart = datasetId.substring(0, colonIndex); + String projectId = config.get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); + if (!(projectId.equals(projectIdPart))) { + throw new IllegalArgumentException(String.format( + "Project ID included in Dataset ID must match Project ID field's value: Project ID is `%s`, but you specified `%s` in Dataset ID", + projectId, + projectIdPart)); + } + } + // if colonIndex is -1, then this returns the entire string + // otherwise it returns everything after the colon + return datasetId.substring(colonIndex + 1); + } + public static String getDatasetLocation(final JsonNode config) { if (config.has(BigQueryConsts.CONFIG_DATASET_LOCATION)) { return config.get(BigQueryConsts.CONFIG_DATASET_LOCATION).asText(); @@ -214,7 +234,7 @@ public static void transformJsonDateTimeToBigDataFormat(List dateTimeFie } public static String getSchema(final JsonNode config, final ConfiguredAirbyteStream stream) { - final String defaultSchema = config.get(BigQueryConsts.CONFIG_DATASET_ID).asText(); + final String defaultSchema = getDatasetId(config); final String srcNamespace = stream.getStream().getNamespace(); if (srcNamespace == null) { return defaultSchema; diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java index d364c479715bc..70c7e9dd1627e 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java @@ -55,13 +55,18 @@ import java.time.Instant; import java.util.List; import java.util.Set; +import java.util.function.Consumer; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import org.apache.commons.lang3.tuple.ImmutablePair; import org.junit.jupiter.api.AfterEach; import 
org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -197,16 +202,20 @@ void testSpec() throws Exception { assertEquals(expected, actual); } - @Test - void testCheckSuccess() { + @ParameterizedTest + @MethodSource("datasetIdResetterProvider") + void testCheckSuccess(DatasetIdResetter resetDatasetId) { + resetDatasetId.accept(config); final AirbyteConnectionStatus actual = new BigQueryDestination().check(config); final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); assertEquals(expected, actual); } - @Test - void testCheckFailure() { + @ParameterizedTest + @MethodSource("datasetIdResetterProvider") + void testCheckFailure(DatasetIdResetter resetDatasetId) { ((ObjectNode) config).put(BigQueryConsts.CONFIG_PROJECT_ID, "fake"); + resetDatasetId.accept(config); final AirbyteConnectionStatus actual = new BigQueryDestination().check(config); final String actualMessage = actual.getMessage(); LOGGER.info("Checking expected failure message:" + actualMessage); @@ -215,8 +224,10 @@ void testCheckFailure() { assertEquals(expected, actual.withMessage("")); } - @Test - void testWriteSuccess() throws Exception { + @ParameterizedTest + @MethodSource("datasetIdResetterProvider") + void testWriteSuccess(DatasetIdResetter resetDatasetId) throws Exception { + resetDatasetId.accept(config); final BigQueryDestination destination = new BigQueryDestination(); final AirbyteMessageConsumer consumer = destination.getConsumer(config, catalog, Destination::defaultOutputRecordCollector); @@ -244,8 +255,10 @@ void testWriteSuccess() throws Exception { .collect(Collectors.toList())); } - @Test - void testWriteFailure() throws Exception { + @ParameterizedTest + @MethodSource("datasetIdResetterProvider") + void testWriteFailure(DatasetIdResetter resetDatasetId) throws Exception { + resetDatasetId.accept(config); // hack to force an exception to be thrown from within the consumer. 
final AirbyteMessage spiedMessage = spy(MESSAGE_USERS1); doThrow(new RuntimeException()).when(spiedMessage).getRecord(); @@ -305,8 +318,10 @@ private List retrieveRecords(final String tableName) throws Exception .collect(Collectors.toList()); } - @Test - void testWritePartitionOverUnpartitioned() throws Exception { + @ParameterizedTest + @MethodSource("datasetIdResetterProvider") + void testWritePartitionOverUnpartitioned(DatasetIdResetter resetDatasetId) throws Exception { + resetDatasetId.accept(config); final String raw_table_name = String.format("_airbyte_raw_%s", USERS_STREAM_NAME); createUnpartitionedTable(bigquery, dataset, raw_table_name); assertFalse(isTablePartitioned(bigquery, dataset, raw_table_name)); @@ -369,4 +384,30 @@ private boolean isTablePartitioned(final BigQuery bigquery, final Dataset datase return false; } + private static class DatasetIdResetter { + private Consumer consumer; + + DatasetIdResetter(Consumer consumer) { + this.consumer = consumer; + } + + public void accept(JsonNode config) { + consumer.accept(config); + } + } + + private static Stream datasetIdResetterProvider() { + // parameterized test with two dataset-id patterns: `dataset_id` and `project-id:dataset_id` + return Stream.of( + Arguments.arguments(new DatasetIdResetter(config -> {})), + Arguments.arguments(new DatasetIdResetter( + config -> { + String projectId = ((ObjectNode) config).get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); + String datasetId = ((ObjectNode) config).get(BigQueryConsts.CONFIG_DATASET_ID).asText(); + ((ObjectNode) config).put(BigQueryConsts.CONFIG_DATASET_ID, + String.format("%s:%s", projectId, datasetId)); + } + )) + ); + } } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java new file mode 100644 index 0000000000000..586e0cf7ce747 --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.bigquery; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import java.util.stream.Stream; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class BigQueryUtilsTest { + + private ImmutableMap.Builder configMapBuilder; + + @BeforeEach + public void init() { + configMapBuilder = ImmutableMap.builder() + .put(BigQueryConsts.CONFIG_CREDS, "test_secret") + .put(BigQueryConsts.CONFIG_DATASET_LOCATION, "US"); + } + + @ParameterizedTest + @MethodSource("validBigQueryIdProvider") + public void testGetDatasetIdSuccess(String projectId, String datasetId, String expected) throws Exception { + JsonNode config = Jsons.jsonNode(configMapBuilder + .put(BigQueryConsts.CONFIG_PROJECT_ID, projectId) + .put(BigQueryConsts.CONFIG_DATASET_ID, datasetId) + .build()); + + String actual = BigQueryUtils.getDatasetId(config); + + assertEquals(expected, actual); + } + + @ParameterizedTest + @MethodSource("invalidBigQueryIdProvider") + public void testGetDatasetIdFail(String projectId, String datasetId, String expected) throws Exception { + JsonNode config = Jsons.jsonNode(configMapBuilder + .put(BigQueryConsts.CONFIG_PROJECT_ID, projectId) + .put(BigQueryConsts.CONFIG_DATASET_ID, datasetId) + .build()); + + Exception exception = assertThrows(IllegalArgumentException.class, () -> BigQueryUtils.getDatasetId(config)); + + assertEquals(expected, exception.getMessage()); + } + + private static Stream validBigQueryIdProvider() { + return Stream.of( + Arguments.arguments("my-project", "my_dataset", "my_dataset"), + Arguments.arguments("my-project", "my-project:my_dataset", "my_dataset")); + } + + private static Stream invalidBigQueryIdProvider() { + return Stream.of( + Arguments.arguments("my-project", ":my_dataset", + "Project ID included in Dataset ID must match Project ID field's value: Project ID is `my-project`, but you specified `` in Dataset ID"), + Arguments.arguments("my-project", "your-project:my_dataset", + "Project ID included in Dataset ID must match Project ID field's value: Project ID is `my-project`, but you specified `your-project` in Dataset ID")); + } +} diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index 3a72ac40c3365..fffbea6c7c4f4 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -153,6 +153,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.6.4 | 2022-01-17 | [\#8383](https://github.com/airbytehq/airbyte/issues/8383) | Support dataset-id prefixed by project-id | | 0.6.3 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | | 0.6.2 | 2022-01-10 | [\#9121](https://github.com/airbytehq/airbyte/pull/9121) | Fixed check method for GCS mode to verify if all roles assigned to user | | 0.6.1 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | @@ -172,6 +173,7 @@ Therefore, Airbyte BigQuery 
destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------| :--- | +| 0.2.4 | 2022-01-17 | [\#8383](https://github.com/airbytehq/airbyte/issues/8383) | BigQuery/BiqQuery denorm Destinations : Support dataset-id prefixed by project-id | | 0.2.3 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | | 0.2.2 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | | 0.2.1 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | From 62c433eeb1f02cfd26e930b41d466180afc5952a Mon Sep 17 00:00:00 2001 From: augan-rymkhan <93112548+augan-rymkhan@users.noreply.github.com> Date: Tue, 18 Jan 2022 20:42:21 +0600 Subject: [PATCH 143/215] Source Salesforce: fix pagination in REST API streams (#9151) * fix next_page_token * fix BULK API * fix BUlk incremental stream * added unit test and comments * format code * bump version * updated spec and def yaml Co-authored-by: auganbay --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-salesforce/Dockerfile | 2 +- .../source_salesforce/streams.py | 83 ++++++++++++++++--- .../source-salesforce/unit_tests/unit_test.py | 42 ++++++++++ docs/integrations/sources/salesforce.md | 1 + 6 files changed, 116 insertions(+), 16 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 372fbe1101018..6ce13683cf803 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -613,7 +613,7 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 0.1.15 + dockerImageTag: 0.1.16 documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 1a6fc9b152605..ecf1dee8fdd65 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6425,7 +6425,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:0.1.15" +- dockerImage: "airbyte/source-salesforce:0.1.16" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile index 6c59b3d8b1d38..812b159689030 100644 --- a/airbyte-integrations/connectors/source-salesforce/Dockerfile +++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile @@ -25,5 +25,5 @@ COPY source_salesforce ./source_salesforce ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.15 +LABEL io.airbyte.version=0.1.16 LABEL io.airbyte.name=airbyte/source-salesforce diff --git 
a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py index 4ea39cde10149..6e2867b27f1d4 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py @@ -44,13 +44,17 @@ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: def url_base(self) -> str: return self.sf_api.instance_url - def path(self, **kwargs) -> str: + def path(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> str: + if next_page_token: + """ + If `next_page_token` is set, subsequent requests use `nextRecordsUrl`. + """ + return next_page_token return f"/services/data/{self.sf_api.version}/queryAll" def next_page_token(self, response: requests.Response) -> str: response_data = response.json() - if len(response_data["records"]) == self.page_size and self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: - return f"WHERE {self.primary_key} >= '{response_data['records'][-1][self.primary_key]}' " + return response_data.get("nextRecordsUrl") def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None @@ -58,6 +62,11 @@ def request_params( """ Salesforce SOQL Query: https://developer.salesforce.com/docs/atlas.en-us.232.0.api_rest.meta/api_rest/dome_queryall.htm """ + if next_page_token: + """ + If `next_page_token` is set, subsequent requests use `nextRecordsUrl`, and do not include any parameters. + """ + return {} selected_properties = self.get_json_schema().get("properties", {}) @@ -70,11 +79,9 @@ def request_params( } query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " - if next_page_token: - query += next_page_token if self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: - query += f"ORDER BY {self.primary_key} ASC LIMIT {self.page_size}" + query += f"ORDER BY {self.primary_key} ASC" return {"q": query} @@ -259,6 +266,32 @@ def next_page_token(self, last_record: dict) -> str: if self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: return f"WHERE {self.primary_key} >= '{last_record[self.primary_key]}' " + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + """ + Salesforce SOQL Query: https://developer.salesforce.com/docs/atlas.en-us.232.0.api_rest.meta/api_rest/dome_queryall.htm + """ + + selected_properties = self.get_json_schema().get("properties", {}) + + # Salesforce BULK API currently does not support loading fields with data type base64 and compound data + if self.sf_api.api_type == "BULK": + selected_properties = { + key: value + for key, value in selected_properties.items() + if value.get("format") != "base64" and "object" not in value["type"] + } + + query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " + if next_page_token: + query += next_page_token + + if self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: + query += f"ORDER BY {self.primary_key} ASC LIMIT {self.page_size}" + + return {"q": query} + def read_records( self, sync_mode: SyncMode, @@ -305,14 +338,15 @@ def format_start_date(start_date: Optional[str]) -> Optional[str]: if start_date: return pendulum.parse(start_date).strftime("%Y-%m-%dT%H:%M:%SZ") - def next_page_token(self, response: 
requests.Response) -> str: - response_data = response.json() - if len(response_data["records"]) == self.page_size and self.name not in UNSUPPORTED_FILTERING_STREAMS: - return response_data["records"][-1][self.cursor_field] - def request_params( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None ) -> MutableMapping[str, Any]: + if next_page_token: + """ + If `next_page_token` is set, subsequent requests use `nextRecordsUrl`, and do not include any parameters. + """ + return {} + selected_properties = self.get_json_schema().get("properties", {}) # Salesforce BULK API currently does not support loading fields with data type base64 and compound data @@ -324,13 +358,13 @@ def request_params( } stream_date = stream_state.get(self.cursor_field) - start_date = next_page_token or stream_date or self.start_date + start_date = stream_date or self.start_date query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " if start_date: query += f"WHERE {self.cursor_field} >= {start_date} " if self.name not in UNSUPPORTED_FILTERING_STREAMS: - query += f"ORDER BY {self.cursor_field} ASC LIMIT {self.page_size}" + query += f"ORDER BY {self.cursor_field} ASC" return {"q": query} @property @@ -352,3 +386,26 @@ class BulkIncrementalSalesforceStream(BulkSalesforceStream, IncrementalSalesforc def next_page_token(self, last_record: dict) -> str: if self.name not in UNSUPPORTED_FILTERING_STREAMS: return last_record[self.cursor_field] + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + selected_properties = self.get_json_schema().get("properties", {}) + + # Salesforce BULK API currently does not support loading fields with data type base64 and compound data + if self.sf_api.api_type == "BULK": + selected_properties = { + key: value + for key, value in selected_properties.items() + if value.get("format") != "base64" and "object" not in value["type"] + } + + stream_date = stream_state.get(self.cursor_field) + start_date = next_page_token or stream_date or self.start_date + + query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " + if start_date: + query += f"WHERE {self.cursor_field} >= {start_date} " + if self.name not in UNSUPPORTED_FILTERING_STREAMS: + query += f"ORDER BY {self.cursor_field} ASC LIMIT {self.page_size}" + return {"q": query} diff --git a/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py index 40f417ebf516b..41f98f12772a2 100644 --- a/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py @@ -349,3 +349,45 @@ def test_discover_with_streams_criteria_param(streams_criteria, predicted_filter ) filtered_streams = sf_object.get_validated_streams(config=updated_config) assert sorted(filtered_streams) == sorted(predicted_filtered_streams) + + +def test_pagination_rest(stream_rest_config, stream_rest_api): + stream: SalesforceStream = _generate_stream("Account", stream_rest_config, stream_rest_api) + stream._wait_timeout = 0.1 # maximum wait timeout will be 6 seconds + next_page_url = "/services/data/v52.0/query/012345" + with requests_mock.Mocker() as m: + resp_1 = { + "done": False, + "totalSize": 4, + "nextRecordsUrl": next_page_url, + "records": [ + { + "ID": 1, + 
"LastModifiedDate": "2021-11-15", + }, + { + "ID": 2, + "LastModifiedDate": "2021-11-16", + }, + ], + } + resp_2 = { + "done": True, + "totalSize": 4, + "records": [ + { + "ID": 3, + "LastModifiedDate": "2021-11-17", + }, + { + "ID": 4, + "LastModifiedDate": "2021-11-18", + }, + ], + } + + m.register_uri("GET", stream.path(), json=resp_1) + m.register_uri("GET", next_page_url, json=resp_2) + + records = [record for record in stream.read_records(sync_mode=SyncMode.full_refresh)] + assert len(records) == 4 diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index 26c44a8ea0f6d..278eca3b9b5ae 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -737,6 +737,7 @@ List of available streams: | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:--------------------------------------------------------------------------| +| 0.1.16 | 2022-01-18 | [9151](https://github.com/airbytehq/airbyte/pull/9151) | Fix pagination in REST API streams | | 0.1.15 | 2022-01-11 | [9409](https://github.com/airbytehq/airbyte/pull/9409) | Correcting the presence of an extra `else` handler in the error handling | | 0.1.14 | 2022-01-11 | [9386](https://github.com/airbytehq/airbyte/pull/9386) | Handling 400 error, while `sobject` doesn't support `query` or `queryAll` requests | | 0.1.13 | 2022-01-11 | [8797](https://github.com/airbytehq/airbyte/pull/8797) | Switched from authSpecification to advanced_auth in specefication | From 081510017a4df8ffc951b08ba00297de9d9f54d9 Mon Sep 17 00:00:00 2001 From: Titas Skrebe Date: Tue, 18 Jan 2022 20:45:59 +0200 Subject: [PATCH 144/215] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20Chartmog?= =?UTF-8?q?ul=20(#9381)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * New source: Chartmogul * update changelog * bootstrap.md * reorder schema * make acceptance test pass * add chartmogul to builds.md * Update airbyte-integrations/connectors/source-chartmogul/source_chartmogul/source.py Co-authored-by: Augustin * Update airbyte-integrations/connectors/source-chartmogul/source_chartmogul/source.py Co-authored-by: Augustin * address comments * fix unused imports * update source_specs Co-authored-by: alafanechere Co-authored-by: Augustin --- .../src/main/resources/icons/chartmogul.svg | 13 ++ .../resources/seed/source_definitions.yaml | 7 + .../src/main/resources/seed/source_specs.yaml | 27 ++++ airbyte-integrations/builds.md | 1 + .../connectors/source-chartmogul/Dockerfile | 38 +++++ .../connectors/source-chartmogul/README.md | 132 ++++++++++++++++++ .../acceptance-test-config.yml | 20 +++ .../acceptance-test-docker.sh | 16 +++ .../connectors/source-chartmogul/bootstrap.md | 21 +++ .../connectors/source-chartmogul/build.gradle | 9 ++ .../integration_tests/__init__.py | 3 + .../integration_tests/acceptance.py | 14 ++ .../integration_tests/configured_catalog.json | 22 +++ .../integration_tests/invalid_config.json | 3 + .../integration_tests/sample_config.json | 4 + .../connectors/source-chartmogul/main.py | 13 ++ .../source-chartmogul/requirements.txt | 2 + .../sample_files/configured_catalog.json | 22 +++ .../connectors/source-chartmogul/setup.py | 30 ++++ .../source_chartmogul/__init__.py | 8 ++ .../source_chartmogul/schemas/activities.json | 48 +++++++ .../source_chartmogul/schemas/customers.json | 128 +++++++++++++++++ .../source_chartmogul/source.py | 94 +++++++++++++ .../source_chartmogul/spec.json | 25 ++++ 
.../source-chartmogul/unit_tests/__init__.py | 3 + .../unit_tests/test_source.py | 30 ++++ .../unit_tests/test_streams.py | 84 +++++++++++ docs/SUMMARY.md | 1 + docs/integrations/README.md | 1 + docs/integrations/sources/chartmogul.md | 47 +++++++ 30 files changed, 866 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/icons/chartmogul.svg create mode 100644 airbyte-integrations/connectors/source-chartmogul/Dockerfile create mode 100644 airbyte-integrations/connectors/source-chartmogul/README.md create mode 100644 airbyte-integrations/connectors/source-chartmogul/acceptance-test-config.yml create mode 100755 airbyte-integrations/connectors/source-chartmogul/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-chartmogul/bootstrap.md create mode 100644 airbyte-integrations/connectors/source-chartmogul/build.gradle create mode 100644 airbyte-integrations/connectors/source-chartmogul/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-chartmogul/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-chartmogul/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-chartmogul/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-chartmogul/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-chartmogul/main.py create mode 100644 airbyte-integrations/connectors/source-chartmogul/requirements.txt create mode 100644 airbyte-integrations/connectors/source-chartmogul/sample_files/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-chartmogul/setup.py create mode 100644 airbyte-integrations/connectors/source-chartmogul/source_chartmogul/__init__.py create mode 100644 airbyte-integrations/connectors/source-chartmogul/source_chartmogul/schemas/activities.json create mode 100644 airbyte-integrations/connectors/source-chartmogul/source_chartmogul/schemas/customers.json create mode 100644 airbyte-integrations/connectors/source-chartmogul/source_chartmogul/source.py create mode 100644 airbyte-integrations/connectors/source-chartmogul/source_chartmogul/spec.json create mode 100644 airbyte-integrations/connectors/source-chartmogul/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-chartmogul/unit_tests/test_source.py create mode 100644 airbyte-integrations/connectors/source-chartmogul/unit_tests/test_streams.py create mode 100644 docs/integrations/sources/chartmogul.md diff --git a/airbyte-config/init/src/main/resources/icons/chartmogul.svg b/airbyte-config/init/src/main/resources/icons/chartmogul.svg new file mode 100644 index 0000000000000..4957ba4e9b413 --- /dev/null +++ b/airbyte-config/init/src/main/resources/icons/chartmogul.svg @@ -0,0 +1,13 @@ + + + + + + diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 6ce13683cf803..2d86e3f7b999a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -116,6 +116,13 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/chargebee icon: chargebee.svg sourceType: api +- name: Chartmogul + sourceDefinitionId: b6604cbd-1b12-4c08-8767-e140d0fb0877 + dockerRepository: airbyte/source-chartmogul + dockerImageTag: 0.1.0 + documentationUrl: 
https://docs.airbyte.io/integrations/sources/chartmogul + icon: chartmogul.svg + sourceType: api - name: ClickHouse sourceDefinitionId: bad83517-5e54-4a3d-9b53-63e85fbd4d7c dockerRepository: airbyte/source-clickhouse diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index ecf1dee8fdd65..005c5af601381 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1000,6 +1000,33 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-chartmogul:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/chartmogul" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Chartmogul Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Chartmogul API key" + airbyte_secret: true + order: 0 + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. When\ + \ feasible, any data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + order: 1 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-clickhouse:0.1.7" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse" diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 001ae80fd3248..c1adc5af18e50 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -20,6 +20,7 @@ | BigQuery | [![source-bigquery](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-bigquery%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-bigquery/) | | Bing Ads | [![source-bing-ads](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-bing-ads%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-bing-ads) | | Chargebee | [![source-chargebee](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-chargebee%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-chargebee/) | +| Chartmogul | [![source-chartmogul](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-chartmogul%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-chartmogul/) | | Cart.com | [![source-cart](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-cart%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-cart/) | | Close.com | [![source-close-com](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-close-com%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-close-com/) | | Dixa | [![source-dixa](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-dixa%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-dixa) | diff --git a/airbyte-integrations/connectors/source-chartmogul/Dockerfile 
b/airbyte-integrations/connectors/source-chartmogul/Dockerfile new file mode 100644 index 0000000000000..35cef5c730227 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_chartmogul ./source_chartmogul + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-chartmogul diff --git a/airbyte-integrations/connectors/source-chartmogul/README.md b/airbyte-integrations/connectors/source-chartmogul/README.md new file mode 100644 index 0000000000000..2ba338da1728d --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/README.md @@ -0,0 +1,132 @@ +# Chartmogul Source + +This is the repository for the Chartmogul source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/chartmogul). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python3 -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-chartmogul:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/chartmogul) +to generate the necessary credentials. 
Then create a file `secrets/config.json` conforming to the `source_chartmogul/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source chartmogul test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-chartmogul:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-chartmogul:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-chartmogul:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-chartmogul:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-chartmogul:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-chartmogul:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. 
+To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-chartmogul:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-chartmogul:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-chartmogul/acceptance-test-config.yml b/airbyte-integrations/connectors/source-chartmogul/acceptance-test-config.yml new file mode 100644 index 0000000000000..ba3029611366a --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-chartmogul:dev +tests: + spec: + - spec_path: "source_chartmogul/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: ["activities"] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-chartmogul/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-chartmogul/acceptance-test-docker.sh new file mode 100755 index 0000000000000..c51577d10690c --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-chartmogul/bootstrap.md b/airbyte-integrations/connectors/source-chartmogul/bootstrap.md new file mode 100644 index 0000000000000..d6fee29243dad --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/bootstrap.md @@ -0,0 +1,21 @@ +# Chartmogul +Chartmogul is an online subscription analytics platform. 
It retrieves data from payment processors (e.g. Stripe) and makes sense of it.
+
+## Streams
+
+The connector currently implements the following full refresh streams:
+* [Customers](https://dev.chartmogul.com/reference/list-customers)
+* [Activities](https://dev.chartmogul.com/reference/list-activities)
+
+The `start_date` config is used for retrieving `Activities`. The `Customers` stream does not use this config. Even if it were possible to filter by `start_date`, it would cause issues when modeling data. That is because activities after `start_date` can be triggered by customers who were created long before that.
+
+### Incremental streams
+Incremental streams were not implemented for the following reasons:
+* The `Customers` API endpoint does not provide filtering by creation/update date.
+* The `Activities` API does provide pagination based on the last entry's UUID; however, it is not stable, since it is possible for an activity to disappear retrospectively.
+
+### Next steps
+It is theoretically possible to make the `Activities` stream incremental. One would need to keep track of both the UUID and `created_at`, and read the stream until `datetime.now()`. A dynamic end date would be necessary since activities can also have a future date. Since data can be changed retrospectively, a `lookback window` would also be necessary to catch all the changes.
+
+### Rate limits
+The API rate limit is 40 requests/second. Read [Rate Limits](https://dev.chartmogul.com/docs/rate-limits) for more information.
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/source-chartmogul/build.gradle b/airbyte-integrations/connectors/source-chartmogul/build.gradle
new file mode 100644
index 0000000000000..0f9c12cdb8203
--- /dev/null
+++ b/airbyte-integrations/connectors/source-chartmogul/build.gradle
@@ -0,0 +1,9 @@
+plugins {
+    id 'airbyte-python'
+    id 'airbyte-docker'
+    id 'airbyte-source-acceptance-test'
+}
+
+airbytePython {
+    moduleDirectory 'source_chartmogul'
+}
diff --git a/airbyte-integrations/connectors/source-chartmogul/integration_tests/__init__.py b/airbyte-integrations/connectors/source-chartmogul/integration_tests/__init__.py
new file mode 100644
index 0000000000000..46b7376756ec6
--- /dev/null
+++ b/airbyte-integrations/connectors/source-chartmogul/integration_tests/__init__.py
@@ -0,0 +1,3 @@
+#
+# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+#
diff --git a/airbyte-integrations/connectors/source-chartmogul/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-chartmogul/integration_tests/acceptance.py
new file mode 100644
index 0000000000000..0347f2a0b143d
--- /dev/null
+++ b/airbyte-integrations/connectors/source-chartmogul/integration_tests/acceptance.py
@@ -0,0 +1,14 @@
+#
+# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-chartmogul/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-chartmogul/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..3b788b27041d1 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/integration_tests/configured_catalog.json @@ -0,0 +1,22 @@ +{ + "streams": [ + { + "stream": { + "name": "customers", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "activities", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-chartmogul/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-chartmogul/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..dc521ade7acf9 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/integration_tests/invalid_config.json @@ -0,0 +1,3 @@ +{ + "api_key": "" +} diff --git a/airbyte-integrations/connectors/source-chartmogul/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-chartmogul/integration_tests/sample_config.json new file mode 100644 index 0000000000000..c09fca36f4670 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/integration_tests/sample_config.json @@ -0,0 +1,4 @@ +{ + "api_key": "", + "start_date": "2022-01-05T12:09:00Z" +} diff --git a/airbyte-integrations/connectors/source-chartmogul/main.py b/airbyte-integrations/connectors/source-chartmogul/main.py new file mode 100644 index 0000000000000..89e0ffbb29778 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_chartmogul import SourceChartmogul + +if __name__ == "__main__": + source = SourceChartmogul() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-chartmogul/requirements.txt b/airbyte-integrations/connectors/source-chartmogul/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . 
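The connector's `bootstrap.md` above notes that the `Activities` stream could, in principle, be made incremental by tracking both the activity UUID and its date, reading up to the current time, and re-reading a lookback window because activities can change retroactively; this patch ships the stream as full refresh only. Below is a minimal sketch of that idea, not part of the connector code in this patch, assuming the documented `start-date`/`start-after` query parameters of `/v1/activities`, a pre-authenticated `requests.Session`, and a `pendulum` dependency.

```python
from typing import Any, Iterable, Mapping, MutableMapping

import pendulum  # assumed dependency; other Airbyte Python sources use it for date handling
import requests

LOOKBACK_DAYS = 7  # hypothetical lookback window for retroactively changed activities


def read_activities_incrementally(
    session: requests.Session, state: MutableMapping[str, Any]
) -> Iterable[Mapping[str, Any]]:
    """Yield activities newer than the stored cursor, re-reading a small lookback window."""
    params: MutableMapping[str, Any] = {}
    if state.get("date"):
        # Restart slightly before the stored cursor, since activities can be
        # added or removed retroactively.
        cursor = pendulum.parse(state["date"]).subtract(days=LOOKBACK_DAYS)
        params["start-date"] = cursor.strftime("%Y-%m-%dT%H:%M:%SZ")

    while True:
        response = session.get("https://api.chartmogul.com/v1/activities", params=params)
        response.raise_for_status()
        payload = response.json()
        entries = payload.get("entries", [])
        for entry in entries:
            # Track both the date and the UUID, as suggested in bootstrap.md.
            state["date"] = entry.get("date", state.get("date"))
            state["uuid"] = entry.get("uuid", state.get("uuid"))
            yield entry
        if not entries or not payload.get("has_more"):
            return
        # Page forward within this run using the UUID cursor, like the
        # full-refresh Activities stream does.
        params = {"start-after": entries[-1]["uuid"]}
```

A real implementation would still need to persist this state between syncs through the CDK and deduplicate records re-read inside the lookback window.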
diff --git a/airbyte-integrations/connectors/source-chartmogul/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-chartmogul/sample_files/configured_catalog.json new file mode 100644 index 0000000000000..3b788b27041d1 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/sample_files/configured_catalog.json @@ -0,0 +1,22 @@ +{ + "streams": [ + { + "stream": { + "name": "customers", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "activities", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-chartmogul/setup.py b/airbyte-integrations/connectors/source-chartmogul/setup.py new file mode 100644 index 0000000000000..446c4426fb9bb --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/setup.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", + "requests-mock", +] + +setup( + name="source_chartmogul", + description="Source implementation for Chartmogul.", + author="Titas Skrebe", + author_email="titas@omnisend.com", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/__init__.py b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/__init__.py new file mode 100644 index 0000000000000..9d8418296529d --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceChartmogul + +__all__ = ["SourceChartmogul"] diff --git a/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/schemas/activities.json b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/schemas/activities.json new file mode 100644 index 0000000000000..6cf82203be202 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/schemas/activities.json @@ -0,0 +1,48 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "description": { + "type": ["null", "string"] + }, + "activity-mrr-movement": { + "type": ["null", "integer"] + }, + "activity-mrr": { + "type": ["null", "integer"] + }, + "activity-arr": { + "type": ["null", "integer"] + }, + "date": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "currency": { + "type": ["null", "string"] + }, + "subscription-external-id": { + "type": ["null", "string"] + }, + "plan-external-id": { + "type": ["null", "string"] + }, + "customer-name": { + "type": ["null", "string"] + }, + "customer-uuid": { + "type": ["null", "string"] + }, + "customer-external-id": { + "type": ["null", "string"] + }, + "billing-connector-uuid": { + "type": ["null", "string"] + }, + "uuid": { + "type": ["string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/schemas/customers.json b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/schemas/customers.json new file mode 100644 index 0000000000000..eb499b01a0d83 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/schemas/customers.json @@ -0,0 +1,128 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["integer"] + }, + "uuid": { + "type": ["string"] + }, + "external_id": { + "type": ["null", "string"] + }, + "external_ids": { + "type": ["null", "array"] + }, + "data_source_uuid": { + "type": ["null", "string"] + }, + "data_source_uuids": { + "type": ["null", "array"] + }, + "name": { + "type": ["null", "string"] + }, + "company": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "lead_created_at": { + "type": ["null", "string"] + }, + "free_trial_started_at": { + "type": ["null", "string"] + }, + "customer_since": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "country": { + "type": ["null", "string"] + }, + "zip": { + "type": ["null", "string"] + }, + "attributes": { + "type": ["null", "object"], + "properties": { + "tags": { + "type": ["null", "array"] + }, + "stripe": { + "type": ["null", "object"], + "properties": { + "brandID": { + "type": ["null", "string"] + }, + "brandName": { + "type": ["null", "string"] + }, + "createdAt": { + "type": ["null", "string"] + }, + "platformName": { + "type": ["null", "string"] + } + } + }, + "clearbit": { + "type": ["null", "object"], + "properties": {} + }, + "custom": { + "type": ["null", "object"], + "properties": {} + } + } + }, + "address": { + "type": ["null", "object"], + "properties": { + "country": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "address_zip": { + "type": ["null", "string"] + } + } + }, + "mrr": { + "type": ["null", "integer"] + }, + "arr": { + "type": ["null", "integer"] + }, + 
"billing-system-url": { + "type": ["null", "string"] + }, + "chartmogul-url": { + "type": ["null", "string"] + }, + "billing-system-type": { + "type": ["null", "string"] + }, + "currency": { + "type": ["null", "string"] + }, + "currency-sign": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/source.py b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/source.py new file mode 100644 index 0000000000000..6a3a8fb675fe2 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/source.py @@ -0,0 +1,94 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from abc import ABC +from base64 import b64encode +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator + + +# Basic full refresh stream +class ChartmogulStream(HttpStream, ABC): + url_base = "https://api.chartmogul.com" + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + yield from response.json().get("entries", []) + + +class Customers(ChartmogulStream): + primary_key = "id" + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + json_response = response.json() + if json_response.get("has_more", False): + return {"page": json_response.get("current_page") + 1} + + return None + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + return {"page": 1 if not next_page_token else next_page_token["page"]} + + def path(self, **kwargs) -> str: + return "v1/customers" + + +class Activities(ChartmogulStream): + primary_key = "uuid" + + def __init__(self, start_date: str, **kwargs): + super().__init__(**kwargs) + self.start_date = start_date + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + json_response = response.json() + if not json_response.get("has_more", False): + return None + + return {"start-after": json_response["entries"][-1][self.primary_key]} + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = {} + + if next_page_token: + params.update(next_page_token) + elif self.start_date: + params["start-date"] = self.start_date + + return params + + def path(self, **kwargs) -> str: + return "v1/activities" + + +class HttpBasicAuthenticator(TokenAuthenticator): + def __init__(self, token: str, auth_method: str = "Basic", **kwargs): + auth_string = f"{token}:".encode("utf8") + b64_encoded = b64encode(auth_string).decode("utf8") + super().__init__(token=b64_encoded, auth_method=auth_method, **kwargs) + + +# Source +class SourceChartmogul(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + auth = HttpBasicAuthenticator(config["api_key"], auth_method="Basic").get_auth_header() + url = f"{ChartmogulStream.url_base}/v1/ping" + try: + resp = requests.get(url, headers=auth) + resp.raise_for_status() + return True, None + except Exception as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = 
HttpBasicAuthenticator(config["api_key"], auth_method="Basic") + return [Customers(authenticator=auth), Activities(authenticator=auth, start_date=config.get("start_date"))] diff --git a/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/spec.json b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/spec.json new file mode 100644 index 0000000000000..d6f16407e70ce --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/source_chartmogul/spec.json @@ -0,0 +1,25 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/chartmogul", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Chartmogul Spec", + "type": "object", + "required": ["api_key"], + "additionalProperties": false, + "properties": { + "api_key": { + "type": "string", + "description": "Chartmogul API key", + "airbyte_secret": true, + "order": 0 + }, + "start_date": { + "type": "string", + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", + "description": "UTC date and time in the format 2017-01-25T00:00:00Z. When feasible, any data before this date will not be replicated.", + "examples": ["2017-01-25T00:00:00Z"], + "order": 1 + } + } + } +} diff --git a/airbyte-integrations/connectors/source-chartmogul/unit_tests/__init__.py b/airbyte-integrations/connectors/source-chartmogul/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-chartmogul/unit_tests/test_source.py b/airbyte-integrations/connectors/source-chartmogul/unit_tests/test_source.py new file mode 100644 index 0000000000000..32b7774924343 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/unit_tests/test_source.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +from requests.exceptions import HTTPError +from source_chartmogul.source import SourceChartmogul + + +def test_check_connection(mocker, requests_mock): + source = SourceChartmogul() + logger_mock, config_mock = MagicMock(), MagicMock() + + # success + requests_mock.get("https://api.chartmogul.com/v1/ping", json={"data": "pong!"}) + assert source.check_connection(logger_mock, config_mock) == (True, None) + + # failure + requests_mock.get("https://api.chartmogul.com/v1/ping", status_code=500) + ok, err = source.check_connection(logger_mock, config_mock) + assert (ok, type(err)) == (False, HTTPError) + + +def test_streams(mocker): + source = SourceChartmogul() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 2 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-chartmogul/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-chartmogul/unit_tests/test_streams.py new file mode 100644 index 0000000000000..fc11b843aba87 --- /dev/null +++ b/airbyte-integrations/connectors/source-chartmogul/unit_tests/test_streams.py @@ -0,0 +1,84 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import pytest +from source_chartmogul.source import Activities, Customers + + +@pytest.fixture +def patch_base_class(mocker): + # Mock abstract methods to enable instantiating abstract class + mocker.patch.object(Customers, "__abstractmethods__", set()) + mocker.patch.object(Activities, "__abstractmethods__", set()) + + +class TestCustomers: + def test_request_params(self): + stream = Customers() + inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None} + expected_params = {"page": 1} + assert stream.request_params(**inputs) == expected_params + + next_page_token = {"page": 3} + inputs = {"stream_slice": None, "stream_state": None, "next_page_token": next_page_token} + expected_params = {"page": 3} + assert stream.request_params(**inputs) == expected_params + + def test_next_page_token(self, mocker): + stream = Customers() + response = mocker.MagicMock() + + # no more results + response.json.return_value = {"has_more": False} + inputs = {"response": response} + assert stream.next_page_token(**inputs) is None + + # there is more results + response.json.return_value = {"has_more": True, "current_page": 42} + inputs = {"response": response} + assert stream.next_page_token(**inputs) == {"page": 43} + + def test_parse_response(self, mocker): + stream = Customers() + response = mocker.MagicMock() + response.json.return_value = {"entries": [{"one": 1}, {"two": 2}]} + inputs = {"response": response} + expected_parsed_object = {"one": 1} + assert next(stream.parse_response(**inputs)) == expected_parsed_object + + +# Activites stream tests + + +class TestActivities: + def test_request_params(self): + # no start_date set + stream = Activities(start_date=None) + inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None} + assert stream.request_params(**inputs) == {} + + # start_date is set + stream.start_date = "2010-01-01" + inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None} + assert stream.request_params(**inputs) == {"start-date": stream.start_date} + + # start-after is available + next_page_token = {"start-after": "a-b-c"} + inputs = {"stream_slice": None, "stream_state": None, "next_page_token": next_page_token} + expected_params = next_page_token + assert stream.request_params(**inputs) == expected_params + + def test_next_page_token(self, mocker): + stream = Activities(start_date=None) + response = mocker.MagicMock() + + # no more results + response.json.return_value = {"has_more": False} + inputs = {"response": response} + assert stream.next_page_token(**inputs) is None + + # there is more results + response.json.return_value = {"has_more": True, "entries": [{"uuid": "unique-uuid"}]} + inputs = {"response": response} + assert stream.next_page_token(**inputs) == {"start-after": "unique-uuid"} diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 56c36d6dec8e2..f37b28be05589 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -51,6 +51,7 @@ * [Braintree](integrations/sources/braintree.md) * [Cart](integrations/sources/cart.md) * [Chargebee](integrations/sources/chargebee.md) + * [Chartmogul](integrations/sources/chartmogul.md) * [ClickHouse](integrations/sources/clickhouse.md) * [Close.com](integrations/sources/close-com.md) * [CockroachDB](integrations/sources/cockroachdb.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index f91687ce224ae..91b6163db9d6b 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -30,6 +30,7 @@ Airbyte uses a grading system for connectors to 
help users understand what to ex
 | [Bing Ads](sources/bing-ads.md) | Beta |
 | [Cart.com](sources/cart.md) | Beta |
 | [Chargebee](sources/chargebee.md) | Alpha |
+| [Chartmogul](sources/chartmogul.md) | Alpha |
 | [ClickHouse](sources/clickhouse.md) | Beta |
 | [Close.com](sources/close-com.md) | Beta |
 | [CockroachDB](sources/cockroachdb.md) | Beta |
diff --git a/docs/integrations/sources/chartmogul.md b/docs/integrations/sources/chartmogul.md
new file mode 100644
index 0000000000000..e8d19c2ddda27
--- /dev/null
+++ b/docs/integrations/sources/chartmogul.md
@@ -0,0 +1,47 @@
+# Chartmogul
+
+## Sync overview
+
+The Chartmogul source supports Full Refresh syncs only.
+
+This source syncs data for the [Chartmogul API](https://dev.chartmogul.com/reference/).
+
+### Notes
+
+If `start_date` is set, it will only apply to the `Activities` stream. The `Customers` endpoint does not provide a way to filter by creation or update dates.
+
+### Output schema
+
+This Source is capable of syncing the following streams:
+
+* [Customers](https://dev.chartmogul.com/reference/list-customers)
+* [Activities](https://dev.chartmogul.com/reference/list-activities)
+
+### Features
+
+| Feature | Supported?\(Yes/No\) |
+| :--- | :--- |
+| Full Refresh Sync | Yes |
+| Incremental - Append Sync | No |
+| Namespaces | No |
+
+### Performance considerations
+
+The Chartmogul connector should not run into Chartmogul API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully.
+
+## Getting started
+
+### Requirements
+
+* Chartmogul Account
+* Chartmogul API Key
+
+### Setup guide
+
+Please read [How to find your API key](https://dev.chartmogul.com/docs/authentication).
+ +## Changelog + +| Version | Date | Pull Request | Subject | +| :--- | :--- | :--- | :--- | +| 0.1.0 | 2022-01-10 | [9381](https://github.com/airbytehq/airbyte/pull/9381) | New Source: Chartmogul | From 0187efd03070def1eb705535435947b0e60ef008 Mon Sep 17 00:00:00 2001 From: Yurii Bidiuk <35812734+yurii-bidiuk@users.noreply.github.com> Date: Tue, 18 Jan 2022 21:59:01 +0200 Subject: [PATCH 145/215] Destination BigQuery: update description for some fields (#9573) * update desrcription for bq destination setup * bump version --- .../079d5540-f236-4294-ba7c-ade8fd918496.json | 2 +- .../22f6c74f-5699-40ff-833c-4a879ea40133.json | 2 +- .../seed/destination_definitions.yaml | 4 +-- .../resources/seed/destination_specs.yaml | 29 ++++++++++++------- .../Dockerfile | 2 +- .../src/main/resources/spec.json | 2 +- .../destination-bigquery/Dockerfile | 2 +- .../src/main/resources/spec.json | 4 +-- docs/integrations/destinations/bigquery.md | 2 ++ 9 files changed, 29 insertions(+), 20 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json index d433add5e8071..6c8151fe5c2e4 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "079d5540-f236-4294-ba7c-ade8fd918496", "name": "BigQuery (denormalized typed struct)", "dockerRepository": "airbyte/destination-bigquery-denormalized", - "dockerImageTag": "0.2.4", + "dockerImageTag": "0.2.5", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json index d076c305f7c2f..d968513b3de6e 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "22f6c74f-5699-40ff-833c-4a879ea40133", "name": "BigQuery", "dockerRepository": "airbyte/destination-bigquery", - "dockerImageTag": "0.6.4", + "dockerImageTag": "0.6.5", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index d1584307abe4b..6f120e18eca22 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -13,13 +13,13 @@ - name: BigQuery destinationDefinitionId: 22f6c74f-5699-40ff-833c-4a879ea40133 dockerRepository: airbyte/destination-bigquery - dockerImageTag: 0.6.4 + dockerImageTag: 0.6.5 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: BigQuery (denormalized typed struct) destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496 
dockerRepository: airbyte/destination-bigquery-denormalized - dockerImageTag: 0.2.4 + dockerImageTag: 0.2.5 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: Cassandra diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 235c7a9ebb711..6f9cb6856d36c 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -188,7 +188,7 @@ supportsDBT: false supported_destination_sync_modes: - "append" -- dockerImage: "airbyte/destination-bigquery:0.6.4" +- dockerImage: "airbyte/destination-bigquery:0.6.5" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: @@ -203,10 +203,12 @@ big_query_client_buffer_size_mb: title: "Google BigQuery client chunk size" description: "Google BigQuery client's chunk(buffer) size (MIN=1, MAX =\ - \ 15) for each table. The default 15MiB value is used if not set explicitly.\ - \ It's recommended to decrease value for big data sets migration for less\ - \ HEAP memory consumption and avoiding crashes. For more details refer\ - \ to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html" + \ 15) for each table. The size that will be written by a single RPC. Written\ + \ data will be buffered and only flushed upon reaching this size or closing\ + \ the channel. The default 15MiB value is used if not set explicitly.\ + \ It's recommended to decrease value for big data sets migration for\ + \ less HEAP memory consumption and avoiding crashes. For more details\ + \ refer to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html" type: "integer" minimum: 1 maximum: 15 @@ -271,9 +273,12 @@ airbyte_secret: true transformation_priority: type: "string" - description: "When running custom transformations or Basic normalization,\ - \ running queries on interactive mode can hit BQ limits, choosing batch\ - \ will solve those limits." + description: "Interactive run type means that the query is executed as soon\ + \ as possible, and these queries count towards concurrent rate limit and\ + \ daily limit. Batch queries are queued and started as soon as idle resources\ + \ are available in the BigQuery shared resource pool, which usually occurs\ + \ within a few minutes. Batch queries don’t count towards your concurrent\ + \ rate limit." title: "Transformation Query Run Type" default: "interactive" enum: @@ -378,7 +383,7 @@ - "overwrite" - "append" - "append_dedup" -- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.4" +- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.5" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: @@ -393,9 +398,11 @@ big_query_client_buffer_size_mb: title: "Google BigQuery client chunk size" description: "Google BigQuery client's chunk (buffer) size (MIN = 1, MAX\ - \ = 15) for each table. It defaults to 15MiB. Smaller chunk size means\ + \ = 15) for each table. The size that will be written by a single RPC.\ + \ Written data will be buffered and only flushed upon reaching this size\ + \ or closing the channel. It defaults to 15MiB. Smaller chunk size means\ \ less memory consumption, and is recommended for big data sets. 
For more\ - \ details refer to the documentation here" type: "integer" minimum: 1 diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile index 11d11d4387ad0..e283bc9062d5e 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-bigquery-denormalized COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.4 +LABEL io.airbyte.version=0.2.5 LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json index 829f6c3199a09..ee93cfcf42907 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json @@ -13,7 +13,7 @@ "properties": { "big_query_client_buffer_size_mb": { "title": "Google BigQuery client chunk size", - "description": "Google BigQuery client's chunk (buffer) size (MIN = 1, MAX = 15) for each table. It defaults to 15MiB. Smaller chunk size means less memory consumption, and is recommended for big data sets. For more details refer to the documentation here", + "description": "Google BigQuery client's chunk (buffer) size (MIN = 1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. It defaults to 15MiB. Smaller chunk size means less memory consumption, and is recommended for big data sets. For more details refer to the documentation here", "type": "integer", "minimum": 1, "maximum": 15, diff --git a/airbyte-integrations/connectors/destination-bigquery/Dockerfile b/airbyte-integrations/connectors/destination-bigquery/Dockerfile index ef038a9654030..57decbb9135eb 100644 --- a/airbyte-integrations/connectors/destination-bigquery/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-bigquery COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.6.4 +LABEL io.airbyte.version=0.6.5 LABEL io.airbyte.name=airbyte/destination-bigquery diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json index 879d4c12c6391..dd86d32e9e077 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json @@ -13,7 +13,7 @@ "properties": { "big_query_client_buffer_size_mb": { "title": "Google BigQuery client chunk size", - "description": "Google BigQuery client's chunk(buffer) size (MIN=1, MAX = 15) for each table. The default 15MiB value is used if not set explicitly. It's recommended to decrease value for big data sets migration for less HEAP memory consumption and avoiding crashes. For more details refer to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html", + "description": "Google BigQuery client's chunk(buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. 
Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MiB value is used if not set explicitly. It's recommended to decrease value for big data sets migration for less HEAP memory consumption and avoiding crashes. For more details refer to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html", "type": "integer", "minimum": 1, "maximum": 15, @@ -77,7 +77,7 @@ }, "transformation_priority": { "type": "string", - "description": "When running custom transformations or Basic normalization, running queries on interactive mode can hit BQ limits, choosing batch will solve those limits.", + "description": "Interactive run type means that the query is executed as soon as possible, and these queries count towards concurrent rate limit and daily limit. Batch queries are queued and started as soon as idle resources are available in the BigQuery shared resource pool, which usually occurs within a few minutes. Batch queries don’t count towards your concurrent rate limit.", "title": "Transformation Query Run Type", "default": "interactive", "enum": ["interactive", "batch"] diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index fffbea6c7c4f4..d87b2c73143e4 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -153,6 +153,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.6.5 | 2022-01-18 | [\#9573](https://github.com/airbytehq/airbyte/pull/9573) | BigQuery Destination : update description for some input fields | | 0.6.4 | 2022-01-17 | [\#8383](https://github.com/airbytehq/airbyte/issues/8383) | Support dataset-id prefixed by project-id | | 0.6.3 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | | 0.6.2 | 2022-01-10 | [\#9121](https://github.com/airbytehq/airbyte/pull/9121) | Fixed check method for GCS mode to verify if all roles assigned to user | @@ -173,6 +174,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------| :--- | +| 0.2.5 | 2022-01-18 | [\#9573](https://github.com/airbytehq/airbyte/pull/9573) | BigQuery Destination : update description for some input fields | | 0.2.4 | 2022-01-17 | [\#8383](https://github.com/airbytehq/airbyte/issues/8383) | BigQuery/BiqQuery denorm Destinations : Support dataset-id prefixed by project-id | | 0.2.3 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | | 0.2.2 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | From cbba9fa6da39a2f3766ac06d62081b06027aeac8 Mon Sep 17 00:00:00 2001 From: Tim Roes Date: Tue, 18 Jan 2022 21:44:23 +0100 Subject: [PATCH 146/215] =?UTF-8?q?=F0=9F=90=9B=20Fix=20syntax=20errors=20?= =?UTF-8?q?in=20our=20specification=20docs=20(#9576)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/understanding-airbyte/airbyte-specification.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/understanding-airbyte/airbyte-specification.md 
b/docs/understanding-airbyte/airbyte-specification.md index 545bb0460ba64..02726cf31e793 100644 --- a/docs/understanding-airbyte/airbyte-specification.md +++ b/docs/understanding-airbyte/airbyte-specification.md @@ -107,9 +107,7 @@ read(Config, ConfiguredAirbyteCatalog, State) -> Stream "type": "string" }, "age": { - "type": { - "number" - } + "type": "number" } } } @@ -150,8 +148,8 @@ read(Config, ConfiguredAirbyteCatalog, State) -> Stream "type": "object", "required": ["name", "productId"], "properties": { - "name": "string", - "productId": "number" + "name": { "type": "string" }, + "productId": { "type": "number" } } } } From 7a2624612dec7175fd5d2493bb6d6da18fed8779 Mon Sep 17 00:00:00 2001 From: Tim Roes Date: Tue, 18 Jan 2022 21:44:49 +0100 Subject: [PATCH 147/215] =?UTF-8?q?=F0=9F=90=9B=20Minor=20correction=20in?= =?UTF-8?q?=20the=20frontend=20tech-stack=20(#9547)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/understanding-airbyte/tech-stack.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/understanding-airbyte/tech-stack.md b/docs/understanding-airbyte/tech-stack.md index 59a6cc4b9fe08..cb5a9476007ce 100644 --- a/docs/understanding-airbyte/tech-stack.md +++ b/docs/understanding-airbyte/tech-stack.md @@ -26,7 +26,8 @@ Connectors can be written in any language. However the most common languages are * CI/CD: [GitHub Actions](https://github.com/features/actions) * Containerization: [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/) -* Linter \(Frontend\): [Prettier](https://prettier.io/) +* Linter \(Frontend\): [ESLint](https://eslint.org/) +* Formatter \(Frontend\): [Prettier](https://prettier.io/) * Formatter \(Backend\): [Spotless](https://github.com/diffplug/spotless) ## FAQ From f3798edba6b817662bbabf04fac78af68a389e57 Mon Sep 17 00:00:00 2001 From: Artem Astapenko <3767150+Jamakase@users.noreply.github.com> Date: Wed, 19 Jan 2022 00:13:27 +0300 Subject: [PATCH 148/215] add node strict check (#9335) --- airbyte-webapp/.npmrc | 1 + airbyte-webapp/package-lock.json | 3 +++ airbyte-webapp/package.json | 3 +++ 3 files changed, 7 insertions(+) create mode 100644 airbyte-webapp/.npmrc diff --git a/airbyte-webapp/.npmrc b/airbyte-webapp/.npmrc new file mode 100644 index 0000000000000..b6f27f1359546 --- /dev/null +++ b/airbyte-webapp/.npmrc @@ -0,0 +1 @@ +engine-strict=true diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 888bf1f20d1f8..929778b2c117f 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -85,6 +85,9 @@ "tar": "^6.1.11", "tmpl": "^1.0.5", "typescript": "4.2.4" + }, + "engines": { + "node": ">=16.0.0" } }, "node_modules/@babel/code-frame": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index f71dd4be04ee4..f8e99c4f9f898 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -2,6 +2,9 @@ "name": "airbyte-webapp", "version": "0.35.5-alpha", "private": true, + "engines": { + "node": ">=16.0.0" + }, "scripts": { "start": "react-scripts start", "build": "react-scripts build", From 0a3713a5a52995dc0dc205d8edfd097bf625899f Mon Sep 17 00:00:00 2001 From: augan-rymkhan <93112548+augan-rymkhan@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:23:54 +0600 Subject: [PATCH 149/215] Source Salesforce: Deprecate API Type parameter (#9302) * use BULK for the first sync, REST for incremental sync * if stream contains compound data or/and base64 use 
always REST * fix get stream state from connector state * fix integration test * refactor catalog name * format code * refactor unit tests * refactor unit tests 2 * format code 2 * Set additionalProperties to true not to break test temporarily * fix unit test and remove unnecessary filtering fields * bump version * updated spec and def yaml Co-authored-by: auganbay --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 15 +- .../connectors/source-salesforce/Dockerfile | 2 +- .../acceptance-test-config.yml | 15 +- .../integration_tests/bulk_error_test.py | 4 +- ...alog_bulk.json => configured_catalog.json} | 12 ++ .../configured_catalog_rest.json | 98 --------- .../integration_tests/invalid_config.json | 3 +- .../source_salesforce/api.py | 8 +- .../source_salesforce/source.py | 36 ++-- .../source_salesforce/spec.json | 11 +- .../source_salesforce/streams.py | 35 --- .../source-salesforce/unit_tests/unit_test.py | 201 ++++++++---------- docs/integrations/sources/salesforce.md | 1 + 14 files changed, 135 insertions(+), 308 deletions(-) rename airbyte-integrations/connectors/source-salesforce/integration_tests/{configured_catalog_bulk.json => configured_catalog.json} (89%) delete mode 100644 airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 2d86e3f7b999a..118ba80d73046 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -620,7 +620,7 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 0.1.16 + dockerImageTag: 0.1.17 documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 005c5af601381..e4d51db2ea2f3 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6452,7 +6452,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:0.1.16" +- dockerImage: "airbyte/source-salesforce:0.1.17" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" connectionSpecification: @@ -6463,8 +6463,7 @@ - "client_id" - "client_secret" - "refresh_token" - - "api_type" - additionalProperties: false + additionalProperties: true properties: auth_type: type: "string" @@ -6506,16 +6505,6 @@ >docs." type: "boolean" default: false - api_type: - title: "API Type" - description: "Unless you know that you are transferring a very small amount\ - \ of data, prefer using the BULK API. This will help avoid using up all\ - \ of your API call quota with Salesforce. Valid values are BULK or REST." 
- type: "string" - enum: - - "BULK" - - "REST" - default: "BULK" streams_criteria: type: "array" items: diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile index 812b159689030..37713bed463b8 100644 --- a/airbyte-integrations/connectors/source-salesforce/Dockerfile +++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile @@ -25,5 +25,5 @@ COPY source_salesforce ./source_salesforce ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.16 +LABEL io.airbyte.version=0.1.17 LABEL io.airbyte.name=airbyte/source-salesforce diff --git a/airbyte-integrations/connectors/source-salesforce/acceptance-test-config.yml b/airbyte-integrations/connectors/source-salesforce/acceptance-test-config.yml index ec590cfd42275..6d379470c03ec 100644 --- a/airbyte-integrations/connectors/source-salesforce/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-salesforce/acceptance-test-config.yml @@ -5,8 +5,6 @@ tests: spec: - spec_path: "source_salesforce/spec.json" connection: - - config_path: "secrets/config_bulk.json" - status: "succeed" - config_path: "secrets/config.json" status: "succeed" - config_path: "integration_tests/invalid_config.json" @@ -15,18 +13,11 @@ tests: - config_path: "secrets/config.json" basic_read: - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog_rest.json" - - config_path: "secrets/config_bulk.json" - configured_catalog_path: "integration_tests/configured_catalog_bulk.json" + configured_catalog_path: "integration_tests/configured_catalog.json" incremental: - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog_rest.json" - future_state_path: "integration_tests/future_state.json" - - config_path: "secrets/config_bulk.json" - configured_catalog_path: "integration_tests/configured_catalog_bulk.json" + configured_catalog_path: "integration_tests/configured_catalog.json" future_state_path: "integration_tests/future_state.json" full_refresh: - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog_rest.json" - - config_path: "secrets/config_bulk.json" - configured_catalog_path: "integration_tests/configured_catalog_bulk.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-salesforce/integration_tests/bulk_error_test.py b/airbyte-integrations/connectors/source-salesforce/integration_tests/bulk_error_test.py index 8525384bb697e..fb6f63f2ec277 100644 --- a/airbyte-integrations/connectors/source-salesforce/integration_tests/bulk_error_test.py +++ b/airbyte-integrations/connectors/source-salesforce/integration_tests/bulk_error_test.py @@ -16,7 +16,7 @@ @pytest.fixture(name="input_config") def parse_input_config(): - with open(HERE.parent / "secrets/config_bulk.json", "r") as file: + with open(HERE.parent / "secrets/config.json", "r") as file: return json.loads(file.read()) @@ -28,7 +28,7 @@ def get_stream(input_config: Mapping[str, Any], stream_name: str) -> Stream: def get_any_real_stream(input_config: Mapping[str, Any]) -> Stream: - return get_stream(input_config, "Account") + return get_stream(input_config, "ActiveFeatureLicenseMetric") def test_not_queryable_stream(caplog, input_config): diff --git 
a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog.json similarity index 89% rename from airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json rename to airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog.json index cfac02393b5f3..c5f317729aa59 100644 --- a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json +++ b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog.json @@ -58,6 +58,18 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "Asset", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["SystemModstamp"], + "source_defined_primary_key": [["Id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" + }, { "stream": { "name": "FormulaFunctionAllowedType", diff --git a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json deleted file mode 100644 index d81043ee5e6e7..0000000000000 --- a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "Account", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "ActiveFeatureLicenseMetric", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "ActivePermSetLicenseMetric", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "ActiveProfileMetric", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "AppDefinition", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "Asset", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "PermissionSetTabSetting", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - 
"source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "LeadHistory", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["CreatedDate"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - } - ] -} diff --git a/airbyte-integrations/connectors/source-salesforce/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-salesforce/integration_tests/invalid_config.json index 9b8f07e9c38fa..7bf437ac9fe3d 100644 --- a/airbyte-integrations/connectors/source-salesforce/integration_tests/invalid_config.json +++ b/airbyte-integrations/connectors/source-salesforce/integration_tests/invalid_config.json @@ -3,6 +3,5 @@ "client_secret": "fake-client-secret", "refresh_token": "fake-refresh-token", "start_date": "2020-10-02T00:00:00Z", - "is_sandbox": false, - "api_type": "REST" + "is_sandbox": false } diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/api.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/api.py index 51f9cf9fc27ed..6487b4dcf507e 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/api.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/api.py @@ -182,10 +182,8 @@ def __init__( client_secret: str = None, is_sandbox: bool = None, start_date: str = None, - api_type: str = None, **kwargs, ): - self.api_type = api_type.upper() if api_type else None self.refresh_token = refresh_token self.token = token self.client_id = client_id @@ -200,11 +198,7 @@ def _get_standard_headers(self): return {"Authorization": "Bearer {}".format(self.access_token)} def get_streams_black_list(self) -> List[str]: - black_list = QUERY_RESTRICTED_SALESFORCE_OBJECTS + QUERY_INCOMPATIBLE_SALESFORCE_OBJECTS - if self.api_type == "REST": - return black_list - else: - return black_list + UNSUPPORTED_BULK_API_SALESFORCE_OBJECTS + return QUERY_RESTRICTED_SALESFORCE_OBJECTS + QUERY_INCOMPATIBLE_SALESFORCE_OBJECTS def filter_streams(self, stream_name: str) -> bool: # REST and BULK API do not support all entities that end with `ChangeEvent`. 
diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/source.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/source.py index d1cf51cd2f351..c28e74045f811 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/source.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/source.py @@ -12,7 +12,7 @@ from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator from airbyte_cdk.sources.utils.schema_helpers import split_config -from .api import UNSUPPORTED_FILTERING_STREAMS, Salesforce +from .api import UNSUPPORTED_BULK_API_SALESFORCE_OBJECTS, UNSUPPORTED_FILTERING_STREAMS, Salesforce from .streams import BulkIncrementalSalesforceStream, BulkSalesforceStream, IncrementalSalesforceStream, SalesforceStream @@ -28,18 +28,30 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> return True, None @classmethod - def generate_streams(cls, config: Mapping[str, Any], stream_names: List[str], sf_object: Salesforce) -> List[Stream]: + def generate_streams( + cls, config: Mapping[str, Any], stream_names: List[str], sf_object: Salesforce, state: Mapping[str, Any] = None + ) -> List[Stream]: """ "Generates a list of stream by their names. It can be used for different tests too""" authenticator = TokenAuthenticator(sf_object.access_token) - streams_kwargs = {} - if config["api_type"] == "REST": - full_refresh, incremental = SalesforceStream, IncrementalSalesforceStream - else: - full_refresh, incremental = BulkSalesforceStream, BulkIncrementalSalesforceStream - streams_kwargs["wait_timeout"] = config.get("wait_timeout") - streams = [] for stream_name in stream_names: + streams_kwargs = {} + stream_state = state.get(stream_name, {}) if state else {} + + selected_properties = sf_object.generate_schema(stream_name).get("properties", {}) + # Salesforce BULK API currently does not support loading fields with data type base64 and compound data + properties_not_supported_by_bulk = { + key: value for key, value in selected_properties.items() if value.get("format") == "base64" or "object" in value["type"] + } + + if stream_state or stream_name in UNSUPPORTED_BULK_API_SALESFORCE_OBJECTS or properties_not_supported_by_bulk: + # Use REST API + full_refresh, incremental = SalesforceStream, IncrementalSalesforceStream + else: + # Use BULK API + full_refresh, incremental = BulkSalesforceStream, BulkIncrementalSalesforceStream + streams_kwargs["wait_timeout"] = config.get("wait_timeout") + json_schema = sf_object.generate_schema(stream_name) pk, replication_key = sf_object.get_pk_and_replication_key(json_schema) streams_kwargs.update(dict(sf_api=sf_object, pk=pk, stream_name=stream_name, schema=json_schema, authenticator=authenticator)) @@ -50,10 +62,10 @@ def generate_streams(cls, config: Mapping[str, Any], stream_names: List[str], sf return streams - def streams(self, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog = None) -> List[Stream]: + def streams(self, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog = None, state: Mapping[str, Any] = None) -> List[Stream]: sf = self._get_sf_object(config) stream_names = sf.get_validated_streams(config=config, catalog=catalog) - return self.generate_streams(config, stream_names, sf) + return self.generate_streams(config, stream_names, sf, state=state) def read( self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None @@ -66,7 +78,7 @@ def read( config, 
internal_config = split_config(config) # get the streams once in case the connector needs to make any queries to generate them logger.info("Starting generating streams") - stream_instances = {s.name: s for s in self.streams(config, catalog=catalog)} + stream_instances = {s.name: s for s in self.streams(config, catalog=catalog, state=state)} logger.info(f"Starting syncing {self.name}") self._stream_to_instance_map = stream_instances for configured_stream in catalog.streams: diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json b/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json index 1435f17cf04e9..1c72c854ecd22 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json @@ -4,8 +4,8 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Salesforce Source Spec", "type": "object", - "required": ["client_id", "client_secret", "refresh_token", "api_type"], - "additionalProperties": false, + "required": ["client_id", "client_secret", "refresh_token"], + "additionalProperties": true, "properties": { "auth_type": { "type": "string", @@ -41,13 +41,6 @@ "type": "boolean", "default": false }, - "api_type": { - "title": "API Type", - "description": "Unless you know that you are transferring a very small amount of data, prefer using the BULK API. This will help avoid using up all of your API call quota with Salesforce. Valid values are BULK or REST.", - "type": "string", - "enum": ["BULK", "REST"], - "default": "BULK" - }, "streams_criteria": { "type": "array", "items": { diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py index 6e2867b27f1d4..aa6d3ca9fd6d1 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py @@ -22,7 +22,6 @@ class SalesforceStream(HttpStream, ABC): page_size = 2000 - transformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) def __init__(self, sf_api: Salesforce, pk: str, stream_name: str, schema: dict = None, **kwargs): @@ -69,15 +68,6 @@ def request_params( return {} selected_properties = self.get_json_schema().get("properties", {}) - - # Salesforce BULK API currently does not support loading fields with data type base64 and compound data - if self.sf_api.api_type == "BULK": - selected_properties = { - key: value - for key, value in selected_properties.items() - if value.get("format") != "base64" and "object" not in value["type"] - } - query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " if self.primary_key and self.name not in UNSUPPORTED_FILTERING_STREAMS: @@ -274,15 +264,6 @@ def request_params( """ selected_properties = self.get_json_schema().get("properties", {}) - - # Salesforce BULK API currently does not support loading fields with data type base64 and compound data - if self.sf_api.api_type == "BULK": - selected_properties = { - key: value - for key, value in selected_properties.items() - if value.get("format") != "base64" and "object" not in value["type"] - } - query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " if next_page_token: query += next_page_token @@ -349,14 +330,6 @@ def request_params( selected_properties = self.get_json_schema().get("properties", {}) - # Salesforce BULK 
API currently does not support loading fields with data type base64 and compound data - if self.sf_api.api_type == "BULK": - selected_properties = { - key: value - for key, value in selected_properties.items() - if value.get("format") != "base64" and "object" not in value["type"] - } - stream_date = stream_state.get(self.cursor_field) start_date = stream_date or self.start_date @@ -392,14 +365,6 @@ def request_params( ) -> MutableMapping[str, Any]: selected_properties = self.get_json_schema().get("properties", {}) - # Salesforce BULK API currently does not support loading fields with data type base64 and compound data - if self.sf_api.api_type == "BULK": - selected_properties = { - key: value - for key, value in selected_properties.items() - if value.get("format") != "base64" and "object" not in value["type"] - } - stream_date = stream_state.get(self.cursor_field) start_date = next_page_token or stream_date or self.start_date diff --git a/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py index 41f98f12772a2..19db4c2ddcb2e 100644 --- a/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py @@ -14,7 +14,7 @@ @pytest.fixture(scope="module") -def stream_bulk_config(): +def stream_config(): """Generates streams settings for BULK logic""" return { "client_id": "fake_client_id", @@ -23,39 +23,11 @@ def stream_bulk_config(): "start_date": "2010-01-18T21:18:20Z", "is_sandbox": False, "wait_timeout": 15, - "api_type": "BULK", } @pytest.fixture(scope="module") -def stream_bulk_config_without_start_date(): - """Generates streams settings for BULK logic without start_date""" - return { - "client_id": "fake_client_id", - "client_secret": "fake_client_secret", - "refresh_token": "fake_refresh_token", - "is_sandbox": False, - "wait_timeout": 15, - "api_type": "BULK", - } - - -@pytest.fixture(scope="module") -def stream_rest_config(): - """Generates streams settings for BULK logic""" - return { - "client_id": "fake_client_id", - "client_secret": "fake_client_secret", - "refresh_token": "fake_refresh_token", - "start_date": "2010-01-18T21:18:20Z", - "is_sandbox": False, - "wait_timeout": 15, - "api_type": "REST", - } - - -@pytest.fixture(scope="module") -def stream_rest_config_date_format(): +def stream_config_date_format(): """Generates streams settings with `start_date` in format YYYY-MM-DD""" return { "client_id": "fake_client_id", @@ -64,12 +36,11 @@ def stream_rest_config_date_format(): "start_date": "2010-01-18", "is_sandbox": False, "wait_timeout": 15, - "api_type": "REST", } @pytest.fixture(scope="module") -def stream_rest_config_without_start_date(): +def stream_config_without_start_date(): """Generates streams settings for REST logic without start_date""" return { "client_id": "fake_client_id", @@ -77,51 +48,39 @@ def stream_rest_config_without_start_date(): "refresh_token": "fake_refresh_token", "is_sandbox": False, "wait_timeout": 15, - "api_type": "REST", } -def _stream_api(stream_config): +def _stream_api(stream_config, describe_response_data=None): sf_object = Salesforce(**stream_config) sf_object.login = Mock() sf_object.access_token = Mock() sf_object.instance_url = "https://fase-account.salesforce.com" - sf_object.describe = Mock(return_value={"fields": [{"name": "LastModifiedDate", "type": "string"}]}) + + response_data = {"fields": [{"name": "LastModifiedDate", "type": "string"}]} + if 
describe_response_data: + response_data = describe_response_data + sf_object.describe = Mock(return_value=response_data) return sf_object @pytest.fixture(scope="module") -def stream_rest_api(stream_rest_config): - return _stream_api(stream_rest_config) +def stream_api(stream_config): + return _stream_api(stream_config) @pytest.fixture(scope="module") -def stream_bulk_api(stream_bulk_config): - return _stream_api(stream_bulk_config) +def stream_api_v2(stream_config): + describe_response_data = {"fields": [{"name": "LastModifiedDate", "type": "string"}, {"name": "BillingAddress", "type": "address"}]} + return _stream_api(stream_config, describe_response_data=describe_response_data) -def _generate_stream(stream_name, stream_config, stream_api): - return SourceSalesforce.generate_streams(stream_config, [stream_name], stream_api)[0] +def _generate_stream(stream_name, stream_config, stream_api, state=None): + return SourceSalesforce.generate_streams(stream_config, [stream_name], stream_api, state=state)[0] -@pytest.mark.parametrize( - "api_type,stream_name,expected_cls", - [ - ("BULK", "Account", BulkIncrementalSalesforceStream), - ("BULK", "FormulaFunctionAllowedType", BulkSalesforceStream), - ("REST", "ActiveFeatureLicenseMetric", IncrementalSalesforceStream), - ("REST", "AppDefinition", SalesforceStream), - ], -) -def test_stream_generator(api_type, stream_name, expected_cls, stream_bulk_config, stream_bulk_api, stream_rest_config, stream_rest_api): - stream_config, stream_api = (stream_rest_config, stream_rest_api) if api_type == "REST" else (stream_bulk_config, stream_bulk_api) - stream = _generate_stream(stream_name, stream_config, stream_api) - assert stream.name == stream_name - assert isinstance(stream, expected_cls) - - -def test_bulk_sync_creation_failed(stream_bulk_config, stream_bulk_api): - stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_bulk_config, stream_bulk_api) +def test_bulk_sync_creation_failed(stream_config, stream_api): + stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_config, stream_api) with requests_mock.Mocker() as m: m.register_uri("POST", stream.path(), status_code=400, json=[{"message": "test_error"}]) with pytest.raises(HTTPError) as err: @@ -129,31 +88,48 @@ def test_bulk_sync_creation_failed(stream_bulk_config, stream_bulk_api): assert err.value.response.json()[0]["message"] == "test_error" -def test_bulk_sync_unsupported_stream(stream_bulk_config, stream_bulk_api, caplog): +def test_stream_unsupported_by_bulk(stream_config, stream_api, caplog): + """ + Stream `AcceptedEventRelation` is not supported by BULK API, so that REST API stream will be used for it. 
+ """ stream_name = "AcceptedEventRelation" - stream: BulkIncrementalSalesforceStream = _generate_stream(stream_name, stream_bulk_config, stream_bulk_api) - with requests_mock.Mocker() as m: - m.register_uri( - "POST", - stream.path(), - status_code=400, - json=[{"errorCode": "INVALIDENTITY", "message": f"Entity '{stream_name}' is not supported by the Bulk API."}], - ) - list(stream.read_records(sync_mode=SyncMode.full_refresh)) + stream = _generate_stream(stream_name, stream_config, stream_api) + assert not isinstance(stream, BulkSalesforceStream) - logs = caplog.records - assert logs - assert logs[1].levelname == "ERROR" - assert ( - logs[1].msg - == f"Cannot receive data for stream '{stream_name}' using BULK API, error message: 'Entity '{stream_name}' is not supported by the Bulk API.'" - ) +def test_stream_contains_unsupported_properties_by_bulk(stream_config, stream_api_v2): + """ + Stream `Account` contains compound field such as BillingAddress, which is not supported by BULK API (csv), + in that case REST API stream will be used for it. + """ + stream_name = "Account" + stream = _generate_stream(stream_name, stream_config, stream_api_v2) + assert not isinstance(stream, BulkSalesforceStream) + + +def test_stream_has_state_rest_api_should_be_used(stream_config, stream_api): + """ + Stream `ActiveFeatureLicenseMetric` has state, in that case REST API stream will be used for it. + """ + stream_name = "ActiveFeatureLicenseMetric" + state = {stream_name: {"SystemModstamp": "2122-08-22T05:08:29.000Z"}} + stream = _generate_stream(stream_name, stream_config, stream_api, state=state) + assert not isinstance(stream, BulkSalesforceStream) + + +def test_stream_has_no_state_bulk_api_should_be_used(stream_config, stream_api): + """ + Stream `ActiveFeatureLicenseMetric` has no state, in that case BULK API stream will be used for it. 
+ """ + stream_name = "ActiveFeatureLicenseMetric" + state = {"other_stream": {"SystemModstamp": "2122-08-22T05:08:29.000Z"}} + stream = _generate_stream(stream_name, stream_config, stream_api, state=state) + assert isinstance(stream, BulkSalesforceStream) @pytest.mark.parametrize("item_number", [0, 15, 2000, 2324, 193434]) -def test_bulk_sync_pagination(item_number, stream_bulk_config, stream_bulk_api): - stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_bulk_config, stream_bulk_api) +def test_bulk_sync_pagination(item_number, stream_config, stream_api): + stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_config, stream_api) test_ids = [i for i in range(1, item_number)] pages = [test_ids[i : i + stream.page_size] for i in range(0, len(test_ids), stream.page_size)] if not pages: @@ -189,16 +165,16 @@ def _get_result_id(stream): return int(list(stream.read_records(sync_mode=SyncMode.full_refresh))[0]["ID"]) -def test_bulk_sync_successful(stream_bulk_config, stream_bulk_api): - stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_bulk_config, stream_bulk_api) +def test_bulk_sync_successful(stream_config, stream_api): + stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_config, stream_api) with requests_mock.Mocker() as m: job_id = _prepare_mock(m, stream) m.register_uri("GET", stream.path() + f"/{job_id}", [{"json": {"state": "JobComplete"}}]) assert _get_result_id(stream) == 1 -def test_bulk_sync_successful_long_response(stream_bulk_config, stream_bulk_api): - stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_bulk_config, stream_bulk_api) +def test_bulk_sync_successful_long_response(stream_config, stream_api): + stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_config, stream_api) with requests_mock.Mocker() as m: job_id = _prepare_mock(m, stream) m.register_uri( @@ -216,8 +192,8 @@ def test_bulk_sync_successful_long_response(stream_bulk_config, stream_bulk_api) # maximum timeout is wait_timeout * max_retry_attempt # this test tries to check a job state 17 times with +-1second for very one @pytest.mark.timeout(17) -def test_bulk_sync_successful_retry(stream_bulk_config, stream_bulk_api): - stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_bulk_config, stream_bulk_api) +def test_bulk_sync_successful_retry(stream_config, stream_api): + stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_config, stream_api) stream._wait_timeout = 0.1 # maximum wait timeout will be 6 seconds with requests_mock.Mocker() as m: job_id = _prepare_mock(m, stream) @@ -230,8 +206,8 @@ def test_bulk_sync_successful_retry(stream_bulk_config, stream_bulk_api): @pytest.mark.timeout(30) -def test_bulk_sync_failed_retry(stream_bulk_config, stream_bulk_api): - stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_bulk_config, stream_bulk_api) +def test_bulk_sync_failed_retry(stream_config, stream_api): + stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_config, stream_api) stream._wait_timeout = 0.1 # maximum wait timeout will be 6 seconds with requests_mock.Mocker() as m: job_id = _prepare_mock(m, stream) @@ -242,53 +218,43 @@ def test_bulk_sync_failed_retry(stream_bulk_config, stream_bulk_api): @pytest.mark.parametrize( - "api_type,start_date_provided,stream_name,expected_start_date", + "start_date_provided,stream_name,expected_start_date", [ - ("BULK", True, 
"Account", "2010-01-18T21:18:20Z"), - ("BULK", False, "Account", None), - ("REST", True, "ActiveFeatureLicenseMetric", "2010-01-18T21:18:20Z"), - ("REST", False, "ActiveFeatureLicenseMetric", None), + (True, "Account", "2010-01-18T21:18:20Z"), + (False, "Account", None), + (True, "ActiveFeatureLicenseMetric", "2010-01-18T21:18:20Z"), + (False, "ActiveFeatureLicenseMetric", None), ], ) def test_stream_start_date( - api_type, start_date_provided, stream_name, expected_start_date, - stream_bulk_config, - stream_bulk_api, - stream_rest_config, - stream_rest_api, - stream_rest_config_without_start_date, - stream_bulk_config_without_start_date, + stream_config, + stream_api, + stream_config_without_start_date, ): if start_date_provided: - stream_config, stream_api = (stream_rest_config, stream_rest_api) if api_type == "REST" else (stream_bulk_config, stream_bulk_api) stream = _generate_stream(stream_name, stream_config, stream_api) else: - stream_config, stream_api = ( - (stream_rest_config_without_start_date, stream_rest_api) - if api_type == "REST" - else (stream_bulk_config_without_start_date, stream_bulk_api) - ) - stream = _generate_stream(stream_name, stream_config, stream_api) + stream = _generate_stream(stream_name, stream_config_without_start_date, stream_api) assert stream.start_date == expected_start_date -def test_stream_start_date_should_be_converted_to_datetime_format(stream_rest_config_date_format, stream_rest_api): - stream: IncrementalSalesforceStream = _generate_stream("ActiveFeatureLicenseMetric", stream_rest_config_date_format, stream_rest_api) +def test_stream_start_date_should_be_converted_to_datetime_format(stream_config_date_format, stream_api): + stream: IncrementalSalesforceStream = _generate_stream("ActiveFeatureLicenseMetric", stream_config_date_format, stream_api) assert stream.start_date == "2010-01-18T00:00:00Z" -def test_stream_start_datetime_format_should_not_changed(stream_rest_config, stream_rest_api): - stream: IncrementalSalesforceStream = _generate_stream("ActiveFeatureLicenseMetric", stream_rest_config, stream_rest_api) +def test_stream_start_datetime_format_should_not_changed(stream_config, stream_api): + stream: IncrementalSalesforceStream = _generate_stream("ActiveFeatureLicenseMetric", stream_config, stream_api) assert stream.start_date == "2010-01-18T21:18:20Z" -def test_download_data_filter_null_bytes(stream_bulk_config, stream_bulk_api): +def test_download_data_filter_null_bytes(stream_config, stream_api): job_full_url: str = "https://fase-account.salesforce.com/services/data/v52.0/jobs/query/7504W00000bkgnpQAA" - stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_bulk_config, stream_bulk_api) + stream: BulkIncrementalSalesforceStream = _generate_stream("Account", stream_config, stream_api) with requests_mock.Mocker() as m: m.register_uri("GET", f"{job_full_url}/results", content=b"\x00") @@ -327,9 +293,9 @@ def test_download_data_filter_null_bytes(stream_bulk_config, stream_bulk_api): ), ], ) -def test_discover_with_streams_criteria_param(streams_criteria, predicted_filtered_streams, stream_rest_config): - updated_config = {**stream_rest_config, **{"streams_criteria": streams_criteria}} - sf_object = Salesforce(**stream_rest_config) +def test_discover_with_streams_criteria_param(streams_criteria, predicted_filtered_streams, stream_config): + updated_config = {**stream_config, **{"streams_criteria": streams_criteria}} + sf_object = Salesforce(**stream_config) sf_object.login = Mock() sf_object.access_token = Mock() 
sf_object.instance_url = "https://fase-account.salesforce.com" @@ -351,8 +317,11 @@ def test_discover_with_streams_criteria_param(streams_criteria, predicted_filter assert sorted(filtered_streams) == sorted(predicted_filtered_streams) -def test_pagination_rest(stream_rest_config, stream_rest_api): - stream: SalesforceStream = _generate_stream("Account", stream_rest_config, stream_rest_api) +def test_pagination_rest(stream_config, stream_api): + stream_name = "ActiveFeatureLicenseMetric" + state = {stream_name: {"SystemModstamp": "2122-08-22T05:08:29.000Z"}} + + stream: SalesforceStream = _generate_stream(stream_name, stream_config, stream_api, state=state) stream._wait_timeout = 0.1 # maximum wait timeout will be 6 seconds next_page_url = "/services/data/v52.0/query/012345" with requests_mock.Mocker() as m: diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index 278eca3b9b5ae..f52bed6b32de2 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -737,6 +737,7 @@ List of available streams: | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:--------------------------------------------------------------------------| +| 0.1.17 | 2022-01-19 | [9302](https://github.com/airbytehq/airbyte/pull/9302) | Deprecate API Type parameter | | 0.1.16 | 2022-01-18 | [9151](https://github.com/airbytehq/airbyte/pull/9151) | Fix pagination in REST API streams | | 0.1.15 | 2022-01-11 | [9409](https://github.com/airbytehq/airbyte/pull/9409) | Correcting the presence of an extra `else` handler in the error handling | | 0.1.14 | 2022-01-11 | [9386](https://github.com/airbytehq/airbyte/pull/9386) | Handling 400 error, while `sobject` doesn't support `query` or `queryAll` requests | From 38a5d846119053bc965e35b86a4a733844d0f75f Mon Sep 17 00:00:00 2001 From: Tim Roes Date: Wed, 19 Jan 2022 11:47:40 +0100 Subject: [PATCH 150/215] =?UTF-8?q?=F0=9F=90=9B=20Prevent=20SVG=20icons=20?= =?UTF-8?q?from=20leaking=20into=20DOM=20(#9574)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- airbyte-webapp/src/utils/imageUtils.tsx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/airbyte-webapp/src/utils/imageUtils.tsx b/airbyte-webapp/src/utils/imageUtils.tsx index 7dcb5671c8efa..f64222ada8df7 100644 --- a/airbyte-webapp/src/utils/imageUtils.tsx +++ b/airbyte-webapp/src/utils/imageUtils.tsx @@ -3,12 +3,9 @@ import styled from "styled-components"; import { DefaultLogoCatalog } from "components"; -const IconContainer = styled.div` +const IconContainer = styled.img` height: 100%; - & > svg { - height: 100%; - width: 100%; - } + width: 100%; `; const IconDefaultContainer = styled.div` @@ -24,5 +21,10 @@ export const getIcon = (icon?: string): React.ReactNode => { ); } - return ; + return ( + + ); }; From cb8644e2d509c62ad31fed37977618f04c4fac2e Mon Sep 17 00:00:00 2001 From: Tim Roes Date: Wed, 19 Jan 2022 12:11:07 +0100 Subject: [PATCH 151/215] Remove empty helm workflow (#9592) --- .github/workflows/helm.yaml | 74 ------------------------------------- 1 file changed, 74 deletions(-) delete mode 100644 .github/workflows/helm.yaml diff --git a/.github/workflows/helm.yaml b/.github/workflows/helm.yaml deleted file mode 100644 index 8aa51fd9ead63..0000000000000 --- a/.github/workflows/helm.yaml +++ /dev/null @@ -1,74 +0,0 @@ -#name: Helm -#on: -# push: -# paths: -# - ".github/workflows/helm.yaml" -# - "charts/**" -# pull_request: -# 
paths: -# - ".github/workflows/helm.yaml" -# - "charts/**" -#jobs: -# lint: -# name: Lint -# runs-on: ubuntu-latest -# timeout-minutes: 20 -# steps: -# - uses: actions/checkout@v2 -# - name: Setup Kubectl -# uses: azure/setup-kubectl@v1 -# - name: Setup Helm -# uses: azure/setup-helm@v1 -# with: -# version: "3.6.3" -# - name: Lint Chart -# working-directory: ./charts/airbyte -# run: ./ci.sh lint -# -# generate-docs: -# name: Generate Docs Parameters -# runs-on: ubuntu-latest -# timeout-minutes: 10 -# steps: -# - uses: actions/checkout@v2 -# - name: Checkout bitnami-labs/readme-generator-for-helm -# uses: actions/checkout@v2 -# with: -# repository: "bitnami-labs/readme-generator-for-helm" -# ref: "55cab5dd2191c4ffa7245cfefa428d4d9bb12730" -# path: readme-generator-for-helm -# - name: Install readme-generator-for-helm dependencies -# working-directory: readme-generator-for-helm -# run: npm install -g -# - name: Test can update README with generated parameters -# working-directory: charts/airbyte -# run: echo Temporarily disabled ./ci.sh check-docs-updated -# -# install: -# name: Install -# runs-on: ubuntu-latest -# timeout-minutes: 20 -# steps: -# - uses: actions/checkout@v2 -# - name: Setup Kubectl -# uses: azure/setup-kubectl@v1 -# - name: Setup Helm -# uses: azure/setup-helm@v1 -# with: -# version: "3.6.3" -# - name: Setup Kind Cluster -# uses: helm/kind-action@v1.2.0 -# with: -# version: "v0.11.1" -# image: "kindest/node:v1.21.1" -# - name: Install airbyte chart -# working-directory: ./charts/airbyte -# run: ./ci.sh install -# - if: always() -# name: Print diagnostics -# working-directory: ./charts/airbyte -# run: ./ci.sh diagnostics -# - if: success() -# name: Test airbyte chart -# working-directory: ./charts/airbyte -# run: ./ci.sh test From a1e2b6383d4e2e911adff57052d715101ef33474 Mon Sep 17 00:00:00 2001 From: ycherniaiev <94798230+ycherniaiev@users.noreply.github.com> Date: Wed, 19 Jan 2022 13:16:15 +0200 Subject: [PATCH 152/215] Update fields in source-connectors specifications: google-ads (#9149) Signed-off-by: Sergey Chvalyuk Co-authored-by: Serhii Chvaliuk --- .../253487c0-2246-43ba-a21f-5116b20a2c50.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 38 +++++++++---------- .../connectors/source-google-ads/Dockerfile | 2 +- .../source_google_ads/spec.json | 22 +++++------ docs/integrations/sources/google-ads.md | 1 + 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/253487c0-2246-43ba-a21f-5116b20a2c50.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/253487c0-2246-43ba-a21f-5116b20a2c50.json index 72f16d6fba47c..29173d341d50e 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/253487c0-2246-43ba-a21f-5116b20a2c50.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/253487c0-2246-43ba-a21f-5116b20a2c50.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "253487c0-2246-43ba-a21f-5116b20a2c50", "name": "Google Ads", "dockerRepository": "airbyte/source-google-ads", - "dockerImageTag": "0.1.20", + "dockerImageTag": "0.1.21", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-ads", "icon": "google-adwords.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 118ba80d73046..17c997cb072fb 100644 --- 
a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -238,7 +238,7 @@ - name: Google Ads sourceDefinitionId: 253487c0-2246-43ba-a21f-5116b20a2c50 dockerRepository: airbyte/source-google-ads - dockerImageTag: 0.1.20 + dockerImageTag: 0.1.21 documentationUrl: https://docs.airbyte.io/integrations/sources/google-ads icon: google-adwords.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index e4d51db2ea2f3..ea4564ac06a62 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2151,9 +2151,9 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-google-ads:0.1.20" +- dockerImage: "airbyte/source-google-ads:0.1.21" spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/google-ads" + documentationUrl: "https://docs.airbyte.com/integrations/sources/google-ads" connectionSpecification: $schema: "http://json-schema.org/draft-07/schema#" title: "Google Ads Spec" @@ -2178,44 +2178,42 @@ type: "string" title: "Developer Token" description: "Developer token granted by Google to use their APIs. More\ - \ instruction on how to find this value in our docs" airbyte_secret: true client_id: type: "string" - title: "Client Id" - description: "Google client id. More instruction on how to find this\ - \ value in our docs" client_secret: type: "string" title: "Client Secret" - description: "Google client secret. More instruction on how to find\ - \ this value in our docs" airbyte_secret: true access_token: type: "string" title: "Access Token" - description: "Access token generated using developer_token, oauth_client_id,\ - \ and oauth_client_secret. More instruction on how to find this value\ - \ in our docs" airbyte_secret: true refresh_token: type: "string" title: "Refresh Token" - description: "Refresh token generated using developer_token, oauth_client_id,\ - \ and oauth_client_secret. More instruction on how to find this value\ - \ in our docs" airbyte_secret: true customer_id: title: "Customer ID" type: "string" - description: "Customer id must be specified as a 10-digit number without\ + description: "Customer ID must be specified as a 10-digit number without\ \ dashes. More instruction on how to find this value in our docs. Metrics streams like AdGroupAdReport cannot be requested for\ \ a manager account." order: 1 @@ -2237,7 +2235,7 @@ properties: query: type: "string" - title: "Custom query" + title: "Custom Query" description: "A custom defined GAQL query for building the report.\ \ Should not contain segments.date expression as it used by incremental\ \ streams" @@ -2246,7 +2244,7 @@ \ FROM campaign WHERE campaign.status = 'PAUSED'" table_name: type: "string" - title: "Destination table name" + title: "Destination Table Name" description: "The table name in your destination database for choosen\ \ query." login_customer_id: @@ -2261,7 +2259,9 @@ conversion_window_days: title: "Conversion Window (Optional)" type: "integer" - description: "Define the historical replication lookback window in days" + description: "A conversion window is the period of time after an ad interaction\ + \ (such as an ad click or video view) during which a conversion, such\ + \ as a purchase, is recorded in Google Ads." 
minimum: 0 maximum: 1095 default: 14 diff --git a/airbyte-integrations/connectors/source-google-ads/Dockerfile b/airbyte-integrations/connectors/source-google-ads/Dockerfile index 69fc31985ff76..68d3e2e289084 100644 --- a/airbyte-integrations/connectors/source-google-ads/Dockerfile +++ b/airbyte-integrations/connectors/source-google-ads/Dockerfile @@ -13,5 +13,5 @@ RUN pip install . ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.20 +LABEL io.airbyte.version=0.1.21 LABEL io.airbyte.name=airbyte/source-google-ads diff --git a/airbyte-integrations/connectors/source-google-ads/source_google_ads/spec.json b/airbyte-integrations/connectors/source-google-ads/source_google_ads/spec.json index b4562f6d3674c..6b156bae62cdb 100644 --- a/airbyte-integrations/connectors/source-google-ads/source_google_ads/spec.json +++ b/airbyte-integrations/connectors/source-google-ads/source_google_ads/spec.json @@ -1,5 +1,5 @@ { - "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-ads", + "documentationUrl": "https://docs.airbyte.com/integrations/sources/google-ads", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Google Ads Spec", @@ -21,30 +21,30 @@ "developer_token": { "type": "string", "title": "Developer Token", - "description": "Developer token granted by Google to use their APIs. More instruction on how to find this value in our docs", + "description": "Developer token granted by Google to use their APIs. More instruction on how to find this value in our docs", "airbyte_secret": true }, "client_id": { "type": "string", - "title": "Client Id", - "description": "Google client id. More instruction on how to find this value in our docs" + "title": "Client ID", + "description": "The Client ID of your Google Ads developer application. More instruction on how to find this value in our docs" }, "client_secret": { "type": "string", "title": "Client Secret", - "description": "Google client secret. More instruction on how to find this value in our docs", + "description": "The Client Secret of your Google Ads developer application. More instruction on how to find this value in our docs", "airbyte_secret": true }, "access_token": { "type": "string", "title": "Access Token", - "description": "Access token generated using developer_token, oauth_client_id, and oauth_client_secret. More instruction on how to find this value in our docs", + "description": "Access Token for making authenticated requests. More instruction on how to find this value in our docs", "airbyte_secret": true }, "refresh_token": { "type": "string", "title": "Refresh Token", - "description": "Refresh token generated using developer_token, oauth_client_id, and oauth_client_secret. More instruction on how to find this value in our docs", + "description": "The token for obtaining a new access token. More instruction on how to find this value in our docs", "airbyte_secret": true } } @@ -52,7 +52,7 @@ "customer_id": { "title": "Customer ID", "type": "string", - "description": "Customer id must be specified as a 10-digit number without dashes. More instruction on how to find this value in our docs. Metrics streams like AdGroupAdReport cannot be requested for a manager account.", + "description": "Customer ID must be specified as a 10-digit number without dashes. More instruction on how to find this value in our docs. 
Metrics streams like AdGroupAdReport cannot be requested for a manager account.", "order": 1 }, "start_date": { @@ -72,7 +72,7 @@ "properties": { "query": { "type": "string", - "title": "Custom query", + "title": "Custom Query", "description": "A custom defined GAQL query for building the report. Should not contain segments.date expression as it used by incremental streams", "examples": [ "SELECT segments.ad_destination_type, campaign.advertising_channel_sub_type FROM campaign WHERE campaign.status = 'PAUSED'" @@ -80,7 +80,7 @@ }, "table_name": { "type": "string", - "title": "Destination table name", + "title": "Destination Table Name", "description": "The table name in your destination database for choosen query." } } @@ -95,7 +95,7 @@ "conversion_window_days": { "title": "Conversion Window (Optional)", "type": "integer", - "description": "Define the historical replication lookback window in days", + "description": "A conversion window is the period of time after an ad interaction (such as an ad click or video view) during which a conversion, such as a purchase, is recorded in Google Ads.", "minimum": 0, "maximum": 1095, "default": 14, diff --git a/docs/integrations/sources/google-ads.md b/docs/integrations/sources/google-ads.md index 7a0db50ec0166..840746766ab2c 100644 --- a/docs/integrations/sources/google-ads.md +++ b/docs/integrations/sources/google-ads.md @@ -102,6 +102,7 @@ This source is constrained by whatever API limits are set for the Google Ads tha | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.1.21` | 2021-12-28 | [9149](https://github.com/airbytehq/airbyte/pull/9149) | Update title and description | | `0.1.20` | 2021-12-22 | [9071](https://github.com/airbytehq/airbyte/pull/9071) | Fix: Keyword schema enum | | `0.1.19` | 2021-12-14 | [8431](https://github.com/airbytehq/airbyte/pull/8431) | Add new streams: Geographic and Keyword | | `0.1.18` | 2021-12-09 | [8225](https://github.com/airbytehq/airbyte/pull/8225) | Include time_zone to sync. Remove streams for manager account. 
| From df4b4d645758a15ea942cd47521c8f6a8e00a6d1 Mon Sep 17 00:00:00 2001 From: ycherniaiev <94798230+ycherniaiev@users.noreply.github.com> Date: Wed, 19 Jan 2022 14:11:30 +0200 Subject: [PATCH 153/215] Update fields in source-connectors specifications: google-analytics-v4 (#9165) Signed-off-by: Sergey Chvalyuk Co-authored-by: Sergey Chvalyuk --- .../eff3616a-f9c3-11eb-9a03-0242ac130003.json | 2 +- .../main/resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 17 +++++++++-------- .../source-google-analytics-v4/Dockerfile | 2 +- .../source_google_analytics_v4/spec.json | 11 ++++++----- .../integrations/sources/google-analytics-v4.md | 1 + 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eff3616a-f9c3-11eb-9a03-0242ac130003.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eff3616a-f9c3-11eb-9a03-0242ac130003.json index ae410771e25ae..cfa492ef70a79 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eff3616a-f9c3-11eb-9a03-0242ac130003.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eff3616a-f9c3-11eb-9a03-0242ac130003.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "eff3616a-f9c3-11eb-9a03-0242ac130003", "name": "Google Analytics v4", "dockerRepository": "airbyte/source-google-analytics-v4", - "dockerImageTag": "0.1.13", + "dockerImageTag": "0.1.15", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-analytics-v4", "icon": "google-analytics.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 17c997cb072fb..1f82f0b7a6f01 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -245,7 +245,7 @@ - name: Google Analytics sourceDefinitionId: eff3616a-f9c3-11eb-9a03-0242ac130003 dockerRepository: airbyte/source-google-analytics-v4 - dockerImageTag: 0.1.14 + dockerImageTag: 0.1.15 documentationUrl: https://docs.airbyte.io/integrations/sources/google-analytics-v4 icon: google-analytics.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index ea4564ac06a62..34f1f168c55b5 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2283,7 +2283,7 @@ oauthFlowOutputParameters: - - "access_token" - - "refresh_token" -- dockerImage: "airbyte/source-google-analytics-v4:0.1.14" +- dockerImage: "airbyte/source-google-analytics-v4:0.1.15" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-analytics-v4" connectionSpecification: @@ -2306,7 +2306,8 @@ order: 1 type: "string" title: "Start Date" - description: "A date in the format YYYY-MM-DD." + description: "The date in the format YYYY-MM-DD. Any data before this date\ + \ will not be replicated." examples: - "2020-06-01" custom_reports: @@ -2338,24 +2339,23 @@ client_id: title: "Client ID" type: "string" - description: "The Client ID of your developer application" + description: "The Client ID of your Google Analytics developer application." 
airbyte_secret: true client_secret: title: "Client Secret" type: "string" - description: "The client secret of your developer application" + description: "The Client Secret of your Google Analytics developer\ + \ application." airbyte_secret: true refresh_token: title: "Refresh Token" type: "string" - description: "A refresh token generated using the above client ID\ - \ and secret" + description: "The token for obtaining a new access token." airbyte_secret: true access_token: title: "Access Token" type: "string" - description: "A access token generated using the above client ID,\ - \ secret and refresh_token" + description: "Access Token for making authenticated requests." airbyte_secret: true - type: "object" title: "Service Account Key Authentication" @@ -2370,6 +2370,7 @@ default: "Service" order: 0 credentials_json: + title: "JSON credentials" type: "string" description: "The JSON key of the service account to use for authorization" examples: diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/Dockerfile b/airbyte-integrations/connectors/source-google-analytics-v4/Dockerfile index f98cdc1908797..d1215de0e6410 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/Dockerfile +++ b/airbyte-integrations/connectors/source-google-analytics-v4/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.14 +LABEL io.airbyte.version=0.1.15 LABEL io.airbyte.name=airbyte/source-google-analytics-v4 diff --git a/airbyte-integrations/connectors/source-google-analytics-v4/source_google_analytics_v4/spec.json b/airbyte-integrations/connectors/source-google-analytics-v4/source_google_analytics_v4/spec.json index 411f4dc74a830..2aeb998bc2ad8 100644 --- a/airbyte-integrations/connectors/source-google-analytics-v4/source_google_analytics_v4/spec.json +++ b/airbyte-integrations/connectors/source-google-analytics-v4/source_google_analytics_v4/spec.json @@ -17,7 +17,7 @@ "order": 1, "type": "string", "title": "Start Date", - "description": "A date in the format YYYY-MM-DD.", + "description": "The date in the format YYYY-MM-DD. 
Any data before this date will not be replicated.", "examples": ["2020-06-01"] }, "custom_reports": { @@ -46,25 +46,25 @@ "client_id": { "title": "Client ID", "type": "string", - "description": "The Client ID of your developer application", + "description": "The Client ID of your Google Analytics developer application.", "airbyte_secret": true }, "client_secret": { "title": "Client Secret", "type": "string", - "description": "The client secret of your developer application", + "description": "The Client Secret of your Google Analytics developer application.", "airbyte_secret": true }, "refresh_token": { "title": "Refresh Token", "type": "string", - "description": "A refresh token generated using the above client ID and secret", + "description": "The token for obtaining a new access token.", "airbyte_secret": true }, "access_token": { "title": "Access Token", "type": "string", - "description": "A access token generated using the above client ID, secret and refresh_token", + "description": "Access Token for making authenticated requests.", "airbyte_secret": true } } @@ -82,6 +82,7 @@ "order": 0 }, "credentials_json": { + "title": "JSON credentials", "type": "string", "description": "The JSON key of the service account to use for authorization", "examples": [ diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index a68f12dc2f6c9..60d14991a4094 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -132,6 +132,7 @@ The Google Analytics connector should not run into Google Analytics API limitati | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.15 | 2021-12-28 | [9165](https://github.com/airbytehq/airbyte/pull/9165) | Update titles and descriptions | | 0.1.14 | 2021-12-09 | [8656](https://github.com/airbytehq/airbyte/pull/8656) | Fix date-format in schemas | | 0.1.13 | 2021-12-09 | [8676](https://github.com/airbytehq/airbyte/pull/8676) | Fix `window_in_days` validation issue | | 0.1.12 | 2021-12-03 | [8175](https://github.com/airbytehq/airbyte/pull/8175) | Fix validation of unknown metric(s) or dimension(s) error | From 89308f71af2beab675399a37a850846a3b168f8a Mon Sep 17 00:00:00 2001 From: oneshcheret <33333155+sashaNeshcheret@users.noreply.github.com> Date: Wed, 19 Jan 2022 15:09:26 +0200 Subject: [PATCH 154/215] Snowflake source: keep session alive to avoid breaking connection (#9567) * Snowflake source: keep session alive to avoid breaking connection * Snowflake source: bump version * Snowflake source: updated seed specs --- .../e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- .../init/src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/connectors/source-snowflake/Dockerfile | 2 +- .../SnowflakeSource.java | 5 +++-- docs/integrations/sources/snowflake.md | 1 + 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json index 9ebdba29dde73..2fccee5cd4e1c 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2.json @@ -2,7 +2,7 @@ 
"sourceDefinitionId": "e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2", "name": "Snowflake", "dockerRepository": "airbyte/source-snowflake", - "dockerImageTag": "0.1.4", + "dockerImageTag": "0.1.5", "documentationUrl": "https://docs.airbyte.io/integrations/sources/snowflake", "icon": "snowflake.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1f82f0b7a6f01..30b31d94d8219 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -676,7 +676,7 @@ - name: Snowflake sourceDefinitionId: e2d65910-8c8b-40a1-ae7d-ee2416b2bfa2 dockerRepository: airbyte/source-snowflake - dockerImageTag: 0.1.4 + dockerImageTag: 0.1.5 documentationUrl: https://docs.airbyte.io/integrations/sources/snowflake icon: snowflake.svg sourceType: database diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 34f1f168c55b5..a413b67fed7d6 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6997,7 +6997,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-snowflake:0.1.4" +- dockerImage: "airbyte/source-snowflake:0.1.5" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/snowflake" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-snowflake/Dockerfile b/airbyte-integrations/connectors/source-snowflake/Dockerfile index 873b0b2d5902e..22356354f9002 100644 --- a/airbyte-integrations/connectors/source-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/source-snowflake/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-snowflake COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.4 +LABEL io.airbyte.version=0.1.5 LABEL io.airbyte.name=airbyte/source-snowflake diff --git a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java index 9d6d55db7efe2..95e8f8836c9bf 100644 --- a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java +++ b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java @@ -39,14 +39,15 @@ public JsonNode toDatabaseConfig(final JsonNode config) { .put("host", config.get("host").asText()) .put("username", config.get("username").asText()) .put("password", config.get("password").asText()) - .put("connection_properties", String.format("role=%s;warehouse=%s;database=%s;schema=%s;JDBC_QUERY_RESULT_FORMAT=%s;", + .put("connection_properties", String.format("role=%s;warehouse=%s;database=%s;schema=%s;JDBC_QUERY_RESULT_FORMAT=%s;CLIENT_SESSION_KEEP_ALIVE=%s;", config.get("role").asText(), config.get("warehouse").asText(), config.get("database").asText(), config.get("schema").asText(), // Needed for JDK17 - see // https://stackoverflow.com/questions/67409650/snowflake-jdbc-driver-internal-error-fail-to-retrieve-row-count-for-first-arrow - "JSON")) + "JSON", + true)) .build()); } diff --git a/docs/integrations/sources/snowflake.md b/docs/integrations/sources/snowflake.md index 
6e03b35ac86df..17a25a3a377e8 100644 --- a/docs/integrations/sources/snowflake.md +++ b/docs/integrations/sources/snowflake.md @@ -75,6 +75,7 @@ Your database user should now be ready for use with Airbyte. | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.5 | 2022-01-19 | [9567](https://github.com/airbytehq/airbyte/pull/9567) | Added parameter for keeping JDBC session alive | | 0.1.4 | 2021-12-30 | [9203](https://github.com/airbytehq/airbyte/pull/9203) | Update connector fields title/description | | 0.1.3 | 2021-01-11 | [9304](https://github.com/airbytehq/airbyte/pull/9304) | Upgrade version of JDBC driver | | 0.1.2 | 2021-10-21 | [7257](https://github.com/airbytehq/airbyte/pull/7257) | Fixed parsing of extreme values for FLOAT and NUMBER data types | From 6990cc73bab05b878470a8876a1b1642294d649a Mon Sep 17 00:00:00 2001 From: Serhii Lazebnyi <53845333+lazebnyi@users.noreply.github.com> Date: Wed, 19 Jan 2022 15:26:38 +0200 Subject: [PATCH 155/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20Intercom:=20Fix?= =?UTF-8?q?=20handling=20of=20scroll=20param=20when=20it=20expired=20(#951?= =?UTF-8?q?3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add handling of scroll param when it expired * Updated PR number * Fix typo in docs * Add unittest * Updated scroll or standard switch mechanism * Updated to linters * Updated spec.yaml and defenitions --- .../d8313939-3782-41b0-be29-b3ca20d8dd3a.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-intercom/Dockerfile | 2 +- .../source-intercom/source_intercom/source.py | 24 +++++++++++---- .../source-intercom/unit_tests/unit_test.py | 30 +++++++++++++++++++ docs/integrations/sources/intercom.md | 5 ++-- 7 files changed, 55 insertions(+), 12 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d8313939-3782-41b0-be29-b3ca20d8dd3a.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d8313939-3782-41b0-be29-b3ca20d8dd3a.json index af8ad0f23e640..3e1baf8dd0463 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d8313939-3782-41b0-be29-b3ca20d8dd3a.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d8313939-3782-41b0-be29-b3ca20d8dd3a.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "d8313939-3782-41b0-be29-b3ca20d8dd3a", "name": "Intercom", "dockerRepository": "airbyte/source-intercom", - "dockerImageTag": "0.1.11", + "dockerImageTag": "0.1.13", "documentationUrl": "https://docs.airbyte.io/integrations/sources/intercom", "icon": "intercom.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 30b31d94d8219..1958801c835d0 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -321,7 +321,7 @@ - name: Intercom sourceDefinitionId: d8313939-3782-41b0-be29-b3ca20d8dd3a dockerRepository: airbyte/source-intercom - dockerImageTag: 0.1.12 + dockerImageTag: 0.1.13 documentationUrl: https://docs.airbyte.io/integrations/sources/intercom icon: intercom.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index a413b67fed7d6..cf7cca7be7980 100644 --- 
a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3134,7 +3134,7 @@ oauthFlowInitParameters: [] oauthFlowOutputParameters: - - "access_token" -- dockerImage: "airbyte/source-intercom:0.1.12" +- dockerImage: "airbyte/source-intercom:0.1.13" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/intercom" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-intercom/Dockerfile b/airbyte-integrations/connectors/source-intercom/Dockerfile index 7cea41697a667..2ae375afc8c42 100644 --- a/airbyte-integrations/connectors/source-intercom/Dockerfile +++ b/airbyte-integrations/connectors/source-intercom/Dockerfile @@ -35,5 +35,5 @@ COPY source_intercom ./source_intercom ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.12 +LABEL io.airbyte.version=0.1.13 LABEL io.airbyte.name=airbyte/source-intercom diff --git a/airbyte-integrations/connectors/source-intercom/source_intercom/source.py b/airbyte-integrations/connectors/source-intercom/source_intercom/source.py index 862b06985530a..8f99e9f473a51 100755 --- a/airbyte-integrations/connectors/source-intercom/source_intercom/source.py +++ b/airbyte-integrations/connectors/source-intercom/source_intercom/source.py @@ -171,6 +171,7 @@ class EndpointType(Enum): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._backoff_count = 0 + self._use_standard = False self._endpoint_type = self.EndpointType.scroll self._total_count = None # uses for saving of a total_count value once @@ -193,6 +194,9 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, return super().next_page_token(response) return None + def need_use_standard(self): + return not self.can_use_scroll() or self._use_standard + def can_use_scroll(self): """Check backoff count""" return self._backoff_count <= 3 @@ -202,38 +206,46 @@ def path(self, **kwargs) -> str: @classmethod def check_exists_scroll(cls, response: requests.Response) -> bool: - if response.status_code == 400: + if response.status_code in [400, 404]: # example response: # {..., "errors": [{'code': 'scroll_exists', 'message': 'scroll already exists for this workspace'}]} + # {..., "errors": [{'code': 'not_found', 'message':'scroll parameter not found'}]} err_body = response.json()["errors"][0] - if err_body["code"] == "scroll_exists": + if err_body["code"] in ["scroll_exists", "not_found"]: return True return False @property def raise_on_http_errors(self) -> bool: - if not self.can_use_scroll() and self._endpoint_type == self.EndpointType.scroll: + if self.need_use_standard() and self._endpoint_type == self.EndpointType.scroll: return False return True def stream_slices(self, sync_mode, **kwargs) -> Iterable[Optional[Mapping[str, any]]]: yield None - if not self.can_use_scroll(): + if self.need_use_standard(): self._endpoint_type = self.EndpointType.standard yield None def should_retry(self, response: requests.Response) -> bool: if self.check_exists_scroll(response): self._backoff_count += 1 - if not self.can_use_scroll(): - self.logger.error("Can't create a new scroll request within an minute. " "Let's try to use a standard non-scroll endpoint.") + if self.need_use_standard(): + self.logger.error( + "Can't create a new scroll request within an minute or scroll param was expired. " + "Let's try to use a standard non-scroll endpoint." 
+ ) return False return True return super().should_retry(response) def backoff_time(self, response: requests.Response) -> Optional[float]: + if response.status_code == 404: + self._use_standard = True + # Need return value greater than zero to use UserDefinedBackoffException class + return 0.01 if self.check_exists_scroll(response): self.logger.warning("A previous scroll request is exists. " "It must be deleted within an minute automatically") # try to check 3 times diff --git a/airbyte-integrations/connectors/source-intercom/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-intercom/unit_tests/unit_test.py index 3690bce61b080..40b0cfc2eb911 100644 --- a/airbyte-integrations/connectors/source-intercom/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-intercom/unit_tests/unit_test.py @@ -5,6 +5,7 @@ import pytest import requests +from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams.http.auth import NoAuth from source_intercom.source import Companies, Contacts, IntercomStream @@ -46,3 +47,32 @@ def test_get_next_page_token(intercom_class, response_json, expected_output_toke test = intercom_class(authenticator=NoAuth).next_page_token(response) assert test == expected_output_token + + +def test_switch_to_standard_endpoint_if_scroll_expired(requests_mock): + """ + Test shows that if scroll param expired we try sync with standard API. + """ + + url = "https://api.intercom.io/companies/scroll" + requests_mock.get( + url, + json={"type": "company.list", "data": [{"type": "company", "id": "530370b477ad7120001d"}], "scroll_param": "expired_scroll_param"}, + ) + + url = "https://api.intercom.io/companies/scroll?scroll_param=expired_scroll_param" + requests_mock.get(url, json={"errors": [{"code": "not_found", "message": "scroll parameter not found"}]}, status_code=404) + + url = "https://api.intercom.io/companies" + requests_mock.get(url, json={"type": "company.list", "data": [{"type": "company", "id": "530370b477ad7120001d"}]}) + + stream1 = Companies(authenticator=NoAuth()) + + records = [] + + assert stream1._endpoint_type == Companies.EndpointType.scroll + + for slice in stream1.stream_slices(sync_mode=SyncMode.full_refresh): + records += list(stream1.read_records(sync_mode=SyncMode, stream_slice=slice)) + + assert stream1._endpoint_type == Companies.EndpointType.standard diff --git a/docs/integrations/sources/intercom.md b/docs/integrations/sources/intercom.md index 0d01b412efddc..537dba6b0a3f5 100644 --- a/docs/integrations/sources/intercom.md +++ b/docs/integrations/sources/intercom.md @@ -55,9 +55,10 @@ Please read [How to get your Access Token](https://developers.intercom.com/build | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.13 | 2022-01-14 | [9513](https://github.com/airbytehq/airbyte/pull/9513) | Added handling of scroll param when it expired | | 0.1.12 | 2021-12-14 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Updated fields and descriptions | | 0.1.11 | 2021-12-13 | [8685](https://github.com/airbytehq/airbyte/pull/8685) | Remove time.sleep for rate limit | -| 0.1.10 | 2021-12-10 | [8637](https://github.com/airbytehq/airbyte/pull/8637) | Fix 'conversations' order and sorting. Correction of the companies stream| +| 0.1.10 | 2021-12-10 | [8637](https://github.com/airbytehq/airbyte/pull/8637) | Fix 'conversations' order and sorting. 
Correction of the companies stream | | 0.1.9 | 2021-12-03 | [8395](https://github.com/airbytehq/airbyte/pull/8395) | Fix backoff of 'companies' stream | | 0.1.8 | 2021-11-09 | [7060](https://github.com/airbytehq/airbyte/pull/7060) | Added oauth support | | 0.1.7 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | @@ -67,4 +68,4 @@ Please read [How to get your Access Token](https://developers.intercom.com/build | 0.1.3 | 2021-09-08 | [5908](https://github.com/airbytehq/airbyte/pull/5908) | Corrected timestamp and arrays in schemas | | 0.1.2 | 2021-08-19 | [5531](https://github.com/airbytehq/airbyte/pull/5531) | Corrected pagination | | 0.1.1 | 2021-07-31 | [5123](https://github.com/airbytehq/airbyte/pull/5123) | Corrected rate limit | -| 0.1.0 | 2021-07-19 | [4676](https://github.com/airbytehq/airbyte/pull/4676) | Release Slack CDK Connector | +| 0.1.0 | 2021-07-19 | [4676](https://github.com/airbytehq/airbyte/pull/4676) | Release Intercom CDK Connector | From 208c14682d3e49af141aed6ff5de7dbe95597e35 Mon Sep 17 00:00:00 2001 From: andriikorotkov <88329385+andriikorotkov@users.noreply.github.com> Date: Wed, 19 Jan 2022 17:22:21 +0200 Subject: [PATCH 156/215] updated benchmarks databases and scripts (#9597) --- .../MsSqlRdsSourcePerformanceSecretTest.java | 6 +- .../sql/create_mssql_benchmarks.sql | 72 ++++++++++--------- .../MySqlRdsSourcePerformanceSecretTest.java | 6 +- .../sql/create_mysql_benchmarks.sql | 8 +-- .../PostgresRdsSourcePerformanceTest.java | 8 +-- 5 files changed, 51 insertions(+), 49 deletions(-) diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MsSqlRdsSourcePerformanceSecretTest.java b/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MsSqlRdsSourcePerformanceSecretTest.java index cb056dd29a5f8..ae5dc98a4f443 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MsSqlRdsSourcePerformanceSecretTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MsSqlRdsSourcePerformanceSecretTest.java @@ -47,9 +47,9 @@ protected void setupDatabase(String dbName) { @BeforeAll public static void beforeAll() { AbstractSourcePerformanceTest.testArgs = Stream.of( - Arguments.of("test1000tables240columns200recordsDb", "dbo", 200, 240, 1000), - Arguments.of("newregular25tables50000records", "dbo", 50000, 8, 25), - Arguments.of("newsmall1000tableswith10000rows", "dbo", 10000, 8, 1000)); + Arguments.of("t1000_c240_r200", "dbo", 200, 240, 1000), + Arguments.of("t25_c8_r50k_s10kb", "dbo", 50000, 8, 25), + Arguments.of("t1000_c8_r10k_s500b", "dbo", 10000, 8, 1000)); } } diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/sql/create_mssql_benchmarks.sql b/airbyte-integrations/connectors/source-mssql/src/test-performance/sql/create_mssql_benchmarks.sql index 2c55ca5203c66..e1e0870f27ed4 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-performance/sql/create_mssql_benchmarks.sql +++ b/airbyte-integrations/connectors/source-mssql/src/test-performance/sql/create_mssql_benchmarks.sql @@ -57,8 +57,6 @@ DECLARE @fullloop INT; DECLARE @fullloopcounter INT; SET -@dummyIpsum = '''dummy_ipsum''' -SET @vmax = @allrows; SET @vmaxx = @allrows; @@ -74,8 +72,8 @@ SET @fullloop = 0; SET @fullloopcounter = 0; - -while @vmaxx <= 
@vmaxoneinsert BEGIN +SET +@dummyIpsum = '''dummy_ipsum''' while @vmaxx <= @vmaxoneinsert BEGIN SET @vmaxoneinsert = @vmaxx; SET @@ -97,7 +95,7 @@ DECLARE @insertTable NVARCHAR(MAX) SET @insertTable = CONVERT( NVARCHAR(MAX), - 'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longblobfield, timestampfield) values (' + 'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values (' ); while @counter < @vmaxoneinsert BEGIN @@ -159,7 +157,7 @@ DECLARE @insertTableLasted NVARCHAR(MAX); SET @insertTableLasted = CONVERT( NVARCHAR(MAX), - 'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longblobfield, timestampfield) values (' + 'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values (' ); while @lastinsertcounter < @lastinsert BEGIN @@ -231,7 +229,7 @@ CREATE varchar3 VARCHAR(255), varchar4 VARCHAR(255), varchar5 VARCHAR(255), - longblobfield nvarchar(MAX), + longtextfield nvarchar(MAX), timestampfield datetime2(0) ); @@ -242,34 +240,38 @@ DECLARE @smallText NVARCHAR(MAX); DECLARE @regularText NVARCHAR(MAX); DECLARE @largeText NVARCHAR(MAX); + +DECLARE @someText nvarchar(MAX); + +SELECT + @someText = N'some text, some text, '; +SET +@extraSmallText = N'''test weight 50b - some text, some text, some text'''; SET -@extraSmallText = '''test weight 50b - 50b text, 50b text, 50b text''' +@smallText = N'''test weight 500b - '; SET -@smallText = CONCAT( - '''test weight 500b - ', - REPLICATE( - 'some text, some text, ', +@regularText = N'''test weight 10kb - '; +SET +@largeText = N'''test weight 100kb - '; + +SELECT + @smallText = @smallText + REPLICATE( + @someText, 20 - ), - '''' -) -SET -@regularText = CONCAT( - '''test weight 10kb - ', - REPLICATE( - 'some text, some text, some text, some text, ', - 295 - ), - 'some text''' -) -SET -@largeText = CONCAT( - '''test weight 100kb - ', - REPLICATE( - 'some text, some text, some text, some text, ', - 2225 - ), - 'some text''' + )+ N''''; + +SELECT + @regularText = @regularText + REPLICATE( + @someText, + 590 + )+ N'some text'''; + +SELECT + @largeText = @largeText + REPLICATE( + @someText, + 4450 + )+ N'some text'''; + ) -- TODO: change the following @allrows to control the number of records with different sizes -- number of 50B records EXEC insert_rows @allrows = 0, @@ -277,10 +279,10 @@ EXEC insert_rows @allrows = 0, @value = @extraSmallText -- number of 500B records EXEC insert_rows @allrows = 0, @insertcount = 998, -@value = @smallText -- number of 10KB records +@value = @smallText -- number of 10Kb records EXEC insert_rows @allrows = 0, @insertcount = 998, -@value = @regularText -- number of 100KB records +@value = @regularText -- number of 100Kb records EXEC insert_rows @allrows = 0, @insertcount = 98, @value = @largeText @@ -300,4 +302,4 @@ DROP PROCEDURE IF EXISTS table_copy; EXEC sp_rename 'test', -'test_0'; +'test_0'; \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/MySqlRdsSourcePerformanceSecretTest.java b/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/MySqlRdsSourcePerformanceSecretTest.java index 3a1a552a46a2b..6cf5c2fc43cce 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/MySqlRdsSourcePerformanceSecretTest.java +++ 
b/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/MySqlRdsSourcePerformanceSecretTest.java @@ -67,9 +67,9 @@ protected void setupDatabase(String dbName) throws Exception { @BeforeAll public static void beforeAll() { AbstractSourcePerformanceTest.testArgs = Stream.of( - Arguments.of("test1000tables240columns200recordsDb", "test1000tables240columns200recordsDb", 200, 240, 1000), - Arguments.of("newregular25tables50000records", "newregular25tables50000records", 50000, 8, 25), - Arguments.of("newsmall1000tableswith10000rows", "newsmall1000tableswith10000rows", 10000, 8, 1000)); + Arguments.of("t1000_c240_r200", "t1000_c240_r200", 200, 240, 1000), + Arguments.of("t25_c8_r50k_s10kb", "t25_c8_r50k_s10kb", 50000, 8, 25), + Arguments.of("t1000_c8_r10k_s500b", "t1000_c8_r10k_s500b", 10000, 8, 1000)); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-performance/sql/create_mysql_benchmarks.sql b/airbyte-integrations/connectors/source-mysql/src/test-performance/sql/create_mysql_benchmarks.sql index f4de7715e15e2..b24773e849771 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-performance/sql/create_mysql_benchmarks.sql +++ b/airbyte-integrations/connectors/source-mysql/src/test-performance/sql/create_mysql_benchmarks.sql @@ -78,7 +78,7 @@ END while; COMMIT; SET -@insertTable = concat('insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longblobfield, timestampfield) values ('); +@insertTable = concat('insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values ('); while @counter < @vmaxoneinsert do SET @@ -141,7 +141,7 @@ END while; COMMIT; SET -@insertTableLasted = concat('insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longblobfield, timestampfield) values ('); +@insertTableLasted = concat('insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values ('); while @lastinsertcounter < @lastinsert do SET @@ -206,7 +206,7 @@ delimiter # CREATE varchar3 VARCHAR(255), varchar4 VARCHAR(255), varchar5 VARCHAR(255), - longblobfield longblob, + longtextfield longtext, timestampfield TIMESTAMP ) engine = innodb; SET @@ -277,4 +277,4 @@ DROP PROCEDURE IF EXISTS table_copy; ALTER TABLE - test RENAME test_0; + test RENAME test_0; \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java index dff446f51cfde..2798dda944136 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java @@ -18,8 +18,8 @@ public class PostgresRdsSourcePerformanceTest extends AbstractSourcePerformanceTest { private static final String PERFORMANCE_SECRET_CREDS = "secrets/performance-config.json"; - private static final List SCHEMAS = List.of("test1000tables240columns200recordsDb", - "newregular25tables50000records", "newsmall1000tableswith10000rows"); + private static final List SCHEMAS = List.of("t1000_c240_r200", + "t25_c8_r50k_s10kb", "t1000_c8_r10k_s500b"); @Override 
protected String getImageName() { @@ -37,8 +37,8 @@ protected void setupDatabase(String dbName) { config = Jsons.jsonNode(ImmutableMap.builder() .put("host", plainConfig.get("host")) .put("port", plainConfig.get("port")) - .put("database", plainConfig.get("database")) - .put("schemas", SCHEMAS) + .put("database", dbName) + .put("schemas", List.of(dbName)) .put("username", plainConfig.get("username")) .put("password", plainConfig.get("password")) .put("ssl", true) From 1832c69c269e1c9f15c3eb36f7a1bd59a861ab73 Mon Sep 17 00:00:00 2001 From: h-chlor <43164320+h-chlor@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:25:43 +0200 Subject: [PATCH 157/215] Add SonarQube to CI (#8362) --- .github/actions/ci-java-tests/action.yml | 28 ++ .github/actions/ci-py-tests/action.yml | 81 +++++ .github/actions/ci-tests-runner/action.yml | 187 ++++++++++ ...tect-changed-modules-and-build-reports.yml | 42 --- .github/workflows/sonar-scan.yml | 73 ++++ docs/contributing-to-airbyte/README.md | 2 + .../sonar-qube-workflow.md | 34 ++ pyproject.toml | 20 +- .../__init__.py | 0 .../ci_changes_detection/__init__.py | 0 .../ci_changes_detection/main.py | 89 +++++ .../ci_sonar_qube/__init__.py | 12 + .../ci_sonar_qube/log_parsers.py | 312 ++++++++++++++++ tools/ci_code_validator/ci_sonar_qube/main.py | 60 +++ .../ci_sonar_qube/sonar_qube_api.py | 342 ++++++++++++++++++ tools/ci_code_validator/setup.py | 43 +++ tools/ci_code_validator/tests/__init__.py | 0 .../black_smell_package_report.json | 14 + .../isort_smell_package_report.json | 14 + .../mypy_smell_package_report.json | 52 +++ .../simple_files/without_issues_report.json | 1 + .../tests/simple_package/__init__.py | 0 .../tests/simple_package/valid_file.py | 13 + .../tests/simple_smell_package/__init__.py | 0 .../simple_smell_package/invalid_file.py | 15 + .../tests/test_detect_changed_modules.py | 74 ++++ .../tests/test_sq_project.py | 20 + tools/ci_code_validator/tests/test_tools.py | 102 ++++++ tools/ci_credentials/setup.py | 2 +- tools/ci_static_check_reports/__init__.py | 3 - .../__init__.py | 3 - .../main.py | 100 ----- .../ci_detect_changed_modules/main.py | 52 --- tools/ci_static_check_reports/setup.py | 29 -- .../unit_tests/__init__.py | 3 - .../test_build_static_checkers_reports.py | 42 --- .../unit_tests/test_detect_changed_modules.py | 58 --- 37 files changed, 1584 insertions(+), 338 deletions(-) create mode 100644 .github/actions/ci-java-tests/action.yml create mode 100644 .github/actions/ci-py-tests/action.yml create mode 100644 .github/actions/ci-tests-runner/action.yml delete mode 100644 .github/workflows/detect-changed-modules-and-build-reports.yml create mode 100644 .github/workflows/sonar-scan.yml create mode 100644 docs/contributing-to-airbyte/sonar-qube-workflow.md rename tools/{ci_static_check_reports/ci_detect_changed_modules => ci_code_validator}/__init__.py (100%) create mode 100644 tools/ci_code_validator/ci_changes_detection/__init__.py create mode 100644 tools/ci_code_validator/ci_changes_detection/main.py create mode 100644 tools/ci_code_validator/ci_sonar_qube/__init__.py create mode 100644 tools/ci_code_validator/ci_sonar_qube/log_parsers.py create mode 100644 tools/ci_code_validator/ci_sonar_qube/main.py create mode 100644 tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py create mode 100644 tools/ci_code_validator/setup.py create mode 100644 tools/ci_code_validator/tests/__init__.py create mode 100644 tools/ci_code_validator/tests/simple_files/black_smell_package_report.json create mode 100644 
tools/ci_code_validator/tests/simple_files/isort_smell_package_report.json create mode 100644 tools/ci_code_validator/tests/simple_files/mypy_smell_package_report.json create mode 100644 tools/ci_code_validator/tests/simple_files/without_issues_report.json create mode 100644 tools/ci_code_validator/tests/simple_package/__init__.py create mode 100644 tools/ci_code_validator/tests/simple_package/valid_file.py create mode 100644 tools/ci_code_validator/tests/simple_smell_package/__init__.py create mode 100644 tools/ci_code_validator/tests/simple_smell_package/invalid_file.py create mode 100644 tools/ci_code_validator/tests/test_detect_changed_modules.py create mode 100644 tools/ci_code_validator/tests/test_sq_project.py create mode 100644 tools/ci_code_validator/tests/test_tools.py delete mode 100644 tools/ci_static_check_reports/__init__.py delete mode 100644 tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py delete mode 100644 tools/ci_static_check_reports/ci_build_python_static_checkers_reports/main.py delete mode 100644 tools/ci_static_check_reports/ci_detect_changed_modules/main.py delete mode 100644 tools/ci_static_check_reports/setup.py delete mode 100644 tools/ci_static_check_reports/unit_tests/__init__.py delete mode 100644 tools/ci_static_check_reports/unit_tests/test_build_static_checkers_reports.py delete mode 100644 tools/ci_static_check_reports/unit_tests/test_detect_changed_modules.py diff --git a/.github/actions/ci-java-tests/action.yml b/.github/actions/ci-java-tests/action.yml new file mode 100644 index 0000000000000..b98fa86dcae48 --- /dev/null +++ b/.github/actions/ci-java-tests/action.yml @@ -0,0 +1,28 @@ +name: "Runner CI Java Tests" +description: "Runner CI Java Tests" +inputs: + module-name: + required: true + module-folder: + required: true + +runs: + using: "composite" + steps: + - name: Install Java + uses: actions/setup-java@v1 + with: + java-version: '17' + + - name: "Build" + shell: bash + run: | + rm -rf ${{ inputs.module-folder }}/.venv ${{ inputs.module-folder }}/build + ROOT_DIR=$(git rev-parse --show-toplevel) + ARG=:$(python -c "import os; print(os.path.relpath('${{ inputs.module-folder }}', start='${ROOT_DIR}').replace('/', ':') )") + echo "./gradlew --no-daemon $ARG:build" + ./gradlew --no-daemon "$ARG:clean" + ./gradlew --no-daemon "$ARG:build" + + + diff --git a/.github/actions/ci-py-tests/action.yml b/.github/actions/ci-py-tests/action.yml new file mode 100644 index 0000000000000..c1f50251d5579 --- /dev/null +++ b/.github/actions/ci-py-tests/action.yml @@ -0,0 +1,81 @@ +name: "Runner CI Python Tests" +description: "Runner CI Python Tests" +inputs: + module-name: + required: true + module-folder: + required: true +outputs: + coverage-paths: + description: "Coverage Paths" + value: ${{ steps.build-coverage-reports.outputs.coverage-paths }} + flake8-logs: + description: "Flake8 Logs" + value: ${{ steps.build-linter-reports.outputs.flake8-logs }} + mypy-logs: + description: "MyPy Logs" + value: ${{ steps.build-linter-reports.outputs.mypy-logs }} + black-diff: + description: "Black Diff" + value: ${{ steps.build-linter-reports.outputs.black-diff }} + isort-diff: + description: "Isort Diff" + value: ${{ steps.build-linter-reports.outputs.isort-diff }} +runs: + using: "composite" + steps: + - name: Build Coverage Reports + id: build-coverage-reports + shell: bash + working-directory: ${{ inputs.module-folder }} + run: | + virtualenv .venv + source .venv/bin/activate + JSON_CONFIG='{"module": "${{ inputs.module-name }}", 
"folder": "${{ inputs.module-folder }}", "lang": "py"}' + pip install coverage[toml]~=6.2 + mkdir -p .venv/source-acceptance-test + mkdir -p reports + SAT_DIR=$(git rev-parse --show-toplevel)/airbyte-integrations/bases/source-acceptance-test + PYPROJECT_CONFIG=$(git rev-parse --show-toplevel)/pyproject.toml + git ls-tree -r HEAD --name-only $SAT_DIR | while read src; do cp -f $src .venv/source-acceptance-test; done + pip install build + python -m build .venv/source-acceptance-test + pip install .venv/source-acceptance-test/dist/source_acceptance_test-*.whl + [ -f requirements.txt ] && pip install --quiet -r requirements.txt + pip install .[tests] + coverage run --rcfile=${PYPROJECT_CONFIG} -m pytest ./unit_tests || true + coverage xml --rcfile=${PYPROJECT_CONFIG} -o reports/coverage.xml || true + + rm -rf .venv + echo "::set-output name=coverage-paths::reports/coverage.xml" + + - name: Build Linter Reports + id: build-linter-reports + shell: bash + working-directory: ${{ inputs.module-folder }} + run: | + JSON_CONFIG='{"module": "${{ inputs.module-name }}", "folder": "${{ inputs.module-folder }}", "lang": "py"}' + REPORT_FOLDER=reports + PYPROJECT_CONFIG=$(git rev-parse --show-toplevel)/pyproject.toml + + # run mypy + pip install lxml~=4.7 mypy~=0.910 . + mypy . --config-file=${PYPROJECT_CONFIG} | tee reports/mypy.log || true + + # run black + pip install black~=21.12b0 + XDG_CACHE_HOME=/dev/null black --config ${PYPROJECT_CONFIG} --diff . | tee reports/black.diff + + # run isort + pip install isort~=5.10.1 + cp ${PYPROJECT_CONFIG} ./pyproject.toml + isort --diff . | tee reports/isort.diff + + # run flake8 + pip install mccabe~=0.6.1 pyproject-flake8~=0.0.1a2 + pflake8 --exit-zero . | grep ^. | tee reports/flake.txt + + echo "::set-output name=mypy-logs::reports/mypy.log" + echo "::set-output name=black-diff::reports/black.diff" + echo "::set-output name=isort-diff::reports/isort.diff" + echo "::set-output name=flake8-logs::reports/flake.txt" diff --git a/.github/actions/ci-tests-runner/action.yml b/.github/actions/ci-tests-runner/action.yml new file mode 100644 index 0000000000000..3fa01cccb84ac --- /dev/null +++ b/.github/actions/ci-tests-runner/action.yml @@ -0,0 +1,187 @@ +name: "Setup CI Tests Env" +description: "Setup CI Tests Env for all module types" +inputs: + module-name: + description: "Unique module name. e.g.: connectors/source-s3, connectors/destination-s3" + required: true + module-folder: + description: "Path to module folder" + required: true + module-lang: + description: "Detected module language. Available values: py, java" + required: true + sonar-gcp-access-key: + required: true + sonar-token: + description: "Access token for using SonarQube API" + required: true + pull-request-id: + description: "Unique PR ID. 
For example: airbyte/1234" + default: "0" + token: + required: true + remove-sonar-project: + description: "This flag should be used if needed to remove sonar project after using" + default: false +runs: + using: "composite" + steps: + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + + - name: Tests of CI + shell: bash + run: | + # all CI python packages have the prefix "ci_" + pip install --quiet tox==3.24.4 + tox -r -c ./tools/tox_ci.ini + pip install --quiet -e ./tools/ci_* + echo "::echo::off" + + - name: Auth with gcloud CLI + uses: google-github-actions/setup-gcloud@v0 + with: + service_account_key: ${{ inputs.sonar-gcp-access-key }} + project_id: dataline-integration-testing + export_default_credentials: true + + - name: Create IAP tunnel + id: gcloud-tunnel + shell: bash + run: | + while true; do + PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 )) + status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)" + if [ "${status}" != "0" ]; then + echo "$PORT is free to use"; + break; + fi + done + IPS=($(hostname -I)) + LOCAL_IP_PORT="${IPS[0]}:${PORT}" + gcloud compute start-iap-tunnel sonarqube-1-vm 80 --local-host-port=${LOCAL_IP_PORT} --zone=europe-central2-a --project dataline-integration-testing & + echo ::set-output name=pid::$! + echo "::set-output name=sonar-host::http://${LOCAL_IP_PORT}/" + echo "::echo::on" + + - name: Python Tests + id: ci-py-tests + if: ${{ inputs.module-lang == 'py' }} + uses: ./.github/actions/ci-py-tests + with: + module-name: ${{ inputs.module-name }} + module-folder: ${{ inputs.module-folder }} + + - name: Java Tests + id: ci-java-tests + if: ${{ inputs.module-lang == 'java' }} + uses: ./.github/actions/ci-java-tests + with: + module-name: ${{ inputs.module-name }} + module-folder: ${{ inputs.module-folder }} + + + + + + - name: Prepare SQ Options + shell: bash + id: sq-options + working-directory: ${{ inputs.module-folder }} + run: | + REPORT_FOLDER=reports + mkdir -p ${REPORT_FOLDER} + declare -a REPORT_FILES + declare -a OPTIONS + if [ ${{ inputs.module-lang }} == 'py' ]; then + [ -f ${{ steps.ci-py-tests.outputs.mypy-logs }} ] && ci_sonar_qube --mypy_log ${{ steps.ci-py-tests.outputs.mypy-logs }} --output_file ${REPORT_FOLDER}/issues_mypy.json --host ${{ steps.gcloud-tunnel.outputs.sonar-host }} --token ${{ inputs.sonar-token }} + [ -f ${{ steps.ci-py-tests.outputs.mypy-logs }} ] && REPORT_FILES+=(${REPORT_FOLDER}/issues_mypy.json) + + [ -f ${{ steps.ci-py-tests.outputs.black-diff }} ] && ci_sonar_qube --black_diff ${{ steps.ci-py-tests.outputs.black-diff }} --output_file ${REPORT_FOLDER}/issues_black.json --host ${{ steps.gcloud-tunnel.outputs.sonar-host }} --token ${{ inputs.sonar-token }} + [ -f ${{ steps.ci-py-tests.outputs.black-diff }} ] && REPORT_FILES+=(${REPORT_FOLDER}/issues_black.json) + + [ -f ${{ steps.ci-py-tests.outputs.isort-diff }} ] && ci_sonar_qube --isort_diff ${{ steps.ci-py-tests.outputs.isort-diff }} --output_file ${REPORT_FOLDER}/issues_isort.json --host ${{ steps.gcloud-tunnel.outputs.sonar-host }} --token ${{ inputs.sonar-token }} + [ -f ${{ steps.ci-py-tests.outputs.isort-diff }} ] && REPORT_FILES+=(${REPORT_FOLDER}/issues_isort.json) + + [ -f ${{ steps.ci-py-tests.outputs.coverage-paths }} ] && OPTIONS+=("-Dsonar.python.coverage.reportPaths=${{ steps.ci-py-tests.outputs.coverage-paths }}") + [ -f ${{ steps.ci-py-tests.outputs.flake8-logs }} ] && OPTIONS+=("-Dsonar.python.flake8.reportPaths=${{ steps.ci-py-tests.outputs.flake8-logs }}") + fi + if [ ${{ inputs.module-lang }} 
== 'java' ]; then + [ -d "./src/main/java" ] && OPTIONS+=("-Dsonar.sources=./src/main/java") + [ -d "./src/test/java" ] && OPTIONS+=("-Dsonar.tests=./src/test/java") + [ -d "./build/test-results" ] && OPTIONS+=("-Dsonar.junit.reportsPath=./build/test-results") + [ -f "./build/jacoco/test.exec" ] && OPTIONS+=("-Dsonar.jacoco.reportPaths=./build/jacoco/test.exec") + [ -d "./build/classes/java/main" ] && OPTIONS+=("-Dsonar.java.binaries=./build/classes/java/main") + [ -d "./build/classes/java/test" ] && OPTIONS+=("-Dsonar.test.binaries=./build/classes/java/test") + + fi + + # join the array to string format + echo ::set-output name=external_reports::$(IFS=, ; echo "${REPORT_FILES[*]}") + echo ::set-output name=options::$(IFS=' ' ; echo "${OPTIONS[*]}") + + - name: Create SonarQube Project + shell: bash + id: create-sq-project + run: | + ci_sonar_qube --pr ${{ inputs.pull-request-id }} --create --module ${{ inputs.module-name }} --host ${{ steps.gcloud-tunnel.outputs.sonar-host }} --token ${{ inputs.sonar-token }} + echo "::set-output name=sq_project_name::$(ci_sonar_qube --pr ${{ inputs.pull-request-id }} --print_key --module ${{ inputs.module-name }})" + ROOT_DIR=$(git rev-parse --show-toplevel) + MODULE_DIR=$(python -c "print('${{ inputs.module-folder }}'.replace('${ROOT_DIR}', '.'))") + echo "::set-output name=module_dir::${MODULE_DIR}" + + - name: SonarQube Scan + + uses: sonarsource/sonarqube-scan-action@master + env: + SONAR_TOKEN: ${{ inputs.sonar-token }} + SONAR_HOST_URL: ${{ steps.gcloud-tunnel.outputs.sonar-host }} + with: + projectBaseDir: ${{ steps.create-sq-project.outputs.module_dir }} + args: > + -Dsonar.projectKey=${{ steps.create-sq-project.outputs.sq_project_name }} + -Dsonar.verbose=true + -Dsonar.working.directory=/tmp/scannerwork + -Dsonar.language=${{ inputs.module-lang }} + -Dsonar.sourceEncoding=UTF-8 + -Dsonar.projectBaseDir=${{ steps.create-sq-project.outputs.module_dir }} + -Dsonar.exclusions=reports/**,*.toml + -Dsonar.externalIssuesReportPaths=${{ steps.sq-options.outputs.external_reports }} + ${{ steps.sq-options.outputs.options }} + + - name: Generate SonarQube Report + shell: bash + id: generate-sq-report + run: | + # delay because SQ needs time for processing of all input data + sleep 10 + REPORT_FILE=/tmp/sq_report_$RANDOM.md + ci_sonar_qube --pr ${{ inputs.pull-request-id }} --report ${REPORT_FILE} --module ${{ inputs.module-name }} --host ${{ steps.gcloud-tunnel.outputs.sonar-host }} --token ${{ inputs.sonar-token }} + body="$(cat ${REPORT_FILE})" + body="${body//'%'/'%25'}" + body="${body//$'\n'/'%0A'}" + body="${body//$'\r'/'%0D'}" + echo "::set-output name=sq-report::$body" + + - name: Add Comment + if: ${{ github.event_name == 'pull_request' }} + uses: peter-evans/commit-comment@v1 + with: + body: ${{ steps.generate-sq-report.outputs.sq-report }} + token: ${{ inputs.token }} + + - name: Remove SonarQube Project + if: ${{ inputs.remove-sonar-project == true }} + shell: bash + id: remove-sq-project + run: | + ci_sonar_qube --pr ${{ inputs.pull-request-id }} --remove --module ${{ inputs.module-name }} --host ${{ steps.gcloud-tunnel.outputs.sonar-host }} --token ${{ inputs.sonar-token }} + + - name: Remove IAP tunnel + if: always() + shell: bash + run: | + kill ${{ steps.gcloud-tunnel.outputs.pid }} diff --git a/.github/workflows/detect-changed-modules-and-build-reports.yml b/.github/workflows/detect-changed-modules-and-build-reports.yml deleted file mode 100644 index ca5d9f9e730d2..0000000000000 --- 
a/.github/workflows/detect-changed-modules-and-build-reports.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Detect Changed Modules and Build Reports -on: - push: -jobs: - detect-changed-modules: - name: Detect Changed Modules - timeout-minutes: 5 - runs-on: ubuntu-latest - outputs: - changed-modules: ${{ steps.detect-changed-modules.outputs.changed-modules }} - steps: - - name: Checkout Airbyte - uses: actions/checkout@v2 - with: - fetch-depth: 1000 - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - name: Intall Requirements - run: pip install ./tools/ci_static_check_reports/. - - name: Detect Changed Modules - id: detect-changed-modules - run: | - git fetch - echo "::set-output name=changed-modules::'$(ci_detect_changed_modules $(git diff --name-only $(git merge-base HEAD origin/master)))'" - build-reports: - name: Build Python Static Checkers Reports - needs: - - detect-changed-modules - runs-on: ubuntu-latest - steps: - - name: Checkout Airbyte - uses: actions/checkout@v2 - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - name: Intall Requirements - run: pip install ./tools/ci_static_check_reports/. - - name: Build Reports - run: ci_build_python_checkers_reports ${{needs.detect-changed-modules.outputs.changed-modules}} diff --git a/.github/workflows/sonar-scan.yml b/.github/workflows/sonar-scan.yml new file mode 100644 index 0000000000000..db57aef0b7f69 --- /dev/null +++ b/.github/workflows/sonar-scan.yml @@ -0,0 +1,73 @@ +name: Sonar Scan +on: + pull_request: + types: [opened, synchronize, reopened, closed, ready_for_review] + +jobs: + + detect-changes: + name: Detect Changed Modules + timeout-minutes: 5 + runs-on: ubuntu-latest + outputs: + changed-modules: ${{ steps.detect-changed-modules.outputs.changed-modules }} + steps: + - name: Checkout Airbyte + uses: actions/checkout@v2 + with: + fetch-depth: 1000 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Intall Requirements + run: | + pip install ./tools/ci_common_utils ./tools/ci_code_validator[tests] + pytest ./tools/ci_code_validator + - name: Detect Changed Modules + id: detect-changed-modules + run: | + git fetch + CHANGES=$(ci_changes_detection $(git diff --name-only $(git merge-base HEAD origin/master)) | jq -c .) 
+        echo "::set-output name=changed-modules::{ \"include\": $CHANGES }" + + run-ci-tests: + if: github.event.pull_request.draft == false + needs: detect-changes + name: Tests for ${{ matrix.module }} + runs-on: ubuntu-latest + + strategy: + matrix: ${{fromJson(needs.detect-changes.outputs.changed-modules)}} + env: + MODULE_NAME: ${{ matrix.module }} + MODULE_LANG: ${{ matrix.lang }} + MODULE_FOLDER: ${{ matrix.folder }} + ENV_NAME: "github" + + + steps: + - name: Print Settings + run: | + echo "Module: ${{ env.MODULE_NAME }}, Lang: ${{ env.MODULE_LANG }}, Folder: ${{ env.MODULE_FOLDER }}" + - name: Checkout Airbyte + if: ${{ env.ENV_NAME == 'github' }} + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Run Tests Runner + id: run-python-tests + uses: ./.github/actions/ci-tests-runner + with: + token: ${{ secrets.GITHUB_TOKEN }} + module-name: ${{ env.MODULE_NAME }} + module-folder: ${{ env.MODULE_FOLDER }} + module-lang: ${{ env.MODULE_LANG }} + sonar-token: ${{ secrets.SONAR_TOKEN }} + sonar-gcp-access-key: ${{ secrets.GCP_SONAR_SA_KEY }} + pull-request-id: "${{ github.repository }}/${{ github.event.pull_request.number }}" + remove-sonar-project: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' }} + + + diff --git a/docs/contributing-to-airbyte/README.md b/docs/contributing-to-airbyte/README.md index 650bb3073341f..b38bd1138681a 100644 --- a/docs/contributing-to-airbyte/README.md +++ b/docs/contributing-to-airbyte/README.md @@ -120,3 +120,5 @@ As soon as you are done with your development, just put up a PR. You're also alw ‌Once your PR passes, we will merge it. +## **Airbyte CI workflows** +* [Testing by SonarQube](sonar-qube-workflow.md) diff --git a/docs/contributing-to-airbyte/sonar-qube-workflow.md b/docs/contributing-to-airbyte/sonar-qube-workflow.md new file mode 100644 index 0000000000000..5314655118ba8 --- /dev/null +++ b/docs/contributing-to-airbyte/sonar-qube-workflow.md @@ -0,0 +1,34 @@ +# SonarQube workflow + +## Goals + The Airbyte monorepo receives contributions from a lot of developers, and there is no way around human error when merging PRs. +Each language has its own tools for testing and validating source files. And while it is best practice to lint and validate code before pushing it to a branch, this doesn't always happen. +As a result, problems are often detected only after the test/publish commands have already been run. Automated CI code validation therefore provides the following benefits: +* Problem/vulnerability reports are available as soon as the PR is created, so developers can fix bugs and remove smells before code review. +* Reviewers can be sure that all standard checks were run and that the code changes satisfy the requirements. +* The set of tools and their options can be changed globally at any time. +* The progress of code changes is saved in SonarQube, which helps to analyse the quality of the product as a whole as well as of its separate parts. + + +## UML diagram +![image](https://user-images.githubusercontent.com/11213273/149561440-0aceaa30-8f82-4e5b-9ee5-77bdcfd87695.png) + + +## Used tools +### Python +* [flake8](https://flake8.pycqa.org/en/stable/) +* [mypy](https://mypy.readthedocs.io/en/stable/) +* [isort](https://pycqa.github.io/isort/) +* [black](https://black.readthedocs.io/en/stable/) +* [coverage](https://coverage.readthedocs.io/en/6.2/) + +All Python tools use the common [pyproject.toml](https://github.com/airbytehq/airbyte/blob/master/pyproject.toml) file. 
+ +### Common tools +* [SonarQube Scanner](https://docs.sonarqube.org/latest/analysis/scan/sonarscanner/) + +## Access to SonarQube +The Airbyte project uses a custom SonarQube instance. Access to it is explained [here](https://github.com/airbytehq/airbyte-cloud/wiki/IAP-tunnel-to-the-SonarQube-instance). + +## SonarQube settings +The SonarQube server uses default settings. All customisations are implemented into the Github WorkFlows. More details are [here](https://github.com/airbytehq/airbyte/tree/master/.github/actions/ci-tests-runner/action.yml) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 374c87023d494..773d47806fe7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,34 +6,44 @@ target-version = ["py37"] fail_under = 100 skip_empty = true sort = "-cover" - +omit = [ + "*_tests/**", + "setup.py" +] [tool.flake8] -extend-exclude = ".venv" +extend-exclude = [ + ".venv", + "build", + "models", + ".eggs", + ".tox" +] + max-complexity = 10 max-line-length = 140 [tool.isort] profile = "black" -color_output = true +color_output = false skip_gitignore = true [tool.mypy] platform = "linux" +exclude = "build" # Strictness +ignore_missing_imports = true allow_redefinition = true disallow_incomplete_defs = true disallow_untyped_defs = true no_implicit_reexport = true no_strict_optional = true strict_equality = true - # Output pretty = true show_column_numbers = true show_error_codes = true show_error_context = true - # Warnings warn_redundant_casts = true warn_return_any = true diff --git a/tools/ci_static_check_reports/ci_detect_changed_modules/__init__.py b/tools/ci_code_validator/__init__.py similarity index 100% rename from tools/ci_static_check_reports/ci_detect_changed_modules/__init__.py rename to tools/ci_code_validator/__init__.py diff --git a/tools/ci_code_validator/ci_changes_detection/__init__.py b/tools/ci_code_validator/ci_changes_detection/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tools/ci_code_validator/ci_changes_detection/main.py b/tools/ci_code_validator/ci_changes_detection/main.py new file mode 100644 index 0000000000000..9c945a12d3291 --- /dev/null +++ b/tools/ci_code_validator/ci_changes_detection/main.py @@ -0,0 +1,89 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# +import json +import sys +from pathlib import Path +from typing import Dict, List, Optional + +from ci_sonar_qube import ROOT_DIR + +from ci_common_utils import Logger + +# Filenames used to detect whether the dir is a module +LANGUAGE_MODULE_ID_FILE = { + ".py": "setup.py", + # TODO: Add ID files for other languages +} + +LOGGER = Logger() + + +def folder_generator(dir_path: Path) -> Path: + while dir_path and str(dir_path) != dir_path.root and dir_path != dir_path.parent: + if dir_path.is_dir(): + yield dir_path + dir_path = dir_path.parent + + +def find_py_module(changed_path: Path) -> Optional[Path]: + """All Python connectors have setup.py file into own sortware folders""" + for dir_path in folder_generator(changed_path): + setup_py_file = dir_path / "setup.py" + if setup_py_file.is_file(): + return dir_path + return None + + +def find_java_module(changed_path: Path) -> Optional[Path]: + """All Java connectors have a folder src/main/java into own folders""" + for dir_path in folder_generator(changed_path): + required_java_dir = dir_path / "src/main/java" + if required_java_dir.is_dir(): + return dir_path + return None + + +def list_changed_modules(changed_files: List[str]) -> List[Dict[str, str]]: + """ + changed_filed are the list of files which were modified in current branch. + E.g. changed_files = ["tools/ci_static_check_reports/__init__.py", "tools/ci_static_check_reports/setup.py", ...] + """ + module_folders = {} + for file_path in changed_files: + if not file_path.startswith("/"): + file_path = ROOT_DIR / file_path + else: + file_path = Path(file_path) + module_folder = find_py_module(file_path) + if module_folder: + module_folders[module_folder] = "py" + continue + module_folder = find_java_module(file_path) + if module_folder: + module_folders[module_folder] = "java" + + modules = [] + for module_folder, lang in module_folders.items(): + module_folder = str(module_folder) + if "airbyte-integrations/connectors" not in module_folder: + # now we need to detect connectors only + LOGGER.info(f"skip the folder {module_folder}...") + continue + parts = module_folder.split("/") + module_name = "/".join(parts[-2:]) + modules.append({"folder": module_folder, "lang": lang, "module": module_name}) + LOGGER.info(f"Detected the module: {module_name}({lang}) in the folder: {module_folder}") + # _, file_extension = os.path.splitext(file_path) + # find_base_path(file_path, modules, file_ext=file_extension, unique_modules=unique_modules) + return modules + + +def main() -> int: + changed_modules = list_changed_modules(sys.argv[1:]) + print(json.dumps(changed_modules)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/ci_code_validator/ci_sonar_qube/__init__.py b/tools/ci_code_validator/ci_sonar_qube/__init__.py new file mode 100644 index 0000000000000..0be1d14aa82d5 --- /dev/null +++ b/tools/ci_code_validator/ci_sonar_qube/__init__.py @@ -0,0 +1,12 @@ +import os +from pathlib import Path + +from ci_common_utils import Logger + +LOGGER = Logger() + +ROOT_DIR = Path(os.getcwd()) +while str(ROOT_DIR) != "/" and not (ROOT_DIR / "gradlew").is_file(): + ROOT_DIR = ROOT_DIR.parent +if str(ROOT_DIR) == "/": + LOGGER.critical("this script must be executed into the Airbite repo only") diff --git a/tools/ci_code_validator/ci_sonar_qube/log_parsers.py b/tools/ci_code_validator/ci_sonar_qube/log_parsers.py new file mode 100644 index 0000000000000..8389db9e03125 --- /dev/null +++ b/tools/ci_code_validator/ci_sonar_qube/log_parsers.py @@ -0,0 +1,312 @@ +import json +import 
os +import re +from collections import defaultdict +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Callable, TextIO, List, Optional, Mapping, Any + +try: + # these packages are not always needed + from mypy.errorcodes import error_codes as mypy_error_codes, ErrorCode + from unidiff import PatchSet +except ModuleNotFoundError: + pass + +from .sonar_qube_api import SonarQubeApi + +HERE = Path(os.getcwd()) +RE_MYPY_LINE = re.compile(r"^(.+):(\d+):(\d+):") +RE_MYPY_LINE_WO_COORDINATES = re.compile(r"^(.+): error: (.+)") + + +class IssueSeverity(Enum): + blocker = "BLOCKER" + critical = "CRITICAL" + major = "MAJOR" + minor = "MINOR" + info = "INFO" + + +@dataclass +class Rule: + class Type(Enum): + code_smell = "CODE_SMELL" + bug = "BUG" + vulnerability = "VULNERABILITY" + security_hotspot = "SECURITY_HOTSPOT" + + rule_type: Type + key: str + name: str + description: str + tool_name: str + template: str + severity: IssueSeverity + + @property + def unique_key(self): + return f"{self.tool_name}_{self.key}".replace("-", "_") + + @property + def sq_key(self): + lang_part = self.template.split(":")[0] + return f"{lang_part}:{self.tool_name}_{self.key}".replace("-", "_") + + +def generate_mypy_rules() -> Mapping[str, Rule]: + try: + addl_code = ErrorCode( + code="unknown", + description="Unknown error", + category="General", + ) + except NameError: + return [] + return {f"[{err.code}]": Rule( + rule_type=Rule.Type.code_smell, + key=err.code, + name=err.code.replace("-", " ").capitalize() + " (mypy)", + description=err.description, + tool_name="mypy", + severity=IssueSeverity.minor, + template="python:CommentRegularExpression" + ) for err in list(mypy_error_codes.values()) + [addl_code]} + + +class LogParser(SonarQubeApi): + _mypy_rules: Mapping[str, Rule] = generate_mypy_rules() + _black_rule = Rule( + rule_type=Rule.Type.code_smell, + key="need_format", + name="Should be formatted (black)", + description='Please run one of the commands: "black --config ./pyproject.toml " or "./gradlew format"', + tool_name="black", + severity=IssueSeverity.minor, + template="python:CommentRegularExpression" + ) + + _isort_rule = Rule( + rule_type=Rule.Type.code_smell, + key="need_format", + name="Should be formatted (isort)", + description='Please run one of the commands: "isort " or "./gradlew format"', + tool_name="isort", + severity=IssueSeverity.minor, + template="python:CommentRegularExpression" + ) + + @dataclass + class Issue: + path: str + + rule: Rule + description: str + + line_number: int = None # 1-indexed + column_number: int = None # 1-indexed + + def to_json(self): + data = { + "engineId": self.rule.tool_name, + "ruleId": self.rule.sq_key, + "severity": self.rule.severity.value, + "type": self.rule.rule_type.value, + "primaryLocation": { + "message": self.description, + "filePath": self.checked_path, + } + } + if self.line_number is not None: + data["primaryLocation"]["textRange"] = { + "startLine": self.line_number, + "endLine": self.line_number, + "startColumn": self.column_number - 1, # 0-indexed + "endColumn": self.column_number, # 0-indexed + } + return data + + @property + def checked_path(self): + if self.path.startswith(str(HERE) + "/"): + # remove a parent part of path + return self.path[len(str(HERE) + "/"):] + return self.path + + def __init__(self, output_file: str, host: str, token: str): + super().__init__(host=host, token=token, pr_name="0") + self.output_file = output_file + + def prepare_file(func: Callable) -> Callable: + def 
intra(self, input_file: str) -> int: + if not os.path.exists(input_file): + self.logger.critical(f"not found input file: {input_file}") + with open(input_file, "r") as file: + issues = func(self, file) + self._save_all_rules(issues) + data = self._issues2dict(issues) + with open(self.output_file, "w") as output_file: + output_file.write(json.dumps(data)) + self.logger.info(f"the file {self.output_file} was updated") + return 0 + return 1 + + return intra + + def _save_all_rules(self, issues: List[Issue]) -> bool: + """Checks and create SQ rules if needed""" + if not issues: + return False + rules = defaultdict(list) + for issue in issues: + rules[issue.rule.tool_name].append(issue.rule) + for tool_name, tool_rules in rules.items(): + exist_rules = [rule["key"] for rule in self._get_list(f"rules/search?include_external=true&q={tool_name}", "rules")] + grouped_rules = {rule.sq_key: rule for rule in tool_rules} + for sq_key, rule in grouped_rules.items(): + if sq_key in exist_rules: + # was created before + continue + self.logger.info(f"try to create the rule: {sq_key}") + body = { + "custom_key": rule.unique_key, + "markdown_description": rule.description, + "name": rule.name, + "severity": rule.severity.value, + "type": rule.rule_type.value, + "template_key": rule.template + } + self._post("rules/create", body) + self.logger.info(f"the rule {sq_key} was created") + return True + + def _issues2dict(self, issues: List[Issue]) -> Mapping[str, Any]: + """ + { + "issues": [ + { + "engineId": "test", + "ruleId": "rule1", + "severity":"BLOCKER", + "type":"CODE_SMELL", + "primaryLocation": { + "message": "fully-fleshed issue", + "filePath": "sources/A.java", + "textRange": { + "startLine": 30, + "endLine": 30, + "startColumn": 9, + "endColumn": 14 + } + } + }, + ... + ]}""" + return { + "issues": [issue.to_json() for issue in issues] + } + + @prepare_file + def from_mypy(self, file: TextIO) -> List[Issue]: + buff = None + items = [] + + for line in file: + line = line.strip() + if RE_MYPY_LINE.match(line): + if buff: + items.append(self.__parse_mypy_issue(buff)) + buff = [] + if buff is not None: + buff.append(line) + if buff is None: + # mypy can return an error without line/column values + file.seek(0) + for line in file: + m = RE_MYPY_LINE_WO_COORDINATES.match(line.strip()) + if not m: + continue + items.append(self.Issue( + path=m.group(1).strip(), + description=m.group(2).strip(), + rule=self._mypy_rules["[unknown]"], + )) + self.logger.info(f"detected an error without coordinates: {line}") + + items.append(self.__parse_mypy_issue(buff)) + return [i for i in items if i] + + @classmethod + def __parse_mypy_issue(cls, lines: List[str]) -> Optional[Issue]: + """" + An example of log response: + source_airtable/helpers.py:8:1: error: Library stubs not installed for + "requests" (or incompatible with Python 3.7) [import] + import requests + ^ + source_airtable/helpers.py:8:1: note: Hint: "python3 -m pip install types-requests" + """ + if not lines: + return None + path, line_number, column_number, error_or_note, *others = " ".join(lines).split(":") + if "test" in Path(path).name: + cls.logger.info(f"skip the test file: {path}") + return None + if error_or_note.strip() == "note": + return None + others = ":".join(others) + rule = None + for code in cls._mypy_rules: + if code in others: + rule = cls._mypy_rules[code] + others = re.sub(r"\s+", " ", others.replace(code, ". 
Code line: ")) + break + if not rule: + cls.logger.warning(f"couldn't parse the lines: {lines}") + return None + + description = others.split("^")[0] + + return cls.Issue( + path=path.strip(), + line_number=int(line_number.strip()), + column_number=int(column_number.strip()), + description=description.strip(), + rule=rule, + ) + + @staticmethod + def __parse_diff(lines: List[str]) -> Mapping[str, int]: + """Converts diff lines to mapping: + {file1: , file2: } + """ + patch = PatchSet(lines, metadata_only=True) + return {updated_file.path: len(updated_file) for updated_file in patch} + + @prepare_file + def from_black(self, file: TextIO) -> List[Issue]: + return [self.Issue( + path=path, + description=f"{count} code part(s) should be updated.", + rule=self._black_rule, + ) for path, count in self.__parse_diff(file.readlines()).items()] + + @prepare_file + def from_isort(self, file: TextIO) -> List[Issue]: + changes = defaultdict(lambda: 0) + for path, count in self.__parse_diff(file.readlines()).items(): + # check path value + # path in isort diff file has the following format + # :before|after + if path.endswith(":before"): + path = path[:-len(":before")] + elif path.endswith(":after"): + path = path[:-len(":after")] + changes[path] += count + + return [self.Issue( + path=path, + description=f"{count} code part(s) should be updated.", + rule=self._isort_rule, + ) for path, count in changes.items()] diff --git a/tools/ci_code_validator/ci_sonar_qube/main.py b/tools/ci_code_validator/ci_sonar_qube/main.py new file mode 100644 index 0000000000000..17c8370f1afd4 --- /dev/null +++ b/tools/ci_code_validator/ci_sonar_qube/main.py @@ -0,0 +1,60 @@ +import argparse +import sys + +from .log_parsers import LogParser +from .sonar_qube_api import SonarQubeApi + + +def main() -> int: + convert_key = len(set(["--mypy_log", "--black_diff", "--isort_diff"]) & set(sys.argv)) > 0 + need_print_key = "--print_key" in sys.argv + + parser = argparse.ArgumentParser(description='Working with SonarQube instance.') + parser.add_argument('--host', help='SonarQube host', required=not need_print_key, type=str) + parser.add_argument('--token', help='SonarQube token', required=not need_print_key, type=str) + parser.add_argument('--pr', help='PR unique name. 
Example: airbyte/1231', type=str, default=None)
+
+    name_value = parser.add_mutually_exclusive_group(required=not convert_key)
+    name_value.add_argument('--project', help='Name of future project', type=str)
+    name_value.add_argument('--module', help='Name of future module project', type=str)
+
+    command = parser.add_mutually_exclusive_group(required=not convert_key)
+    command.add_argument('--print_key', help='Return a generated SonarQube key', action="store_true")
+    command.add_argument('--report', help='Generate a .md file with the current issues of a project')
+    command.add_argument('--create', help='Create a project', action="store_true")
+    command.add_argument('--remove', help='Remove a project', action="store_true")
+
+    parser.add_argument('--mypy_log', help='Path to MyPy Logs', required=False, type=str)
+    parser.add_argument('--black_diff', help='Path to Black Diff', required=False, type=str)
+    parser.add_argument('--isort_diff', help='Path to iSort Diff', required=False, type=str)
+    parser.add_argument('--output_file', help='Path of output file', required=convert_key, type=str)
+
+    args = parser.parse_args()
+    if convert_key:
+        parser = LogParser(output_file=args.output_file, host=args.host, token=args.token)
+        if args.mypy_log:
+            return parser.from_mypy(args.mypy_log)
+        if args.black_diff:
+            return parser.from_black(args.black_diff)
+        if args.isort_diff:
+            return parser.from_isort(args.isort_diff)
+    api = SonarQubeApi(host=args.host, token=args.token, pr_name=args.pr)
+
+    project_name = api.module2project(args.module) if args.module else args.project
+
+    if args.create:
+        return 0 if api.create_project(project_name=project_name) else 1
+    elif args.remove:
+        return 0 if api.remove_project(project_name=project_name) else 1
+    elif args.print_key:
+        data = api.prepare_project_settings(project_name)
+        print(data["project"], file=sys.stdout)
+        return 0
+    elif args.report:
+        return 0 if api.generate_report(project_name=project_name, report_file=args.report) else 1
+    api.logger.critical("no action was specified...")
+    return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py b/tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py
new file mode 100644
index 0000000000000..1b96f5ec6e785
--- /dev/null
+++ b/tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py
@@ -0,0 +1,342 @@
+import itertools
+import re
+from functools import reduce
+from typing import Mapping, Any, Optional, List
+from urllib.parse import urljoin
+
+import requests
+from mdutils.mdutils import MdUtils
+from requests.auth import HTTPBasicAuth
+
+from ci_common_utils import Logger
+
+AIRBYTE_PROJECT_PREFIX = "airbyte"
+RE_RULE_NAME = re.compile(r"(.+):[A-Za-z]+(\d+)")
+
+REPORT_METRICS = (
+    "alert_status",
+    # "quality_gate_details",
+    "bugs", "new_bugs",
+    "reliability_rating", "new_reliability_rating",
+    "vulnerabilities", "new_vulnerabilities",
+    "security_rating", "new_security_rating",
+    # "security_hotspots", "new_security_hotspots",
+    # "security_hotspots_reviewed", "new_security_hotspots_reviewed",
+    # "security_review_rating", "new_security_review_rating",
+    "code_smells", "new_code_smells",
+    # "sqale_rating", "new_maintainability_rating",
+    # "sqale_index", "new_technical_debt",
+    "coverage", "new_coverage",
+    "lines_to_cover", "new_lines_to_cover",
+    "tests",
+    "duplicated_lines_density", "new_duplicated_lines_density",
+    "duplicated_blocks",
+    "ncloc",
+    # "ncloc_language_distribution",
+    # "projects",
+    # "lines", "new_lines"
+)
+
+RATINGS = {
+    1.0: "A",
+    2.0: "B",
+    3.0: "C",
+    4.0: "D",
+    5.0: "F",
+}
+
+
+class SonarQubeApi:
+    """https://sonarcloud.io/web_api"""
+    logger = Logger()
+
+    def __init__(self, host: str, token: str, pr_name: str):
+
+        self._host = host
+        self._token = token
+
+        # split the latest name part
+        self._pr_id = (pr_name or '').split("/")[-1]
+        if not self._pr_id.isdigit():
+            self.logger.critical(f"PR id should be an integer. Current value: {pr_name}")
+
+        self._pr_id = int(self._pr_id)
+        # check token
+        # https://sonarcloud.io/web_api/api/authentication/validate
+        if not self._host:
+            return
+        resp = self._get("authentication/validate")
+        if not resp["valid"]:
+            self.logger.critical("provided token is not valid")
+
+    @property
+    def __auth(self):
+        return HTTPBasicAuth(self._token, '')
+
+    def __parse_response(self, url: str, response: requests.Response) -> Mapping[str, Any]:
+        if response.status_code == 204:
+            # empty response
+            return {}
+        elif response.status_code != 200:
+            self.logger.critical(f"API error for {url}: [{response.status_code}] {response.json()['errors']}")
+        return response.json()
+
+    def generate_url(self, endpoint: str) -> str:
+        return reduce(urljoin, [self._host, "/api/", endpoint])
+
+    def _post(self, endpoint: str, json: Mapping[str, Any]) -> Mapping[str, Any]:
+        url = self.generate_url(endpoint)
+        return self.__parse_response(url, requests.post(url, auth=self.__auth, params=json, json=json))
+
+    def _get(self, endpoint: str) -> Mapping[str, Any]:
+        url = self.generate_url(endpoint)
+        return self.__parse_response(url, requests.get(url, auth=self.__auth))
+
+    def _get_list(self, endpoint: str, list_name: str) -> List[Mapping[str, Any]]:
+
+        page = 0
+        items = []
+        while True:
+            page += 1
+            url = endpoint + ("&" if "?" in endpoint else "?") + f"p={page}"
+            data = self._get(url)
+            items += data[list_name]
+            total = data.get("total") or data.get("paging", {}).get("total", 0)
+            if len(items) >= total:
+                break
+        return items
+
+    @classmethod
+    def module2project(cls, module_name: str) -> str:
+        """Converts a module name like `component/module` into a SonarQube project key."""
+        parts = module_name.split("/")
+        if len(parts) != 2:
+            cls.logger.critical("module name must have the format: component/module")
+        return f"{AIRBYTE_PROJECT_PREFIX}:{parts[0].lower()}:{parts[1].lower().replace('_', '-')}"
+
+    def __correct_project_name(self, project_name: str) -> str:
+        return f"pr:{self._pr_id}:{project_name}" if self._pr_id else f"master:{project_name}"
+
+    def __search_project(self, project_name: str) -> Optional[Mapping[str, Any]]:
+        """https://sonarcloud.io/web_api/api/projects/search"""
+        data = self._get(f"projects/search?q={project_name}")
+        exists_projects = data["components"]
+        if len(exists_projects) > 1:
+            self.logger.critical(f"there are several projects with the name '{project_name}'")
+        elif len(exists_projects) == 0:
+            return None
+        return exists_projects[0]
+
+    def prepare_project_settings(self, project_name: str) -> Mapping[str, str]:
+        title = re.sub('[:_-]', ' ', project_name).replace("connectors_", "").title()
+        if self._pr_id:
+            title += f"(#{self._pr_id})"
+
+        project_name = self.__correct_project_name(project_name)
+        return {
+            "name": title,
+            "project": project_name,
+            "visibility": "private",
+        }
+
+    def create_project(self, project_name: str) -> bool:
+        """https://sonarcloud.io/web_api/api/projects/create"""
+        data = self.prepare_project_settings(project_name)
+        project_name = data["project"]
+        exists_project = self.__search_project(project_name)
+        if exists_project:
+            self.logger.info(f"The project '{project_name}' already exists")
+            return 
True + + self._post("projects/create", data) + self.logger.info(f"The project '{project_name}' was created") + return True + + def remove_project(self, project_name: str) -> bool: + """https://sonarcloud.io/web_api/api/projects/delete""" + project_name = self.prepare_project_settings(project_name)["project"] + + exists_project = self.__search_project(project_name) + if exists_project is None: + self.logger.info(f"not found the project '{project_name}'") + return True + body = { + "project": project_name + } + self._post("projects/delete", body) + self.logger.info(f"The project '{project_name}' was removed") + return True + + def generate_report(self, project_name: str, report_file: str) -> bool: + project_data = self.prepare_project_settings(project_name) + + md_file = MdUtils(file_name=report_file) + md_file.new_line(f'### SonarQube report for {project_data["name"]}') + + project_name = project_data["project"] + + issues = self._get_list(f"issues/search?componentKeys={project_name}&additionalFields=_all", "issues") + rules = {} + for rule_key in set(issue["rule"] for issue in issues): + key_parts = rule_key.split(":") + while len(key_parts) > 2: + key_parts.pop(0) + key = ":".join(key_parts) + + data = self._get(f"rules/search?rule_key={key}")["rules"] + if not data: + data = self._get(f"rules/show?key={rule_key}")["rule"] + else: + data = data[0] + + description = data["name"] + public_name = key + link = None + if rule_key.startswith("external_"): + public_name = key.replace("external_", "") + if not data["isExternal"]: + # this is custom rule + description = data["htmlDesc"] + if public_name.startswith("flake"): + # single link for all descriptions + link = "https://flake8.pycqa.org/en/latest/user/error-codes.html" + elif "isort_" in public_name: + link = "https://pycqa.github.io/isort/index.html" + elif "black_" in public_name: + link = "https://black.readthedocs.io/en/stable/the_black_code_style/index.html" + else: + # link's example + # https://rules.sonarsource.com/python/RSPEC-6287 + m = RE_RULE_NAME.match(public_name) + if not m: + # for local server + link = f"{self._host}coding_rules?open={key}&rule_key={key}" + else: + # to public SQ docs + link = f"https://rules.sonarsource.com/{m.group(1)}/RSPEC-{m.group(2)}" + if link: + public_name = md_file.new_inline_link( + link=link, + text=public_name + ) + + rules[rule_key] = (public_name, description) + + data = self._get(f"measures/component?component={project_name}&additionalFields=metrics&metricKeys={','.join(REPORT_METRICS)}") + measures = {} + total_coverage = None + for measure in data["component"]["measures"]: + metric = measure["metric"] + if measure["metric"].startswith("new_") and measure.get("periods"): + # we need to show values for last sync period only + last_period = max(measure["periods"], key=lambda period: period["index"]) + value = last_period["value"] + else: + value = measure.get("value") + measures[metric] = value + # group overall and latest values + measures = {metric: (value, measures.get(f"new_{metric}")) for metric, value in measures.items() if + not metric.startswith("new_")} + metrics = {} + for metric in data["metrics"]: + # if metric["key"] not in measures: + # continue + metrics[metric["key"]] = (metric["name"], metric["type"]) + + md_file.new_line('#### Measures') + + values = [] + for metric, (overall_value, latest_value) in measures.items(): + if metric not in metrics: + continue + name, metric_type = metrics[metric] + value = overall_value if (latest_value is None or latest_value == "0") else 
latest_value + + if metric_type == "PERCENT": + value = str(round(float(value), 1)) + elif metric_type == "INT": + value = int(float(value)) + elif metric_type == "LEVEL": + pass + elif metric_type == "RATING": + value = int(float(value)) + for k, v in RATINGS.items(): + if value <= k: + value = v + break + if metric == "coverage": + total_coverage = value + values.append([name, value]) + + values += [ + ("Blocker Issues", sum(map(lambda i: i["severity"] == "BLOCKER", issues))), + ("Critical Issues", sum(map(lambda i: i["severity"] == "CRITICAL", issues))), + ("Major Issues", sum(map(lambda i: i["severity"] == "MAJOR", issues))), + ("Minor Issues", sum(map(lambda i: i["severity"] == "MINOR", issues))), + ] + + while len(values) % 3: + values.append(("", "")) + table_items = ["Name", "Value"] * 3 + list(itertools.chain.from_iterable(values)) + md_file.new_table(columns=6, rows=int(len(values) / 3 + 1), text=table_items, text_align='left') + md_file.new_line() + if issues: + md_file.new_line('#### Detected Issues') + table_items = [ + "Rule", "File", "Description", "Message" + ] + for issue in issues: + rule_name, description = rules[issue["rule"]] + path = issue["component"].split(":")[-1].split("/") + # need to show only 2 last path parts + while len(path) > 2: + path.pop(0) + path = "/".join(path) + + # add line number in the end + if issue.get("line"): + path += f':{issue["line"]}' + table_items += [ + f'{rule_name} ({issue["severity"]})', + path, + description, + issue["message"], + ] + + md_file.new_table(columns=4, rows=len(issues) + 1, text=table_items, text_align='left') + coverage_files = [(k, v) for k, v in self.load_coverage_component(project_name).items()] + if total_coverage is not None: + md_file.new_line(f'#### Coverage ({total_coverage}%)') + while len(coverage_files) % 2: + coverage_files.append(("", "")) + table_items = ["File", "Coverage"] * 2 + list(itertools.chain.from_iterable(coverage_files)) + md_file.new_table(columns=4, rows=int(len(coverage_files) / 2 + 1), text=table_items, text_align='left') + md_file.create_md_file() + self.logger.info(f"The {report_file} was generated") + return True + + def load_coverage_component(self, base_component: str, dir_path: str = None) -> Mapping[str, Any]: + + page = 0 + coverage_files = {} + read_count = 0 + while True: + page += 1 + component = base_component + if dir_path: + component += f":{dir_path}" + url = f"measures/component_tree?p={page}&component={component}&additionalFields=metrics&metricKeys=coverage,uncovered_lines,uncovered_conditions&strategy=children" + data = self._get(url) + read_count += len(data["components"]) + for component in data["components"]: + if component["qualifier"] == "DIR": + coverage_files.update(self.load_coverage_component(base_component, component["path"])) + continue + elif not component["measures"]: + continue + elif component["qualifier"] == "FIL": + coverage_files[component["path"]] = [m["value"] for m in component["measures"] if m["metric"] == "coverage"][0] + if data["paging"]["total"] <= read_count: + break + + return coverage_files diff --git a/tools/ci_code_validator/setup.py b/tools/ci_code_validator/setup.py new file mode 100644 index 0000000000000..87d4c2ad3b3fb --- /dev/null +++ b/tools/ci_code_validator/setup.py @@ -0,0 +1,43 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "requests", + "ci_common_utils", + "unidiff", + "mdutils~=1.3.1" +] + +TEST_REQUIREMENTS = [ + "requests-mock", + "pytest", + "black", + "mypy", + "lxml", + "isort" +] + +setup( + version="0.0.0", + name="ci_code_validator", + description="Load and extract CI secrets for test suites", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + python_requires='>=3.7', + extras_require={ + "tests": TEST_REQUIREMENTS, + + }, + entry_points={ + 'console_scripts': [ + 'ci_sonar_qube = ci_sonar_qube.main:main', + 'ci_changes_detection = ci_changes_detection.main:main', + ], + }, +) diff --git a/tools/ci_code_validator/tests/__init__.py b/tools/ci_code_validator/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tools/ci_code_validator/tests/simple_files/black_smell_package_report.json b/tools/ci_code_validator/tests/simple_files/black_smell_package_report.json new file mode 100644 index 0000000000000..f5ba0fa01f794 --- /dev/null +++ b/tools/ci_code_validator/tests/simple_files/black_smell_package_report.json @@ -0,0 +1,14 @@ +{ + "issues": [ + { + "engineId": "black", + "ruleId": "python:black_need_format", + "severity": "MINOR", + "type": "CODE_SMELL", + "primaryLocation": { + "message": "1 code part(s) should be updated.", + "filePath": "simple_smell_package/invalid_file.py" + } + } + ] +} diff --git a/tools/ci_code_validator/tests/simple_files/isort_smell_package_report.json b/tools/ci_code_validator/tests/simple_files/isort_smell_package_report.json new file mode 100644 index 0000000000000..34c4c7c63f61d --- /dev/null +++ b/tools/ci_code_validator/tests/simple_files/isort_smell_package_report.json @@ -0,0 +1,14 @@ +{ + "issues": [ + { + "engineId": "isort", + "ruleId": "python:isort_need_format", + "severity": "MINOR", + "type": "CODE_SMELL", + "primaryLocation": { + "message": "1 code part(s) should be updated.", + "filePath": "simple_smell_package/invalid_file.py" + } + } + ] +} diff --git a/tools/ci_code_validator/tests/simple_files/mypy_smell_package_report.json b/tools/ci_code_validator/tests/simple_files/mypy_smell_package_report.json new file mode 100644 index 0000000000000..968becdca1dca --- /dev/null +++ b/tools/ci_code_validator/tests/simple_files/mypy_smell_package_report.json @@ -0,0 +1,52 @@ +{ + "issues": [ + { + "engineId": "mypy", + "ruleId": "python:mypy_return_value", + "severity": "MINOR", + "type": "CODE_SMELL", + "primaryLocation": { + "message": "Incompatible return value type (got \"int\", expected \"str\") . Code line: return 1000", + "filePath": "simple_smell_package/invalid_file.py", + "textRange": { + "startLine": 11, + "endLine": 11, + "startColumn": 11, + "endColumn": 12 + } + } + }, + { + "engineId": "mypy", + "ruleId": "python:mypy_no_redef", + "severity": "MINOR", + "type": "CODE_SMELL", + "primaryLocation": { + "message": "Name \"fake_func\" already defined on line 10 . Code line: def fake_func(i):", + "filePath": "simple_smell_package/invalid_file.py", + "textRange": { + "startLine": 14, + "endLine": 14, + "startColumn": 0, + "endColumn": 1 + } + } + }, + { + "engineId": "mypy", + "ruleId": "python:mypy_no_untyped_def", + "severity": "MINOR", + "type": "CODE_SMELL", + "primaryLocation": { + "message": "Function is missing a type annotation . 
Code line: def fake_func(i):", + "filePath": "simple_smell_package/invalid_file.py", + "textRange": { + "startLine": 14, + "endLine": 14, + "startColumn": 0, + "endColumn": 1 + } + } + } + ] +} diff --git a/tools/ci_code_validator/tests/simple_files/without_issues_report.json b/tools/ci_code_validator/tests/simple_files/without_issues_report.json new file mode 100644 index 0000000000000..2eb23288a67d0 --- /dev/null +++ b/tools/ci_code_validator/tests/simple_files/without_issues_report.json @@ -0,0 +1 @@ +{"issues": []} \ No newline at end of file diff --git a/tools/ci_code_validator/tests/simple_package/__init__.py b/tools/ci_code_validator/tests/simple_package/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tools/ci_code_validator/tests/simple_package/valid_file.py b/tools/ci_code_validator/tests/simple_package/valid_file.py new file mode 100644 index 0000000000000..5ad71b1c936f3 --- /dev/null +++ b/tools/ci_code_validator/tests/simple_package/valid_file.py @@ -0,0 +1,13 @@ +# don't valid it by auto-linters becaise this file is used for testing +import os +from pathlib import Path + +LONG_STRING = """aaaaaaaaaaaaaaaa""" + + +def func() -> bool: + return Path(os.getcwd()).is_dir() is True + + +def func2(i: int) -> int: + return i * 10 diff --git a/tools/ci_code_validator/tests/simple_smell_package/__init__.py b/tools/ci_code_validator/tests/simple_smell_package/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tools/ci_code_validator/tests/simple_smell_package/invalid_file.py b/tools/ci_code_validator/tests/simple_smell_package/invalid_file.py new file mode 100644 index 0000000000000..b99a26f148566 --- /dev/null +++ b/tools/ci_code_validator/tests/simple_smell_package/invalid_file.py @@ -0,0 +1,15 @@ +# don't valid it by auto-linters because this file is used for testing +import pathlib +import os + + + +LONG_STRING = """aaaaaaaaaaaaaaaaaaaaaaaaaawwwwwwwwwwwwwwwwwwwwwwwww mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm mmmmmmmmmmmmmm wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww""" + + +def fake_func() -> str: + return 1000 + + +def fake_func(i): + return i * 10 diff --git a/tools/ci_code_validator/tests/test_detect_changed_modules.py b/tools/ci_code_validator/tests/test_detect_changed_modules.py new file mode 100644 index 0000000000000..cd1956fc17fd6 --- /dev/null +++ b/tools/ci_code_validator/tests/test_detect_changed_modules.py @@ -0,0 +1,74 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# +from typing import List, Set + +import pytest +from ci_changes_detection.main import list_changed_modules +from ci_sonar_qube import ROOT_DIR + + +@pytest.mark.parametrize( + "changed_files,changed_modules", + [ + (["path/to/file1", "file2.txt", "path/to/file3.txt"], []), + ( + [ + "airbyte-integrations/connectors/source-asana/source_asana/streams.py", + "airbyte-integrations/connectors/source-asana/source_asana/source.py", + "airbyte-integrations/connectors/source-braintree/integration_tests/abnormal_state.json", + ], + [ + {"folder": str(ROOT_DIR / "airbyte-integrations/connectors/source-asana"), "lang": "py", + "module": "connectors/source-asana"}, + {"folder": str(ROOT_DIR / "airbyte-integrations/connectors/source-braintree"), "lang": "py", + "module": "connectors/source-braintree"}, + ], + ), + ( + [ + "airbyte-integrations/connectors/destination-mongodb/build.gradle", + "airbyte-integrations/connectors/destination-mongodb/src/main/java/io/airbyte/integrations/destination/mongodb/MongodbDestination.java", + "airbyte-integrations/connectors/destination-s3/Dockerfile", + ], + [ + {"folder": str(ROOT_DIR / "airbyte-integrations/connectors/destination-mongodb"), "lang": "java", + "module": "connectors/destination-mongodb"}, + {"folder": str(ROOT_DIR / "airbyte-integrations/connectors/destination-s3"), "lang": "java", + "module": "connectors/destination-s3"}, + ], + ), + ( + [ + "airbyte-integrations/connectors/source-s3/Dockerfile", + "airbyte-integrations/connectors/destination-s3/Dockerfile", + "tools/ci_code_validator" + ], + [ + {"folder": str(ROOT_DIR / "airbyte-integrations/connectors/source-s3"), "lang": "py", + "module": "connectors/source-s3"}, + {"folder": str(ROOT_DIR / "airbyte-integrations/connectors/destination-s3"), "lang": "java", + "module": "connectors/destination-s3"}, + ], + ), + ( + [ + "airbyte-integrations/connectors/source-s3/Dockerfile", + "airbyte-integrations/connectors/destination-s3/Dockerfile", + "tools/ci_code_validator" + ], + [ + {"folder": str(ROOT_DIR / "airbyte-integrations/connectors/source-s3"), "lang": "py", + "module": "connectors/source-s3"}, + {"folder": str(ROOT_DIR / "airbyte-integrations/connectors/destination-s3"), "lang": "java", + "module": "connectors/destination-s3"}, + ], + ), + + ], + ids=["incorrect_files", "py_modules_only", "java_modules_only", "mix_modules", "absolute_paths"], +) +def test_list_changed_modules(changed_files: List[str], changed_modules: Set[str]) -> None: + calculated_changed_modules = list_changed_modules(changed_files) + + assert calculated_changed_modules == changed_modules diff --git a/tools/ci_code_validator/tests/test_sq_project.py b/tools/ci_code_validator/tests/test_sq_project.py new file mode 100644 index 0000000000000..4fb0ea1b38149 --- /dev/null +++ b/tools/ci_code_validator/tests/test_sq_project.py @@ -0,0 +1,20 @@ +import pytest +import requests_mock +from ci_sonar_qube.sonar_qube_api import SonarQubeApi + + +@pytest.mark.parametrize( + "module_name,pr, expected_title, expected_key", + [ + ("connectors/source-s3", "airbyte/1234", "Airbyte Connectors Source S3(#1234)", "pr:1234:airbyte:connectors:source-s3"), + ("tools/ci_code_validator", "airbyte/1111", "Airbyte Tools Ci Code Validator(#1111)", "pr:1111:airbyte:tools:ci-code-validator"), + ("airbyte-cdk/python", "0", "Airbyte Airbyte Cdk Python", "master:airbyte:airbyte-cdk:python"), + ] +) +def test_module2project(module_name, pr, expected_title, expected_key): + with requests_mock.Mocker() as m: + m.get('/api/authentication/validate', json={"valid": 
True}) + api = SonarQubeApi(host="http://fake.com/", token="", pr_name=pr) + project_settings = api.prepare_project_settings(api.module2project(module_name)) + assert project_settings["name"] == expected_title + assert project_settings["project"] == expected_key diff --git a/tools/ci_code_validator/tests/test_tools.py b/tools/ci_code_validator/tests/test_tools.py new file mode 100644 index 0000000000000..3a9b7593062ad --- /dev/null +++ b/tools/ci_code_validator/tests/test_tools.py @@ -0,0 +1,102 @@ +import json +import os +import shutil +import subprocess +from pathlib import Path + +import pytest +import requests_mock + +from ci_code_validator.ci_sonar_qube.log_parsers import LogParser + +HERE = Path(__file__).parent +PACKAGE_DIR = HERE / "simple_package" +SMELL_PACKAGE_DIR = HERE / "simple_smell_package" +SIMPLE_FILES = HERE / "simple_files" +WITHOUT_ISSUE_REPORT = SIMPLE_FILES / "without_issues_report.json" + +ISORT_CMD = """isort --diff {package_dir}""" # config file should be in a started folder +BLACK_CMD = r"""black --config {toml_config_file} --diff {package_dir}""" +MYPY_CMD = r"""mypy {package_dir} --config-file={toml_config_file}""" + + +@pytest.fixture(scope="session") +def toml_config_file() -> Path: + root_dir = HERE + while str(root_dir) != root_dir.root: + config_file = root_dir / "pyproject.toml" + if config_file.is_file(): + return config_file + root_dir = root_dir.parent + raise Exception("can't found pyproject.toml") + + +@pytest.fixture(autouse=True) +def prepare_toml_file(toml_config_file): + pyproject_toml = Path(os.getcwd()) / "pyproject.toml" + if toml_config_file != pyproject_toml and not pyproject_toml.is_file(): + shutil.copy(toml_config_file, pyproject_toml) + yield + if toml_config_file != pyproject_toml and pyproject_toml.is_file(): + os.remove(str(pyproject_toml)) + + +@pytest.mark.parametrize( + "cmd,package_dir,expected_file", + [ + ( + "mypy {package_dir} --config-file={toml_config_file}", + SMELL_PACKAGE_DIR, + SIMPLE_FILES / "mypy_smell_package_report.json" + ), + ( + "mypy {package_dir} --config-file={toml_config_file}", + PACKAGE_DIR, + WITHOUT_ISSUE_REPORT + ), + ( + "black --config {toml_config_file} --diff {package_dir}", + SMELL_PACKAGE_DIR, + HERE / "simple_files/black_smell_package_report.json" + ), + ( + "black --config {toml_config_file} --diff {package_dir}", + PACKAGE_DIR, + WITHOUT_ISSUE_REPORT + ), + ( + ISORT_CMD, + SMELL_PACKAGE_DIR, + HERE / "simple_files/isort_smell_package_report.json" + ), + ( + ISORT_CMD, + PACKAGE_DIR, + WITHOUT_ISSUE_REPORT, + ), + ], + ids=["mypy_failed", "mypy_pass", "black_failed", "black_pass", "isort_failed", "isort_pass"] +) +def test_tool(tmp_path, toml_config_file, cmd, package_dir, expected_file): + cmd = cmd.format(package_dir=package_dir, toml_config_file=toml_config_file) + + proc = subprocess.Popen(cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, _ = proc.communicate() + file_log = tmp_path / "temp.log" + file_log.write_bytes(out) + assert file_log.is_file() is True + issues_file = tmp_path / "issues.json" + with requests_mock.Mocker() as m: + m.get('/api/authentication/validate', json={"valid": True}) + m.get("/api/rules/search", json={"rules": []}) + m.post("/api/rules/create", json={}) + parser = LogParser(issues_file, host="http://fake.com/", token="fake_token") + assert getattr(parser, f'from_{cmd.split(" ")[0]}')(file_log) == 0 + + assert issues_file.is_file() is True + data = json.loads(issues_file.read_text()) + for issue in data["issues"]: + 
issue["primaryLocation"]["filePath"] = "/".join(issue["primaryLocation"]["filePath"].split("/")[-2:]) + + expected_data = json.loads(Path(expected_file).read_text()) + assert json.dumps(data, sort_keys=True) == json.dumps(expected_data, sort_keys=True) diff --git a/tools/ci_credentials/setup.py b/tools/ci_credentials/setup.py index 6c072e755223d..f4c4136d534ff 100644 --- a/tools/ci_credentials/setup.py +++ b/tools/ci_credentials/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["requests", "ci_common_utils", "pytest"] +MAIN_REQUIREMENTS = ["requests", "ci_common_utils"] TEST_REQUIREMENTS = ["requests-mock"] diff --git a/tools/ci_static_check_reports/__init__.py b/tools/ci_static_check_reports/__init__.py deleted file mode 100644 index 46b7376756ec6..0000000000000 --- a/tools/ci_static_check_reports/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. -# diff --git a/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py b/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py deleted file mode 100644 index 46b7376756ec6..0000000000000 --- a/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. -# diff --git a/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/main.py b/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/main.py deleted file mode 100644 index 6e779b3e4e1f2..0000000000000 --- a/tools/ci_static_check_reports/ci_build_python_static_checkers_reports/main.py +++ /dev/null @@ -1,100 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. -# -import argparse -import json -import os -import sys -from typing import Dict, List - -from invoke import Context - -sys.path.insert(0, "airbyte-integrations/connectors") -from tasks import CONFIG_FILE, TOOLS_VERSIONS, _run_task # noqa - -TASK_COMMANDS: Dict[str, List[str]] = { - "black": [ - f"pip install black~={TOOLS_VERSIONS['black']}", - f"XDG_CACHE_HOME={os.devnull} black -v {{check_option}} --diff {{source_path}}/. > {{reports_path}}/black.txt", - ], - "coverage": [ - "pip install .", - f"pip install coverage[toml]~={TOOLS_VERSIONS['coverage']}", - "coverage xml --rcfile={toml_config_file} -o {reports_path}/coverage.xml", - ], - "flake": [ - f"pip install mccabe~={TOOLS_VERSIONS['mccabe']}", - f"pip install pyproject-flake8~={TOOLS_VERSIONS['flake']}", - f"pip install flake8-junit-report~={TOOLS_VERSIONS['flake_junit']}", - "pflake8 -v {source_path} --output-file={reports_path}/flake.txt --bug-report", - "flake8_junit {reports_path}/flake.txt {reports_path}/flake.xml", - "rm -f {reports_path}/flake.txt", - ], - "isort": [ - f"pip install colorama~={TOOLS_VERSIONS['colorama']}", - f"pip install isort~={TOOLS_VERSIONS['isort']}", - "isort -v {check_option} {source_path}/. 
> {reports_path}/isort.txt", - ], - "mypy": [ - "pip install .", - f"pip install lxml~={TOOLS_VERSIONS['lxml']}", - f"pip install mypy~={TOOLS_VERSIONS['mypy']}", - "mypy {source_path} --config-file={toml_config_file} --cobertura-xml-report={reports_path}", - ], - "test": [ - "mkdir {venv}/source-acceptance-test", - "cp -f $(git ls-tree -r HEAD --name-only {source_acceptance_test_path} | tr '\n' ' ') {venv}/source-acceptance-test", - "pip install build", - f"python -m build {os.path.join('{venv}', 'source-acceptance-test')}", - f"pip install {os.path.join('{venv}', 'source-acceptance-test', 'dist', 'source_acceptance_test-*.whl')}", - "[ -f requirements.txt ] && pip install -r requirements.txt 2> /dev/null", - "pip install .", - "pip install .[tests]", - "pip install pytest-cov", - "pytest -v --cov={source_path} --cov-report xml:{reports_path}/pytest.xml {source_path}/unit_tests", - ], -} - - -def build_static_checkers_reports(modules: list, static_checker_reports_path: str) -> int: - ctx = Context() - toml_config_file = os.path.join(os.getcwd(), "pyproject.toml") - - for module_path in modules: - reports_path = f"{os.getcwd()}/{static_checker_reports_path}/{module_path}" - if not os.path.exists(reports_path): - os.makedirs(reports_path) - - for checker in TASK_COMMANDS: - _run_task( - ctx, - f"{os.getcwd()}/{module_path}", - checker, - module_path=module_path, - multi_envs=True, - check_option="", - task_commands=TASK_COMMANDS, - toml_config_file=toml_config_file, - reports_path=reports_path, - source_acceptance_test_path=os.path.join(os.getcwd(), "airbyte-integrations/bases/source-acceptance-test"), - ) - return 0 - - -def main() -> int: - parser = argparse.ArgumentParser(description="Working with Python Static Report Builder.") - parser.add_argument("changed_modules", nargs="*") - parser.add_argument("--static-checker-reports-path", help="SonarQube host", required=False, type=str, default="static_checker_reports") - - args = parser.parse_args() - changed_python_module_paths = [ - module["dir"] - for module in json.loads(args.changed_modules[0]) - if module["lang"] == "py" and os.path.exists(module["dir"]) and "setup.py" in os.listdir(module["dir"]) - ] - print("Changed python modules: ", changed_python_module_paths) - return build_static_checkers_reports(changed_python_module_paths, static_checker_reports_path=args.static_checker_reports_path) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tools/ci_static_check_reports/ci_detect_changed_modules/main.py b/tools/ci_static_check_reports/ci_detect_changed_modules/main.py deleted file mode 100644 index a2a68c3be06f1..0000000000000 --- a/tools/ci_static_check_reports/ci_detect_changed_modules/main.py +++ /dev/null @@ -1,52 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
-# -import json -import os -import sys -from typing import Dict, List, Set - -# Filenames used to detect whether the dir is a module -LANGUAGE_MODULE_ID_FILE = { - ".py": "setup.py", - # TODO: Add ID files for other languages -} - - -def find_base_path(path: str, modules: List[Dict[str, str]], unique_modules: Set[str], file_ext: str = "", lookup_file: str = None) -> None: - filename, file_extension = os.path.splitext(path) - lookup_file = lookup_file or LANGUAGE_MODULE_ID_FILE.get(file_extension) - - dir_path = os.path.dirname(filename) - if dir_path and os.path.exists(dir_path): - is_module_root = lookup_file in os.listdir(dir_path) - if is_module_root: - if dir_path not in unique_modules: - modules.append({"dir": dir_path, "lang": file_ext[1:]}) - unique_modules.add(dir_path) - else: - find_base_path(dir_path, modules, unique_modules, file_ext=file_extension, lookup_file=lookup_file) - - -def list_changed_modules(changed_files: List[str]) -> List[Dict[str, str]]: - """ - changed_filed are the list of files which were modified in current branch. - E.g. changed_files = ["tools/ci_static_check_reports/__init__.py", "tools/ci_static_check_reports/setup.py", ...] - """ - - modules: List[Dict[str, str]] = [] - unique_modules: set = set() - for file_path in changed_files: - _, file_extension = os.path.splitext(file_path) - find_base_path(file_path, modules, file_ext=file_extension, unique_modules=unique_modules) - return modules - - -def main() -> int: - changed_modules = list_changed_modules(sys.argv[1:]) - print(json.dumps(changed_modules)) - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tools/ci_static_check_reports/setup.py b/tools/ci_static_check_reports/setup.py deleted file mode 100644 index 4241142328843..0000000000000 --- a/tools/ci_static_check_reports/setup.py +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. -# - - -from setuptools import find_packages, setup - -TEST_REQUIREMENTS = [ - "pytest~=6.1", -] - -setup( - name="ci_static_check_reports", - description="CI tool to detect changed modules and then generate static check reports.", - author="Airbyte", - author_email="contact@airbyte.io", - packages=find_packages(), - install_requires=["invoke~=1.6.0", "virtualenv~=20.10.0"], - package_data={"": ["*.json", "schemas/*.json"]}, - extras_require={ - "tests": TEST_REQUIREMENTS, - }, - entry_points={ - "console_scripts": [ - "ci_detect_changed_modules = ci_detect_changed_modules.main:main", - "ci_build_python_checkers_reports = ci_build_python_static_checkers_reports.main:main", - ], - }, -) diff --git a/tools/ci_static_check_reports/unit_tests/__init__.py b/tools/ci_static_check_reports/unit_tests/__init__.py deleted file mode 100644 index 46b7376756ec6..0000000000000 --- a/tools/ci_static_check_reports/unit_tests/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. -# diff --git a/tools/ci_static_check_reports/unit_tests/test_build_static_checkers_reports.py b/tools/ci_static_check_reports/unit_tests/test_build_static_checkers_reports.py deleted file mode 100644 index 77b9437d4a727..0000000000000 --- a/tools/ci_static_check_reports/unit_tests/test_build_static_checkers_reports.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
-# -import os -import subprocess - -import pytest - - -@pytest.mark.parametrize( - "changed_module,should_build_reports", - [ - ('[{"dir": "tools/ci_static_check_reports", "lang": "py"}]', True), - ('[{"dir": "airbyte-integrations/connectors/destination-bigquery", "lang": "java"}]', False), - ('[{"dir": "airbyte-integrations/connectors/not-existing-module", "lang": "other"}]', False), - ], -) -def test_build_static_checkers_reports(changed_module: str, should_build_reports: bool) -> None: - subprocess.call(["ci_build_python_checkers_reports", changed_module], shell=False) - static_checker_reports_path = f"static_checker_reports/{changed_module}" - - static_checker_reports_path_exists = os.path.exists(static_checker_reports_path) - black_exists = os.path.exists(os.path.join(static_checker_reports_path, "black.txt")) - coverage_exists = os.path.exists(os.path.join(static_checker_reports_path, "coverage.xml")) - flake_exists = os.path.exists(os.path.join(static_checker_reports_path, "flake.xml")) - isort_exists = os.path.exists(os.path.join(static_checker_reports_path, "isort.txt")) - cobertura_exists = os.path.exists(os.path.join(static_checker_reports_path, "cobertura.xml")) - pytest_exists = os.path.exists(os.path.join(static_checker_reports_path, "pytest.xml")) - report_paths_exist = [ - static_checker_reports_path_exists, - black_exists, - coverage_exists, - flake_exists, - isort_exists, - cobertura_exists, - pytest_exists, - ] - - if should_build_reports: - assert all(report_paths_exist) - else: - assert not all(report_paths_exist) diff --git a/tools/ci_static_check_reports/unit_tests/test_detect_changed_modules.py b/tools/ci_static_check_reports/unit_tests/test_detect_changed_modules.py deleted file mode 100644 index 468e7dc21ac09..0000000000000 --- a/tools/ci_static_check_reports/unit_tests/test_detect_changed_modules.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
-# -from typing import List, Set - -import pytest -from ci_detect_changed_modules.main import list_changed_modules - - -@pytest.mark.parametrize( - "changed_files,changed_modules", - [ - (["path/to/file1", "file2.txt", "path/to/file3.txt"], []), - ( - [ - "airbyte-cdk/python/airbyte_cdk/entrypoint.py", - "airbyte-cdk/python/airbyte_cdk/file1", - "airbyte-cdk/python/airbyte_cdk/file2.py", - ], - [{"dir": "airbyte-cdk/python", "lang": "py"}], - ), - ( - [ - "airbyte-cdk/python/airbyte_cdk/entrypoint.py", - "airbyte-integrations/connectors/source-asana/source_asana/streams.py", - "airbyte-integrations/connectors/source-asana/source_asana/source.py", - "airbyte-integrations/connectors/source-braintree/integration_tests/abnormal_state.json", - ], - [{"dir": "airbyte-cdk/python", "lang": "py"}, {"dir": "airbyte-integrations/connectors/source-asana", "lang": "py"}], - ), - ( - [], - [], - ), - # TODO: update test after non-python modules are supported - ( - [ - "airbyte-integrations/connectors/source-clickhouse-strict-encrypt/src/main/" - "java/io/airbyte/integrations/source/clickhouse/ClickHouseStrictEncryptSource.java" - ], - [], - ), - ( - ["airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json"], - [], - ), - ( - ["airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py"], - [ - {"dir": "airbyte-integrations/connectors/destination-amazon-sqs", "lang": "py"}, - ], - ), - ], -) -def test_list_changed_modules(changed_files: List[str], changed_modules: Set[str]) -> None: - calculated_changed_modules = list_changed_modules(changed_files) - - assert calculated_changed_modules == changed_modules From 28b174ce398bcce0d8f7b7aebc7561762ceae0a5 Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Wed, 19 Jan 2022 22:32:48 +0200 Subject: [PATCH 158/215] Fix logger for function 'should_retry' (#9619) * Fix logging for function 'should_retry' --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/connectors/source-github/Dockerfile | 2 +- .../connectors/source-github/source_github/streams.py | 2 +- docs/integrations/sources/github.md | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1958801c835d0..fdc609683698a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -224,7 +224,7 @@ - name: GitHub sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.2.11 + dockerImageTag: 0.2.13 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index cf7cca7be7980..7145cefbde5ad 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2012,7 +2012,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.2.11" +- dockerImage: "airbyte/source-github:0.2.13" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/github" 
connectionSpecification: diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index 3508845d3b6ad..160005032c190 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.11 +LABEL io.airbyte.version=0.2.13 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index a9499124ac303..1186609456eaa 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -58,7 +58,7 @@ def should_retry(self, response: requests.Response) -> bool: ) if retry_flag: self.logger.info( - f"Rate limit handling for the response with {response.status_code} status code with message: {response.json()}" + f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code with message: {response.text}" ) return retry_flag diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 76ec536519b2d..5f68b1dec9ed9 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -92,6 +92,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` | | 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | | 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | | 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | From 48ca8060bfff9a5b1e377e73cf3f7df1523c8c3f Mon Sep 17 00:00:00 2001 From: Prudhvi Raj Date: Thu, 20 Jan 2022 00:35:14 +0400 Subject: [PATCH 159/215] Amazon Seller Partner Fix: 0 records read in reports (#9581) --- .../e55879a8-0ef8-4557-abcf-ab34c53ec460.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-amazon-seller-partner/Dockerfile | 2 +- .../source_amazon_seller_partner/streams.py | 2 +- docs/integrations/sources/amazon-seller-partner.md | 3 ++- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json index e175948b1c018..93080de9d5a53 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e55879a8-0ef8-4557-abcf-ab34c53ec460", 
"name": "Amazon Seller Partner", "dockerRepository": "airbyte/source-amazon-seller-partner", - "dockerImageTag": "0.2.10", + "dockerImageTag": "0.2.13", "documentationUrl": "https://docs.airbyte.io/integrations/sources/amazon-seller-partner", "icon": "amazonsellerpartner.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index fdc609683698a..5658e1026587d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -22,7 +22,7 @@ - name: Amazon Seller Partner sourceDefinitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 dockerRepository: airbyte/source-amazon-seller-partner - dockerImageTag: 0.2.12 + dockerImageTag: 0.2.13 sourceType: api documentationUrl: https://docs.airbyte.io/integrations/sources/amazon-seller-partner icon: amazonsellerpartner.svg diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 7145cefbde5ad..15a641b97ed67 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -157,7 +157,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amazon-seller-partner:0.2.12" +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.13" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile index 38c7033939b84..c4b24cd2a828d 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.12 +LABEL io.airbyte.version=0.2.13 LABEL io.airbyte.name=airbyte/source-amazon-seller-partner diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index f26dca2d8d192..c91c815e45843 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -225,7 +225,7 @@ def _report_data( return { "reportType": self.name, "marketplaceIds": [self.marketplace_id], - "createdSince": replication_start_date.strftime(DATE_TIME_FORMAT), + "dataStartTime": replication_start_date.strftime(DATE_TIME_FORMAT), } def _create_report( diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index 71d336fc1a1f9..f2aeb90f4dc02 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -67,7 +67,8 @@ Information about rate limits you may find [here](https://github.com/amzn/sellin | Version | Date | Pull Request | Subject | | :------- | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------- | -| `0.2.12` | 2022-01-05 | [\#9312](https://github.com/airbytehq/airbyte/pull/9312) | Add all remaining brand analytics report streams +| `0.2.13` | 2022-01-18 | [\#9581](https://github.com/airbytehq/airbyte/pull/9581) | Change createdSince parameter to dataStartTime | +| `0.2.12` | 2022-01-05 | [\#9312](https://github.com/airbytehq/airbyte/pull/9312) | Add all remaining brand analytics report streams | | `0.2.11` | 2022-01-05 | [\#9115](https://github.com/airbytehq/airbyte/pull/9115) | Fix reading only 100 orders | | `0.2.10` | 2021-12-31 | [\#9236](https://github.com/airbytehq/airbyte/pull/9236) | Fix NoAuth deprecation warning | | `0.2.9` | 2021-12-30 | [\#9212](https://github.com/airbytehq/airbyte/pull/9212) | Normalize GET_SELLER_FEEDBACK_DATA header field names | From 9d224f2a1066d764dcd9572b298b4fad457d0bab Mon Sep 17 00:00:00 2001 From: Augustin Date: Wed, 19 Jan 2022 21:43:26 +0100 Subject: [PATCH 160/215] add annotations to posgtresql deployment with helm (#9556) --- charts/airbyte/values.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 470c65bd08668..42221b185c4a3 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -505,7 +505,9 @@ postgresql: ## This secret is used in case of postgresql.enabled=true and we would like to specify password for newly created postgresql instance ## existingSecret: "" - + commonAnnotations: + helm.sh/hook: pre-install,pre-upgrade + helm.sh/hook-weight: -1 ## External PostgreSQL configuration ## All of these values are only used when postgresql.enabled is set to false ## @param externalDatabase.host Database host From 8b149e85665c0d1cbe9874ca4e3aa21e222611be Mon Sep 17 00:00:00 2001 From: Baz Date: Wed, 19 Jan 2022 22:53:57 +0200 Subject: [PATCH 161/215] =?UTF-8?q?=F0=9F=8E=89=20=20Source=20Shopify:=20i?= =?UTF-8?q?mplement=20Oauth2.0=20for=20Airbyte-Cloud=20(#9591)?= MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../9da77001-af33-4bcd-be46-6252bf9342b9.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 76 ++++++++---- .../destination/bigquery/BigQueryUtils.java | 7 +- .../bigquery/BigQueryDestinationTest.java | 21 ++-- .../bigquery/BigQueryUtilsTest.java | 3 +- .../connectors/source-shopify/Dockerfile | 2 +- .../integration_tests/invalid_config.json | 2 +- .../invalid_oauth_config.json | 4 +- .../source-shopify/source_shopify/auth.py | 14 +-- .../source-shopify/source_shopify/spec.json | 78 +++++++++--- .../SnowflakeSource.java | 19 +-- .../oauth/OAuthImplementationFactory.java | 1 + .../airbyte/oauth/flows/ShopifyOAuthFlow.java | 116 ++++++++++++++++++ docs/integrations/sources/shopify.md | 1 + 15 files changed, 270 insertions(+), 78 deletions(-) create mode 100644 airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ShopifyOAuthFlow.java diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json index 1eabc052ada40..94a52b10f6171 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "9da77001-af33-4bcd-be46-6252bf9342b9", "name": "Shopify", "dockerRepository": "airbyte/source-shopify", - "dockerImageTag": "0.1.27", + "dockerImageTag": "0.1.28", "documentationUrl": "https://docs.airbyte.io/integrations/sources/shopify", "icon": "shopify.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 5658e1026587d..95fa5edb2d484 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -641,7 +641,7 @@ - name: Shopify sourceDefinitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 dockerRepository: airbyte/source-shopify - dockerImageTag: 0.1.27 + dockerImageTag: 0.1.28 documentationUrl: https://docs.airbyte.io/integrations/sources/shopify icon: shopify.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 15a641b97ed67..dee1594ebbed3 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6684,7 +6684,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-shopify:0.1.27" +- dockerImage: "airbyte/source-shopify:0.1.28" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" connectionSpecification: @@ -6694,8 +6694,8 @@ required: - "shop" - "start_date" - - "auth_method" - additionalProperties: false + - "credentials" + additionalProperties: true properties: shop: type: "string" @@ -6710,24 +6710,21 @@ examples: - "2021-01-01" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - auth_method: + credentials: title: "Shopify Authorization Method" type: "object" oneOf: - type: "object" title: "OAuth2.0" required: - - "client_id" - - "client_secret" - - "access_token" - "auth_method" 
properties: auth_method: type: "string" - const: "access_token" + const: "oauth2.0" enum: - - "access_token" - default: "access_token" + - "oauth2.0" + default: "oauth2.0" order: 0 client_id: type: "string" @@ -6747,8 +6744,8 @@ - title: "API Password" type: "object" required: - - "api_password" - "auth_method" + - "api_password" properties: auth_method: type: "string" @@ -6766,17 +6763,52 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "auth_method" - - "0" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "access_token" + advanced_auth: + auth_flow_type: "oauth2.0" + predicate_key: + - "credentials" + - "auth_method" + predicate_value: "oauth2.0" + oauth_config_specification: + oauth_user_input_from_connector_config_specification: + type: "object" + additionalProperties: false + properties: + shop: + type: "string" + path_in_connector_config: + - "shop" + complete_oauth_output_specification: + type: "object" + additionalProperties: false + properties: + access_token: + type: "string" + path_in_connector_config: + - "credentials" + - "access_token" + complete_oauth_server_input_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + client_secret: + type: "string" + complete_oauth_server_output_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + path_in_connector_config: + - "credentials" + - "client_id" + client_secret: + type: "string" + path_in_connector_config: + - "credentials" + - "client_secret" - dockerImage: "airbyte/source-shortio:0.1.2" spec: documentationUrl: "https://developers.short.io/reference" diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java index 545e8aff58969..2c7a3dddd5833 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java @@ -5,7 +5,6 @@ package io.airbyte.integrations.destination.bigquery; import static io.airbyte.integrations.destination.bigquery.helpers.LoggerHelper.getJobErrorMessage; -import static java.util.Objects.isNull; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -172,9 +171,9 @@ public static String getDatasetId(final JsonNode config) { String projectId = config.get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); if (!(projectId.equals(projectIdPart))) { throw new IllegalArgumentException(String.format( - "Project ID included in Dataset ID must match Project ID field's value: Project ID is `%s`, but you specified `%s` in Dataset ID", - projectId, - projectIdPart)); + "Project ID included in Dataset ID must match Project ID field's value: Project ID is `%s`, but you specified `%s` in Dataset ID", + projectId, + projectIdPart)); } } // if colonIndex is -1, then this returns the entire string diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java 
b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java index 70c7e9dd1627e..9edfa8f2ba0bc 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java @@ -385,6 +385,7 @@ private boolean isTablePartitioned(final BigQuery bigquery, final Dataset datase } private static class DatasetIdResetter { + private Consumer consumer; DatasetIdResetter(Consumer consumer) { @@ -394,20 +395,20 @@ private static class DatasetIdResetter { public void accept(JsonNode config) { consumer.accept(config); } + } private static Stream datasetIdResetterProvider() { // parameterized test with two dataset-id patterns: `dataset_id` and `project-id:dataset_id` return Stream.of( - Arguments.arguments(new DatasetIdResetter(config -> {})), - Arguments.arguments(new DatasetIdResetter( - config -> { - String projectId = ((ObjectNode) config).get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); - String datasetId = ((ObjectNode) config).get(BigQueryConsts.CONFIG_DATASET_ID).asText(); - ((ObjectNode) config).put(BigQueryConsts.CONFIG_DATASET_ID, - String.format("%s:%s", projectId, datasetId)); - } - )) - ); + Arguments.arguments(new DatasetIdResetter(config -> {})), + Arguments.arguments(new DatasetIdResetter( + config -> { + String projectId = ((ObjectNode) config).get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); + String datasetId = ((ObjectNode) config).get(BigQueryConsts.CONFIG_DATASET_ID).asText(); + ((ObjectNode) config).put(BigQueryConsts.CONFIG_DATASET_ID, + String.format("%s:%s", projectId, datasetId)); + }))); } + } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java index 586e0cf7ce747..f0538e443a624 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQueryUtilsTest.java @@ -26,7 +26,7 @@ public void init() { .put(BigQueryConsts.CONFIG_CREDS, "test_secret") .put(BigQueryConsts.CONFIG_DATASET_LOCATION, "US"); } - + @ParameterizedTest @MethodSource("validBigQueryIdProvider") public void testGetDatasetIdSuccess(String projectId, String datasetId, String expected) throws Exception { @@ -66,4 +66,5 @@ private static Stream invalidBigQueryIdProvider() { Arguments.arguments("my-project", "your-project:my_dataset", "Project ID included in Dataset ID must match Project ID field's value: Project ID is `my-project`, but you specified `your-project` in Dataset ID")); } + } diff --git a/airbyte-integrations/connectors/source-shopify/Dockerfile b/airbyte-integrations/connectors/source-shopify/Dockerfile index a4c18b641f04d..27a2909ad9283 100644 --- a/airbyte-integrations/connectors/source-shopify/Dockerfile +++ b/airbyte-integrations/connectors/source-shopify/Dockerfile @@ -28,5 +28,5 @@ COPY source_shopify ./source_shopify ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", 
"/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.27 +LABEL io.airbyte.version=0.1.28 LABEL io.airbyte.name=airbyte/source-shopify diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-shopify/integration_tests/invalid_config.json index 78b1a4afac6ed..c3ff95944b38c 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/invalid_config.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/invalid_config.json @@ -1,7 +1,7 @@ { "shop": "SHOP_NAME", "start_date": "2020-11-01", - "auth_method": { + "credentials": { "auth_method": "api_password", "api_password": "SOME_API_PASSWORD" } diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/invalid_oauth_config.json b/airbyte-integrations/connectors/source-shopify/integration_tests/invalid_oauth_config.json index 5770cfa2d2097..db9882d1eec4b 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/invalid_oauth_config.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/invalid_oauth_config.json @@ -1,8 +1,8 @@ { "shop": "SHOP_NAME", "start_date": "2020-11-01", - "auth_method": { - "auth_method": "access_token", + "credentials": { + "auth_method": "oauth2.0", "client_id": "SOME_CLIENT_ID", "client_secret": "SOME_CLIENT_SECRET", "access_token": "SOME_ACCESS_TOKEN" diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/auth.py b/airbyte-integrations/connectors/source-shopify/source_shopify/auth.py index bc3ab9db3d97e..ea3d36dda1a03 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/auth.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/auth.py @@ -30,12 +30,12 @@ def __init__(self, config: Mapping[str, Any]): def get_auth_header(self) -> Mapping[str, Any]: auth_header: str = "X-Shopify-Access-Token" - auth_method: Dict = self.config["auth_method"] - auth_option: str = auth_method.get("auth_method") + credentials: Dict = self.config["credentials"] + auth_method: str = credentials.get("auth_method") - if auth_option == "access_token": - return {auth_header: auth_method.get("access_token")} - elif auth_option == "api_password": - return {auth_header: auth_method.get("api_password")} + if auth_method == "oauth2.0": + return {auth_header: credentials.get("access_token")} + elif auth_method == "api_password": + return {auth_header: credentials.get("api_password")} else: - raise NotImplementedAuth(auth_option) + raise NotImplementedAuth(auth_method) diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json b/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json index a342e64eb1f43..69186baf72eed 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/spec.json @@ -4,8 +4,8 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Shopify Source CDK Specifications", "type": "object", - "required": ["shop", "start_date", "auth_method"], - "additionalProperties": false, + "required": ["shop", "start_date", "credentials"], + "additionalProperties": true, "properties": { "shop": { "type": "string", @@ -19,25 +19,20 @@ "examples": ["2021-01-01"], "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" }, - "auth_method": { + "credentials": { "title": "Shopify Authorization Method", "type": "object", "oneOf": [ { "type": "object", "title": "OAuth2.0", - 
"required": [ - "client_id", - "client_secret", - "access_token", - "auth_method" - ], + "required": ["auth_method"], "properties": { "auth_method": { "type": "string", - "const": "access_token", - "enum": ["access_token"], - "default": "access_token", + "const": "oauth2.0", + "enum": ["oauth2.0"], + "default": "oauth2.0", "order": 0 }, "client_id": { @@ -63,7 +58,7 @@ { "title": "API Password", "type": "object", - "required": ["api_password", "auth_method"], + "required": ["auth_method", "api_password"], "properties": { "auth_method": { "type": "string", @@ -84,12 +79,57 @@ } } }, - "authSpecification": { - "auth_type": "oauth2.0", - "oauth2Specification": { - "rootObject": ["auth_method", 0], - "oauthFlowInitParameters": [["client_id"], ["client_secret"]], - "oauthFlowOutputParameters": [["access_token"]] + "advanced_auth": { + "auth_flow_type": "oauth2.0", + "predicate_key": ["credentials", "auth_method"], + "predicate_value": "oauth2.0", + "oauth_config_specification": { + "complete_oauth_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "access_token": { + "type": "string", + "path_in_connector_config": ["credentials", "access_token"] + } + } + }, + "complete_oauth_server_input_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string" + }, + "client_secret": { + "type": "string" + } + } + }, + "complete_oauth_server_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string", + "path_in_connector_config": ["credentials", "client_id"] + }, + "client_secret": { + "type": "string", + "path_in_connector_config": ["credentials", "client_secret"] + } + } + }, + "oauth_user_input_from_connector_config_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "shop": { + "type": "string", + "path_in_connector_config": ["shop"] + } + } + } } } } diff --git a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java index 95e8f8836c9bf..cdffb1abac6f1 100644 --- a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java +++ b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java @@ -39,15 +39,16 @@ public JsonNode toDatabaseConfig(final JsonNode config) { .put("host", config.get("host").asText()) .put("username", config.get("username").asText()) .put("password", config.get("password").asText()) - .put("connection_properties", String.format("role=%s;warehouse=%s;database=%s;schema=%s;JDBC_QUERY_RESULT_FORMAT=%s;CLIENT_SESSION_KEEP_ALIVE=%s;", - config.get("role").asText(), - config.get("warehouse").asText(), - config.get("database").asText(), - config.get("schema").asText(), - // Needed for JDK17 - see - // https://stackoverflow.com/questions/67409650/snowflake-jdbc-driver-internal-error-fail-to-retrieve-row-count-for-first-arrow - "JSON", - true)) + .put("connection_properties", + String.format("role=%s;warehouse=%s;database=%s;schema=%s;JDBC_QUERY_RESULT_FORMAT=%s;CLIENT_SESSION_KEEP_ALIVE=%s;", + config.get("role").asText(), + config.get("warehouse").asText(), + config.get("database").asText(), + config.get("schema").asText(), + // Needed for JDK17 - 
see + // https://stackoverflow.com/questions/67409650/snowflake-jdbc-driver-internal-error-fail-to-retrieve-row-count-for-first-arrow + "JSON", + true)) .build()); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index 7a549d65a3650..2c2498c132c90 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -57,6 +57,7 @@ public OAuthImplementationFactory(final ConfigRepository configRepository, final .put("airbyte/source-monday", new MondayOAuthFlow(configRepository, httpClient)) .put("airbyte/source-zendesk-sunshine", new ZendeskSunshineOAuthFlow(configRepository, httpClient)) .put("airbyte/source-mailchimp", new MailchimpOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-shopify", new ShopifyOAuthFlow(configRepository, httpClient)) .build(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ShopifyOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ShopifyOAuthFlow.java new file mode 100644 index 0000000000000..1e49cfcba18d2 --- /dev/null +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ShopifyOAuthFlow.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.BaseOAuth2Flow; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import org.apache.http.client.utils.URIBuilder; + +public class ShopifyOAuthFlow extends BaseOAuth2Flow { + + private static final List SCOPES = Arrays.asList( + "read_themes", + "read_orders", + "read_all_orders", + "read_assigned_fulfillment_orders", + "read_checkouts", + "read_content", + "read_customers", + "read_discounts", + "read_draft_orders", + "read_fulfillments", + "read_locales", + "read_locations", + "read_price_rules", + "read_products", + "read_product_listings", + "read_shopify_payments_payouts"); + + public String getScopes() { + return String.join(",", SCOPES); + } + + public ShopifyOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient) { + super(configRepository, httpClient); + } + + @VisibleForTesting + public ShopifyOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient, final Supplier stateSupplier) { + super(configRepository, httpClient, stateSupplier); + } + + @Override + protected String formatConsentUrl(final UUID definitionId, + final String clientId, + final String redirectUrl, + final JsonNode inputOAuthConfiguration) + throws IOException { + + // getting shop value from user's config + final String shop = getConfigValueUnsafe(inputOAuthConfiguration, "shop"); + // building consent url + final URIBuilder builder = new URIBuilder() + .setScheme("https") + .setHost(shop + ".myshopify.com") + .setPath("admin/oauth/authorize") + .addParameter("client_id", clientId) + .addParameter("redirect_uri", redirectUrl) + .addParameter("state", getState()) + .addParameter("grant_options[]", "value") + 
.addParameter("scope", getScopes()); + + try { + return builder.build().toString(); + } catch (URISyntaxException e) { + throw new IOException("Failed to format Consent URL for OAuth flow", e); + } + } + + @Override + protected Map getAccessTokenQueryParameters(String clientId, + String clientSecret, + String authCode, + String redirectUrl) { + return ImmutableMap.builder() + .put("client_id", clientId) + .put("client_secret", clientSecret) + .put("code", authCode) + .build(); + } + + @Override + protected String getAccessTokenUrl(final JsonNode inputOAuthConfiguration) { + // getting shop value from user's config + final String shop = getConfigValueUnsafe(inputOAuthConfiguration, "shop"); + // building the access_token_url + return "https://" + shop + ".myshopify.com/admin/oauth/access_token"; + } + + @Override + protected Map extractOAuthOutput(final JsonNode data, final String accessTokenUrl) throws IOException { + final Map result = new HashMap<>(); + // getting out access_token + if (data.has("access_token")) { + result.put("access_token", data.get("access_token").asText()); + } else { + throw new IOException(String.format("Missing 'access_token' in query params from %s", accessTokenUrl)); + } + + return result; + } + +} diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md index 31528619ed025..79afd2388b4bc 100644 --- a/docs/integrations/sources/shopify.md +++ b/docs/integrations/sources/shopify.md @@ -101,6 +101,7 @@ This connector support both: `OAuth 2.0` and `API PASSWORD` (for private applica | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.28 | 2022-01-19 | [9591](https://github.com/airbytehq/airbyte/pull/9591) | Implemented `OAuth2.0` authentication method for Airbyte Cloud | | 0.1.27 | 2021-12-22 | [9049](https://github.com/airbytehq/airbyte/pull/9049) | Update connector fields title/description | | 0.1.26 | 2021-12-14 | [8597](https://github.com/airbytehq/airbyte/pull/8597) | Fix `mismatched number of tables` for base-normalization, increased performance of `order_refunds` stream | | 0.1.25 | 2021-12-02 | [8297](https://github.com/airbytehq/airbyte/pull/8297) | Added Shop stream | From 97eeb2fbfa494541a2a4cc3154d99a1bec0775be Mon Sep 17 00:00:00 2001 From: Noah Kawasaki <68556134+noahkawasakigoogle@users.noreply.github.com> Date: Wed, 19 Jan 2022 17:41:32 -0800 Subject: [PATCH 162/215] Fix generate.sh from bad trap for ERR signal in by using bash instead of sh (#9243) --- .../connector-templates/generator/generate.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connector-templates/generator/generate.sh b/airbyte-integrations/connector-templates/generator/generate.sh index fabe0395be49a..b7714d5ceb278 100755 --- a/airbyte-integrations/connector-templates/generator/generate.sh +++ b/airbyte-integrations/connector-templates/generator/generate.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env sh +#!/usr/bin/env bash error_handler() { echo "While trying to generate a connector, an error occurred on line $1 of generate.sh and the process aborted early. This is probably a bug." @@ -21,10 +21,10 @@ docker build --build-arg UID="$_UID" --build-arg GID="$_GID" . 
-t airbyte/connec # Run the container and mount the airbyte folder if [ $# -eq 2 ]; then echo "2 arguments supplied: 1=$1 2=$2" - docker run --name airbyte-connector-bootstrap --user $_UID:$_GID -e HOME=/tmp -e package_desc="$1" -e package_name="$2" -v "$(pwd)/../../../.":/airbyte airbyte/connector-bootstrap + docker run --name airbyte-connector-bootstrap --user "$_UID:$_GID" -e HOME=/tmp -e package_desc="$1" -e package_name="$2" -v "$(pwd)/../../../.":/airbyte airbyte/connector-bootstrap else echo "Running generator..." - docker run --rm -it --name airbyte-connector-bootstrap --user $_UID:$_GID -e HOME=/tmp -v "$(pwd)/../../../.":/airbyte airbyte/connector-bootstrap + docker run --rm -it --name airbyte-connector-bootstrap --user "$_UID:$_GID" -e HOME=/tmp -v "$(pwd)/../../../.":/airbyte airbyte/connector-bootstrap fi echo "Finished running generator" From e7da9232bbe5512110bae1c8900d337835cddb28 Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Wed, 19 Jan 2022 18:16:19 -0800 Subject: [PATCH 163/215] Fix record count and add acceptance test to the new scheduler (#9487) * Add a job notification The new scheduler was missing a notification step after the job is done. This is needed in order to report the number of record of a sync. * Acceptance test with the new scheduler Add a new github action task to run the acceptances test with the new scheduler * Retry if the failure * PR comments --- .env | 4 + .github/workflows/gradle.yml | 3 + .../features/EnvVariableFeatureFlags.java | 2 +- .../airbyte/server/apis/ConfigurationApi.java | 4 + .../server/handlers/ConnectionsHandler.java | 1 + .../server/handlers/SchedulerHandler.java | 21 ++++- .../WebBackendConnectionsHandlerTest.java | 3 +- airbyte-tests/build.gradle | 3 + .../test/acceptance/AcceptanceTests.java | 71 ++++++++++++--- airbyte-workers/build.gradle | 1 + .../java/io/airbyte/workers/WorkerApp.java | 32 ++++++- .../workers/temporal/TemporalClient.java | 47 ++++++++-- .../scheduling/ConnectionManagerWorkflow.java | 3 + .../ConnectionManagerWorkflowImpl.java | 57 ++++++++++-- .../scheduling/ConnectionUpdaterInput.java | 1 + .../activities/GenerateInputActivity.java | 1 + .../activities/GenerateInputActivityImpl.java | 22 ++++- .../JobCreationAndStatusUpdateActivity.java | 16 ++++ ...obCreationAndStatusUpdateActivityImpl.java | 87 ++++++++++++++++++- .../scheduling/state/WorkflowState.java | 20 +++++ .../WorkflowStateChangedListener.java | 4 +- .../worker_run/TemporalWorkerRunFactory.java | 11 +-- .../ConnectionManagerWorkflowTest.java | 24 +++-- ...obCreationAndStatusUpdateActivityTest.java | 32 +++++-- docker-compose.yaml | 7 +- .../bin/acceptance_test_with_new_scheduler.sh | 19 ++++ 26 files changed, 437 insertions(+), 59 deletions(-) create mode 100755 tools/bin/acceptance_test_with_new_scheduler.sh diff --git a/.env b/.env index b21d58434f634..0f43c07e94ebf 100644 --- a/.env +++ b/.env @@ -88,3 +88,7 @@ MAX_SYNC_WORKERS=5 MAX_SPEC_WORKERS=5 MAX_CHECK_WORKERS=5 MAX_DISCOVER_WORKERS=5 + + +### FEATURE FLAGS ### +NEW_SCHEDULER=false diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index c5c53c331a1b9..533668cdf38a8 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -261,6 +261,9 @@ jobs: - name: Run End-to-End Acceptance Tests run: ./tools/bin/acceptance_test.sh + - name: Run End-to-End Acceptance Tests with the new scheduler + run: ./tools/bin/acceptance_test_with_new_scheduler.sh + - name: Automatic Migration Acceptance Test run: SUB_BUILD=PLATFORM ./gradlew 
:airbyte-tests:automaticMigrationAcceptanceTest --scan -i diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java b/airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java index eb6047044a12e..b89ca268c9eba 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java @@ -8,7 +8,7 @@ public class EnvVariableFeatureFlags implements FeatureFlags { @Override public boolean usesNewScheduler() { - return System.getenv().containsKey("NEW_SCHEDULER"); + return Boolean.parseBoolean(System.getenv("NEW_SCHEDULER")); } } diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index 05ec930a8ff70..c2280bfd7438e 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -558,6 +558,10 @@ public JobInfoRead syncConnection(final ConnectionIdRequestBody connectionIdRequ @Override public JobInfoRead resetConnection(final ConnectionIdRequestBody connectionIdRequestBody) { + if (featureFlags.usesNewScheduler()) { + return execute(() -> schedulerHandler.resetConnection(connectionIdRequestBody.getConnectionId())); + } + return execute(() -> schedulerHandler.resetConnection(connectionIdRequestBody)); } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java index 0f5ffa9fb66ba..755820d9df519 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/ConnectionsHandler.java @@ -160,6 +160,7 @@ public ConnectionRead createConnection(final ConnectionCreate connectionCreate) if (featureFlags.usesNewScheduler()) { try { + LOGGER.info("Starting a connection using the new scheduler"); temporalWorkerRunFactory.createNewSchedulerWorkflow(connectionId); } catch (final Exception e) { LOGGER.error("Start of the temporal connection manager workflow failed", e); diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java index 3adf012c3e5bd..a56a2107f206f 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java @@ -347,6 +347,18 @@ public JobInfoRead syncConnection(final ConnectionIdRequestBody connectionIdRequ return jobConverter.getJobInfoRead(job); } + public JobInfoRead resetConnection(final UUID connectionId) throws IOException { + final ManualSyncSubmissionResult manualSyncSubmissionResult = temporalWorkerRunFactory.resetConnection(connectionId); + + if (manualSyncSubmissionResult.getFailingReason().isPresent()) { + throw new IllegalStateException(manualSyncSubmissionResult.getFailingReason().get()); + } + + final Job job = jobPersistence.getJob(manualSyncSubmissionResult.getJobId().get()); + + return jobConverter.getJobInfoRead(job); + } + public JobInfoRead resetConnection(final ConnectionIdRequestBody connectionIdRequestBody) throws IOException, JsonValidationException, ConfigNotFoundException { final UUID connectionId = connectionIdRequestBody.getConnectionId(); 
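A note on the usesNewScheduler() change above: Boolean.parseBoolean returns true only for the literal string "true" (case-insensitive) and false for null or any other value, so the flag is now value-based rather than presence-based, and the NEW_SCHEDULER=false default added to .env in this patch leaves it disabled. A minimal sketch of the resulting behaviour (the environment values shown are hypothetical, not taken from this patch):

    Boolean.parseBoolean(null);     // false - variable not set at all
    Boolean.parseBoolean("false");  // false - the old containsKey() check would have enabled the flag here
    Boolean.parseBoolean("true");   // true  - only an explicit "true" (case-insensitive) turns the new scheduler on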
@@ -456,13 +468,14 @@ public JobInfoRead createManualRun(final UUID connectionId) throws IOException { public JobInfoRead createNewSchedulerCancellation(final Long id) throws IOException { final Job job = jobPersistence.getJob(id); - final ManualSyncSubmissionResult manualSyncSubmissionResult = temporalWorkerRunFactory.startNewCancelation(UUID.fromString(job.getScope())); + final ManualSyncSubmissionResult cancellationSubmissionResult = temporalWorkerRunFactory.startNewCancelation(UUID.fromString(job.getScope())); - if (manualSyncSubmissionResult.getFailingReason().isPresent()) { - throw new IllegalStateException(manualSyncSubmissionResult.getFailingReason().get()); + if (cancellationSubmissionResult.getFailingReason().isPresent()) { + throw new IllegalStateException(cancellationSubmissionResult.getFailingReason().get()); } - return jobConverter.getJobInfoRead(job); + final Job cancelledJob = jobPersistence.getJob(id); + return jobConverter.getJobInfoRead(cancelledJob); } } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java index 3dcb73d2154e9..87f14345a3bc6 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java @@ -208,7 +208,8 @@ public void setup() throws IOException, JsonValidationException, ConfigNotFoundE .memoryRequest(ConnectionHelpers.TESTING_RESOURCE_REQUIREMENTS.getMemoryRequest()) .memoryLimit(ConnectionHelpers.TESTING_RESOURCE_REQUIREMENTS.getMemoryLimit())); - when(schedulerHandler.resetConnection(any())).thenReturn(new JobInfoRead().job(new JobRead().status(JobStatus.SUCCEEDED))); + when(schedulerHandler.resetConnection(any(ConnectionIdRequestBody.class))) + .thenReturn(new JobInfoRead().job(new JobRead().status(JobStatus.SUCCEEDED))); } @Test diff --git a/airbyte-tests/build.gradle b/airbyte-tests/build.gradle index 5312be73c4058..6f2a572c9bc1f 100644 --- a/airbyte-tests/build.gradle +++ b/airbyte-tests/build.gradle @@ -41,13 +41,16 @@ dependencies { acceptanceTestsImplementation project(':airbyte-api') acceptanceTestsImplementation project(':airbyte-commons') + acceptanceTestsImplementation project(':airbyte-config:models') acceptanceTestsImplementation project(':airbyte-config:persistence') acceptanceTestsImplementation project(':airbyte-db:lib') acceptanceTestsImplementation project(':airbyte-tests') acceptanceTestsImplementation project(':airbyte-test-utils') + acceptanceTestsImplementation project(':airbyte-workers') acceptanceTestsImplementation 'com.fasterxml.jackson.core:jackson-databind' acceptanceTestsImplementation 'io.github.cdimascio:java-dotenv:3.0.0' + acceptanceTestsImplementation 'io.temporal:temporal-sdk:1.6.0' acceptanceTestsImplementation 'org.apache.commons:commons-csv:1.4' acceptanceTestsImplementation 'org.testcontainers:postgresql:1.15.3' acceptanceTestsImplementation 'org.postgresql:postgresql:42.2.18' diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java index 4ac7009144a8e..e873d488e2082 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java @@ -68,6 +68,8 @@ import 
io.airbyte.api.client.model.SourceIdRequestBody; import io.airbyte.api.client.model.SourceRead; import io.airbyte.api.client.model.SyncMode; +import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.MoreBooleans; import io.airbyte.commons.resources.MoreResources; @@ -171,6 +173,8 @@ public class AcceptanceTests { private List destinationIds; private List operationIds; + private static FeatureFlags featureFlags; + @SuppressWarnings("UnstableApiUsage") @BeforeAll public static void init() throws URISyntaxException, IOException, InterruptedException { @@ -203,6 +207,8 @@ public static void init() throws URISyntaxException, IOException, InterruptedExc } else { LOGGER.info("Using external deployment of airbyte."); } + + featureFlags = new EnvVariableFeatureFlags(); } @AfterAll @@ -467,7 +473,10 @@ public void testManualSync() throws Exception { catalog.getStreams().forEach(s -> s.getConfig().syncMode(syncMode).destinationSyncMode(destinationSyncMode)); final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); - + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } final JobInfoRead connectionSyncRead = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); waitForSuccessfulJob(apiClient.getJobsApi(), connectionSyncRead.getJob()); assertSourceAndDestinationDbInSync(false); @@ -486,7 +495,10 @@ public void testCancelSync() throws Exception { catalog.getStreams().forEach(s -> s.getConfig().syncMode(syncMode).destinationSyncMode(destinationSyncMode)); final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); - + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } final JobInfoRead connectionSyncRead = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); waitForJob(apiClient.getJobsApi(), connectionSyncRead.getJob(), Set.of(JobStatus.PENDING)); @@ -519,7 +531,10 @@ public void testIncrementalSync() throws Exception { .destinationSyncMode(destinationSyncMode)); final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); - + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } LOGGER.info("Beginning testIncrementalSync() sync 1"); final JobInfoRead connectionSyncRead1 = apiClient.getConnectionApi() .syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); @@ -550,21 +565,31 @@ public void testIncrementalSync() throws Exception { assertRawDestinationContains(expectedRecords, new SchemaTableNamePair("public", STREAM_NAME)); // reset back to no data. 
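// The waitForTemporalWorkflow(connectionId) calls added above guard against triggering a sync
// before the connection manager workflow has started; the helper itself (added at the end of this
// class) currently uses a fixed 10-second sleep. A hedged sketch of the polling variant hinted at
// in its commented-out body, assuming a TemporalClient instance were wired into the test (it is
// not in this patch) and that the intent is to block until the workflow is reported as running:
//
//   while (!temporalClient.isWorkflowRunning(TemporalClient.getConnectionManagerName(connectionId))) {
//     Thread.sleep(1000); // InterruptedException handling omitted in this sketch
//   }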
+ LOGGER.info("Starting testIncrementalSync() reset"); final JobInfoRead jobInfoRead = apiClient.getConnectionApi().resetConnection(new ConnectionIdRequestBody().connectionId(connectionId)); - waitForSuccessfulJob(apiClient.getJobsApi(), jobInfoRead.getJob()); + FeatureFlags featureFlags = new EnvVariableFeatureFlags(); + if (featureFlags.usesNewScheduler()) { + waitForJob(apiClient.getJobsApi(), jobInfoRead.getJob(), + Sets.newHashSet(JobStatus.PENDING, JobStatus.RUNNING, JobStatus.INCOMPLETE, JobStatus.FAILED)); + } else { + waitForSuccessfulJob(apiClient.getJobsApi(), jobInfoRead.getJob()); + } + LOGGER.info("state after reset: {}", apiClient.getConnectionApi().getState(new ConnectionIdRequestBody().connectionId(connectionId))); - assertRawDestinationContains(Collections.emptyList(), new SchemaTableNamePair("public", STREAM_NAME)); + assertRawDestinationContains(Collections.emptyList(), new SchemaTableNamePair("public", + STREAM_NAME)); // sync one more time. verify it is the equivalent of a full refresh. LOGGER.info("Starting testIncrementalSync() sync 3"); - final JobInfoRead connectionSyncRead3 = apiClient.getConnectionApi() - .syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + final JobInfoRead connectionSyncRead3 = + apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); waitForSuccessfulJob(apiClient.getJobsApi(), connectionSyncRead3.getJob()); LOGGER.info("state after sync 3: {}", apiClient.getConnectionApi().getState(new ConnectionIdRequestBody().connectionId(connectionId))); assertSourceAndDestinationDbInSync(false); + } @Test @@ -613,7 +638,10 @@ public void testMultipleSchemasAndTablesSync() throws Exception { catalog.getStreams().forEach(s -> s.getConfig().syncMode(syncMode).destinationSyncMode(destinationSyncMode)); final UUID connectionId = createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); - + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } final JobInfoRead connectionSyncRead = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); waitForSuccessfulJob(apiClient.getJobsApi(), connectionSyncRead.getJob()); assertSourceAndDestinationDbInSync(false); @@ -743,7 +771,10 @@ public void testCheckpointing() throws Exception { .destinationSyncMode(destinationSyncMode)); final UUID connectionId = createConnection(connectionName, sourceId, destinationId, Collections.emptyList(), catalog, null).getConnectionId(); - + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } final JobInfoRead connectionSyncRead1 = apiClient.getConnectionApi() .syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); @@ -834,7 +865,10 @@ public void testBackpressure() throws Exception { final UUID connectionId = createConnection(connectionName, sourceId, destinationId, Collections.emptyList(), catalog, null) .getConnectionId(); - + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } final JobInfoRead connectionSyncRead1 = apiClient.getConnectionApi() .syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); @@ -912,6 +946,10 @@ public void testFailureTimeout() throws Exception { final UUID connectionId = createConnection(connectionName, sourceId, 
destinationId, Collections.emptyList(), catalog, null) .getConnectionId(); + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } final JobInfoRead connectionSyncRead1 = apiClient.getConnectionApi() .syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); @@ -1315,4 +1353,17 @@ public enum Type { DESTINATION } + private static void waitForTemporalWorkflow(final UUID connectionId) { + /* + * do { try { Thread.sleep(1000); } catch (InterruptedException e) { throw new RuntimeException(e); + * } } while + * (temporalClient.isWorkflowRunning(temporalClient.getConnectionManagerName(connectionId))); + */ + try { + Thread.sleep(10 * 1000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index 42ac0bd19ee1e..1b097f48fe996 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -21,6 +21,7 @@ dependencies { implementation project(':airbyte-analytics') implementation project(':airbyte-api') + implementation project(':airbyte-commons-docker') implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') implementation project(':airbyte-db:lib') diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java index 5057e5463c051..5bf166662218f 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java @@ -4,6 +4,7 @@ package io.airbyte.workers; +import io.airbyte.analytics.Deployment; import io.airbyte.analytics.TrackingClient; import io.airbyte.analytics.TrackingClientSingleton; import io.airbyte.commons.features.EnvVariableFeatureFlags; @@ -24,11 +25,14 @@ import io.airbyte.db.instance.jobs.JobsDatabaseInstance; import io.airbyte.scheduler.persistence.DefaultJobCreator; import io.airbyte.scheduler.persistence.DefaultJobPersistence; +import io.airbyte.scheduler.persistence.JobCreator; +import io.airbyte.scheduler.persistence.JobNotifier; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.WorkspaceHelper; import io.airbyte.scheduler.persistence.job_factory.DefaultSyncJobFactory; import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; import io.airbyte.scheduler.persistence.job_factory.SyncJobFactory; +import io.airbyte.scheduler.persistence.job_tracker.JobTracker; import io.airbyte.workers.helper.ConnectionHelper; import io.airbyte.workers.process.DockerProcessFactory; import io.airbyte.workers.process.KubePortManagerSingleton; @@ -100,6 +104,8 @@ public class WorkerApp { private final Configs configs; private final ConnectionHelper connectionHelper; private final boolean containerOrchestratorEnabled; + private final JobNotifier jobNotifier; + private final JobTracker jobTracker; public void start() { final Map mdc = MDC.getCopyOfContextMap(); @@ -187,6 +193,8 @@ public void start() { syncWorker.registerActivitiesImplementations(replicationActivity, normalizationActivity, dbtTransformationActivity, persistStateActivity); + final JobCreator jobCreator = new DefaultJobCreator(jobPersistence, configRepository); + final Worker connectionUpdaterWorker = factory.newWorker(TemporalJobType.CONNECTION_UPDATER.toString(), getWorkerOptions(maxWorkers.getMaxSyncWorkers())); 
connectionUpdaterWorker.registerWorkflowImplementationTypes(ConnectionManagerWorkflowImpl.class, SyncWorkflowImpl.class); @@ -198,7 +206,11 @@ public void start() { jobPersistence, temporalWorkerRunFactory, workerEnvironment, - logConfigs), + logConfigs, + jobNotifier, + jobTracker, + configRepository, + jobCreator), new ConfigFetchActivityImpl(configRepository, jobPersistence, configs, () -> Instant.now().getEpochSecond()), new ConnectionDeletionActivityImpl(connectionHelper), replicationActivity, @@ -345,6 +357,12 @@ public static void main(final String[] args) throws IOException, InterruptedExce .getInitialized(); final JobPersistence jobPersistence = new DefaultJobPersistence(jobDatabase); + TrackingClientSingleton.initialize( + configs.getTrackingStrategy(), + new Deployment(configs.getDeploymentMode(), jobPersistence.getDeployment().orElseThrow(), configs.getWorkerEnvironment()), + configs.getAirbyteRole(), + configs.getAirbyteVersion(), + configRepository); final TrackingClient trackingClient = TrackingClientSingleton.get(); final SyncJobFactory jobFactory = new DefaultSyncJobFactory( new DefaultJobCreator(jobPersistence, configRepository), @@ -372,6 +390,14 @@ public static void main(final String[] args) throws IOException, InterruptedExce workspaceHelper, workerConfigs); + final JobNotifier jobNotifier = new JobNotifier( + configs.getWebappUrl(), + configRepository, + workspaceHelper, + TrackingClientSingleton.get()); + + final JobTracker jobTracker = new JobTracker(configRepository, jobPersistence, trackingClient); + new WorkerApp( workspaceRoot, jobProcessFactory, @@ -392,7 +418,9 @@ public static void main(final String[] args) throws IOException, InterruptedExce temporalWorkerRunFactory, configs, connectionHelper, - configs.getContainerOrchestratorEnabled()).start(); + configs.getContainerOrchestratorEnabled(), + jobNotifier, + jobTracker).start(); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java index d6d116a4eec99..f2ce3fd83e06b 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java @@ -216,12 +216,10 @@ public void submitConnectionUpdaterAsync(final UUID connectionId) { final ConnectionManagerWorkflow connectionManagerWorkflow = getWorkflowOptionsWithWorkflowId(ConnectionManagerWorkflow.class, TemporalJobType.CONNECTION_UPDATER, getConnectionManagerName(connectionId)); final BatchRequest signalRequest = client.newSignalWithStartRequest(); - final ConnectionUpdaterInput input = new ConnectionUpdaterInput(connectionId, null, null, false, 1, null); + final ConnectionUpdaterInput input = new ConnectionUpdaterInput(connectionId, null, null, false, 1, null, false); signalRequest.add(connectionManagerWorkflow::run, input); WorkflowClient.start(connectionManagerWorkflow::run, input); - - log.info("Scheduler temporal wf started"); } public void deleteConnection(final UUID connectionId) { @@ -300,6 +298,7 @@ public ManualSyncSubmissionResult startNewCancelation(final UUID connectionId) { final boolean isWorflowRunning = isWorkflowRunning(getConnectionManagerName(connectionId)); if (!isWorflowRunning) { + log.error("Can't cancel a non running workflow"); return new ManualSyncSubmissionResult( Optional.of("No scheduler workflow is running for: " + connectionId), Optional.empty()); @@ -320,7 +319,45 @@ public ManualSyncSubmissionResult 
startNewCancelation(final UUID connectionId) { } } while (connectionManagerWorkflow.getState().isRunning()); - log.info("end of manual schedule"); + log.info("end of manual cancellation"); + + final long jobId = connectionManagerWorkflow.getJobInformation().getJobId(); + + return new ManualSyncSubmissionResult( + Optional.empty(), + Optional.of(jobId)); + } + + public ManualSyncSubmissionResult resetConnection(final UUID connectionId) { + log.info("reset sync request"); + + final boolean isWorflowRunning = isWorkflowRunning(getConnectionManagerName(connectionId)); + + if (!isWorflowRunning) { + log.error("Can't reset a non running workflow"); + return new ManualSyncSubmissionResult( + Optional.of("No scheduler workflow is running for: " + connectionId), + Optional.empty()); + } + + final ConnectionManagerWorkflow connectionManagerWorkflow = + getExistingWorkflow(ConnectionManagerWorkflow.class, getConnectionManagerName(connectionId)); + + final long oldJobId = connectionManagerWorkflow.getJobInformation().getJobId(); + + connectionManagerWorkflow.resetConnection(); + + do { + try { + Thread.sleep(DELAY_BETWEEN_QUERY_MS); + } catch (final InterruptedException e) { + return new ManualSyncSubmissionResult( + Optional.of("Didn't manage to reset a sync for: " + connectionId), + Optional.empty()); + } + } while (connectionManagerWorkflow.getJobInformation().getJobId() == oldJobId); + + log.info("end of reset"); final long jobId = connectionManagerWorkflow.getJobInformation().getJobId(); @@ -405,7 +442,7 @@ public boolean isWorkflowRunning(final String workflowName) { } @VisibleForTesting - static String getConnectionManagerName(final UUID connectionId) { + public static String getConnectionManagerName(final UUID connectionId) { return "connection_manager_" + connectionId; } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflow.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflow.java index 41b8c4131b7a1..098e0c580e28e 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflow.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflow.java @@ -52,6 +52,9 @@ public interface ConnectionManagerWorkflow { @SignalMethod void connectionUpdated(); + @SignalMethod + void resetConnection(); + /** * Return the current state of the workflow. 
*/ diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java index d65046e3b9f2a..058bb927188d1 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java @@ -4,6 +4,9 @@ package io.airbyte.workers.temporal.scheduling; +import io.airbyte.config.StandardSyncOutput; +import io.airbyte.config.StandardSyncSummary; +import io.airbyte.config.StandardSyncSummary.ReplicationStatus; import io.airbyte.workers.temporal.TemporalJobType; import io.airbyte.workers.temporal.exception.RetryableException; import io.airbyte.workers.temporal.scheduling.activities.ConfigFetchActivity; @@ -23,6 +26,7 @@ import io.airbyte.workers.temporal.scheduling.activities.JobCreationAndStatusUpdateActivity.JobCreationOutput; import io.airbyte.workers.temporal.scheduling.activities.JobCreationAndStatusUpdateActivity.JobFailureInput; import io.airbyte.workers.temporal.scheduling.activities.JobCreationAndStatusUpdateActivity.JobSuccessInput; +import io.airbyte.workers.temporal.scheduling.activities.JobCreationAndStatusUpdateActivity.ReportJobStartInput; import io.airbyte.workers.temporal.scheduling.shared.ActivityConfiguration; import io.airbyte.workers.temporal.scheduling.state.WorkflowState; import io.airbyte.workers.temporal.scheduling.state.listener.NoopStateListener; @@ -49,6 +53,8 @@ public class ConnectionManagerWorkflowImpl implements ConnectionManagerWorkflow Optional maybeJobId = Optional.empty(); Optional maybeAttemptId = Optional.empty(); + Optional standardSyncOutput = Optional.empty(); + private final GenerateInputActivity getSyncInputActivity = Workflow.newActivityStub(GenerateInputActivity.class, ActivityConfiguration.OPTIONS); private final JobCreationAndStatusUpdateActivity jobCreationAndStatusUpdateActivity = Workflow.newActivityStub(JobCreationAndStatusUpdateActivity.class, ActivityConfiguration.OPTIONS); @@ -78,20 +84,26 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr // Job and attempt creation maybeJobId = Optional.ofNullable(connectionUpdaterInput.getJobId()).or(() -> { final JobCreationOutput jobCreationOutput = jobCreationAndStatusUpdateActivity.createNewJob(new JobCreationInput( - connectionUpdaterInput.getConnectionId())); + connectionUpdaterInput.getConnectionId(), workflowState.isResetConnection())); + connectionUpdaterInput.setJobId(jobCreationOutput.getJobId()); return Optional.ofNullable(jobCreationOutput.getJobId()); }); maybeAttemptId = Optional.ofNullable(connectionUpdaterInput.getAttemptId()).or(() -> maybeJobId.map(jobId -> { final AttemptCreationOutput attemptCreationOutput = jobCreationAndStatusUpdateActivity.createNewAttempt(new AttemptCreationInput( jobId)); + connectionUpdaterInput.setAttemptId(attemptCreationOutput.getAttemptId()); return attemptCreationOutput.getAttemptId(); })); // Sync workflow final SyncInput getSyncInputActivitySyncInput = new SyncInput( maybeAttemptId.get(), - maybeJobId.get()); + maybeJobId.get(), + workflowState.isResetConnection()); + + jobCreationAndStatusUpdateActivity.reportJobStart(new ReportJobStartInput( + maybeJobId.get())); final SyncOutput syncWorkflowInputs = getSyncInputActivity.getSyncWorkflowInput(getSyncInputActivitySyncInput); @@ -108,12 +120,18 @@ public void run(final 
ConnectionUpdaterInput connectionUpdaterInput) throws Retr final UUID connectionId = connectionUpdaterInput.getConnectionId(); try { - childSync.run( + standardSyncOutput = Optional.ofNullable(childSync.run( syncWorkflowInputs.getJobRunConfig(), syncWorkflowInputs.getSourceLauncherConfig(), syncWorkflowInputs.getDestinationLauncherConfig(), syncWorkflowInputs.getSyncInput(), - connectionId); + connectionId)); + + StandardSyncSummary standardSyncSummary = standardSyncOutput.get().getStandardSyncSummary(); + + if (standardSyncSummary != null && standardSyncSummary.getStatus() == ReplicationStatus.FAILED) { + workflowState.setFailed(true); + } } catch (final ChildWorkflowFailure childWorkflowFailure) { if (!(childWorkflowFailure.getCause() instanceof CanceledFailure)) { throw childWorkflowFailure; @@ -128,6 +146,16 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr // The naming is very misleading, it is not a failure but the expected behavior... } + if (workflowState.isResetConnection()) { + connectionUpdaterInput.setResetConnection(true); + connectionUpdaterInput.setJobId(null); + connectionUpdaterInput.setAttemptNumber(1); + connectionUpdaterInput.setFromFailure(false); + connectionUpdaterInput.setAttemptId(null); + } else { + connectionUpdaterInput.setResetConnection(false); + } + if (workflowState.isUpdated()) { log.error("A connection configuration has changed for the connection {}. The job will be restarted", connectionUpdaterInput.getConnectionId()); @@ -139,6 +167,8 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr } else if (workflowState.isCancelled()) { jobCreationAndStatusUpdateActivity.jobCancelled(new JobCancelledInput( maybeJobId.get())); + } else if (workflowState.isFailed()) { + reportFailure(connectionUpdaterInput); } else { // report success reportSuccess(connectionUpdaterInput); @@ -155,7 +185,8 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr private void reportSuccess(final ConnectionUpdaterInput connectionUpdaterInput) { jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput( maybeJobId.get(), - maybeAttemptId.get())); + maybeAttemptId.get(), + standardSyncOutput.orElse(null))); connectionUpdaterInput.setJobId(null); connectionUpdaterInput.setAttemptNumber(1); @@ -176,7 +207,8 @@ private void reportFailure(final ConnectionUpdaterInput connectionUpdaterInput) connectionUpdaterInput.setFromFailure(true); } else { jobCreationAndStatusUpdateActivity.jobFailure(new JobFailureInput( - connectionUpdaterInput.getJobId())); + connectionUpdaterInput.getJobId(), + "Job failed after too many retries")); Workflow.await(Duration.ofMinutes(1), () -> skipScheduling()); @@ -217,6 +249,14 @@ public void connectionUpdated() { workflowState.setUpdated(true); } + @Override + public void resetConnection() { + if (!workflowState.isRunning()) { + cancelJob(); + } + workflowState.setResetConnection(true); + } + @Override public WorkflowState getState() { return workflowState; @@ -230,14 +270,15 @@ public JobInformation getJobInformation() { } private Boolean skipScheduling() { - return workflowState.isSkipScheduling() || workflowState.isDeleted() || workflowState.isUpdated(); + return workflowState.isSkipScheduling() || workflowState.isDeleted() || workflowState.isUpdated() || workflowState.isResetConnection(); } private void continueAsNew(final ConnectionUpdaterInput connectionUpdaterInput) { // Continue the workflow as new connectionUpdaterInput.setAttemptId(null); + boolean 
isDeleted = workflowState.isDeleted(); workflowState.reset(); - if (!workflowState.isDeleted()) { + if (!isDeleted) { Workflow.continueAsNew(connectionUpdaterInput); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionUpdaterInput.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionUpdaterInput.java index e556fbcff6d4f..b2c0dc701c749 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionUpdaterInput.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionUpdaterInput.java @@ -27,5 +27,6 @@ public class ConnectionUpdaterInput { private int attemptNumber; @Nullable private WorkflowState workflowState; + private boolean resetConnection; } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/GenerateInputActivity.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/GenerateInputActivity.java index c8b023dfffef2..75e5cf0d18b38 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/GenerateInputActivity.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/GenerateInputActivity.java @@ -23,6 +23,7 @@ class SyncInput { private int attemptId; private long jobId; + private boolean reset; } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/GenerateInputActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/GenerateInputActivityImpl.java index 8c1fe1ecb67fd..6704b33810a37 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/GenerateInputActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/GenerateInputActivityImpl.java @@ -4,11 +4,15 @@ package io.airbyte.workers.temporal.scheduling.activities; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.JobResetConnectionConfig; import io.airbyte.config.JobSyncConfig; import io.airbyte.config.StandardSyncInput; import io.airbyte.scheduler.models.IntegrationLauncherConfig; +import io.airbyte.scheduler.models.Job; import io.airbyte.scheduler.models.JobRunConfig; import io.airbyte.scheduler.persistence.JobPersistence; +import io.airbyte.workers.WorkerConstants; import io.airbyte.workers.temporal.TemporalUtils; import io.airbyte.workers.temporal.exception.RetryableException; import lombok.AllArgsConstructor; @@ -23,7 +27,22 @@ public SyncOutput getSyncWorkflowInput(final SyncInput input) { try { final long jobId = input.getJobId(); final int attempt = input.getAttemptId(); - final JobSyncConfig config = jobPersistence.getJob(jobId).getConfig().getSync(); + final Job job = jobPersistence.getJob(jobId); + JobSyncConfig config = job.getConfig().getSync(); + if (input.isReset()) { + final JobResetConnectionConfig resetConnection = job.getConfig().getResetConnection(); + config = new JobSyncConfig() + .withNamespaceDefinition(resetConnection.getNamespaceDefinition()) + .withNamespaceFormat(resetConnection.getNamespaceFormat()) + .withPrefix(resetConnection.getPrefix()) + .withSourceDockerImage(WorkerConstants.RESET_JOB_SOURCE_DOCKER_IMAGE_STUB) + .withDestinationDockerImage(resetConnection.getDestinationDockerImage()) + .withSourceConfiguration(Jsons.emptyObject()) + .withDestinationConfiguration(resetConnection.getDestinationConfiguration()) + 
.withConfiguredAirbyteCatalog(resetConnection.getConfiguredAirbyteCatalog()) + .withOperationSequence(resetConnection.getOperationSequence()) + .withResourceRequirements(resetConnection.getResourceRequirements()); + } final JobRunConfig jobRunConfig = TemporalUtils.createJobRunConfig(jobId, attempt); @@ -49,6 +68,7 @@ public SyncOutput getSyncWorkflowInput(final SyncInput input) { .withResourceRequirements(config.getResourceRequirements()); return new SyncOutput(jobRunConfig, sourceLauncherConfig, destinationLauncherConfig, syncInput); + } catch (final Exception e) { throw new RetryableException(e); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java index 44e70b17aba3d..385f952eb56a8 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java @@ -4,6 +4,7 @@ package io.airbyte.workers.temporal.scheduling.activities; +import io.airbyte.config.StandardSyncOutput; import io.airbyte.workers.temporal.exception.RetryableException; import io.temporal.activity.ActivityInterface; import io.temporal.activity.ActivityMethod; @@ -21,6 +22,7 @@ public interface JobCreationAndStatusUpdateActivity { class JobCreationInput { private UUID connectionId; + private boolean reset; } @@ -76,6 +78,7 @@ class JobSuccessInput { private long jobId; private int attemptId; + private StandardSyncOutput standardSyncOutput; } @@ -91,6 +94,7 @@ class JobSuccessInput { class JobFailureInput { private long jobId; + private String reason; } @@ -131,4 +135,16 @@ class JobCancelledInput { @ActivityMethod void jobCancelled(JobCancelledInput input); + @Data + @NoArgsConstructor + @AllArgsConstructor + class ReportJobStartInput { + + private long jobId; + + } + + @ActivityMethod + void reportJobStart(ReportJobStartInput reportJobStartInput); + } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java index f91fffdbdb675..bfd122a8d8089 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java @@ -4,17 +4,35 @@ package io.airbyte.workers.temporal.scheduling.activities; +import com.google.common.collect.Lists; +import io.airbyte.commons.docker.DockerUtils; +import io.airbyte.commons.enums.Enums; import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.DestinationConnection; +import io.airbyte.config.JobOutput; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.config.StandardSync; +import io.airbyte.config.StandardSyncOperation; import io.airbyte.config.helpers.LogClientSingleton; import io.airbyte.config.helpers.LogConfigs; +import io.airbyte.config.persistence.ConfigNotFoundException; +import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.scheduler.models.Job; +import io.airbyte.scheduler.persistence.JobCreator; +import 
io.airbyte.scheduler.persistence.JobNotifier; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.job_factory.SyncJobFactory; +import io.airbyte.scheduler.persistence.job_tracker.JobTracker; +import io.airbyte.scheduler.persistence.job_tracker.JobTracker.JobState; +import io.airbyte.validation.json.JsonValidationException; +import io.airbyte.workers.JobStatus; import io.airbyte.workers.temporal.exception.RetryableException; import io.airbyte.workers.worker_run.TemporalWorkerRunFactory; import io.airbyte.workers.worker_run.WorkerRun; import java.io.IOException; import java.nio.file.Path; +import java.util.List; +import java.util.Optional; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -27,14 +45,47 @@ public class JobCreationAndStatusUpdateActivityImpl implements JobCreationAndSta private final TemporalWorkerRunFactory temporalWorkerRunFactory; private final WorkerEnvironment workerEnvironment; private final LogConfigs logConfigs; + private final JobNotifier jobNotifier; + private final JobTracker jobTracker; + private final ConfigRepository configRepository; + private final JobCreator jobCreator; @Override public JobCreationOutput createNewJob(final JobCreationInput input) { - final long jobId = jobFactory.create(input.getConnectionId()); + try { + if (input.isReset()) { + final StandardSync standardSync = configRepository.getStandardSync(input.getConnectionId()); + + final DestinationConnection destination = configRepository.getDestinationConnection(standardSync.getDestinationId()); + + final StandardDestinationDefinition destinationDef = + configRepository.getStandardDestinationDefinition(destination.getDestinationDefinitionId()); + final String destinationImageName = DockerUtils.getTaggedImageName(destinationDef.getDockerRepository(), destinationDef.getDockerImageTag()); + + final List standardSyncOperations = Lists.newArrayList(); + for (final var operationId : standardSync.getOperationIds()) { + final StandardSyncOperation standardSyncOperation = configRepository.getStandardSyncOperation(operationId); + standardSyncOperations.add(standardSyncOperation); + } + + final Optional jobIdOptional = + jobCreator.createResetConnectionJob(destination, standardSync, destinationImageName, standardSyncOperations); - log.info("New job created, with id: " + jobId); + final long jobId = jobIdOptional.isEmpty() + ? 
jobPersistence.getLastReplicationJob(standardSync.getConnectionId()).orElseThrow(() -> new RuntimeException("No job available")).getId() + : jobIdOptional.get(); - return new JobCreationOutput(jobId); + return new JobCreationOutput(jobId); + } else { + final long jobId = jobFactory.create(input.getConnectionId()); + + log.info("New job created, with id: " + jobId); + + return new JobCreationOutput(jobId); + } + } catch (JsonValidationException | ConfigNotFoundException | IOException e) { + throw new RetryableException(e); + } } @Override @@ -57,7 +108,16 @@ public AttemptCreationOutput createNewAttempt(final AttemptCreationInput input) @Override public void jobSuccess(final JobSuccessInput input) { try { + if (input.getStandardSyncOutput() != null) { + final JobOutput jobOutput = new JobOutput().withSync(input.getStandardSyncOutput()); + jobPersistence.writeOutput(input.getJobId(), input.getAttemptId(), jobOutput); + } else { + log.warn("The job {} doesn't have an input for the attempt {}", input.getJobId(), input.getAttemptId()); + } jobPersistence.succeedAttempt(input.getJobId(), input.getAttemptId()); + final Job job = jobPersistence.getJob(input.getJobId()); + jobNotifier.successJob(job); + trackCompletion(job, JobStatus.SUCCEEDED); } catch (final IOException e) { throw new RetryableException(e); } @@ -67,6 +127,9 @@ public void jobSuccess(final JobSuccessInput input) { public void jobFailure(final JobFailureInput input) { try { jobPersistence.failJob(input.getJobId()); + final Job job = jobPersistence.getJob(input.getJobId()); + jobNotifier.failJob(input.getReason(), job); + trackCompletion(job, JobStatus.FAILED); } catch (final IOException e) { throw new RetryableException(e); } @@ -76,6 +139,7 @@ public void jobFailure(final JobFailureInput input) { public void attemptFailure(final AttemptFailureInput input) { try { jobPersistence.failAttempt(input.getJobId(), input.getAttemptId()); + final Job job = jobPersistence.getJob(input.getJobId()); } catch (final IOException e) { throw new RetryableException(e); } @@ -85,9 +149,26 @@ public void attemptFailure(final AttemptFailureInput input) { public void jobCancelled(final JobCancelledInput input) { try { jobPersistence.cancelJob(input.getJobId()); + final Job job = jobPersistence.getJob(input.getJobId()); + trackCompletion(job, JobStatus.FAILED); + jobNotifier.failJob("Job was cancelled", job); } catch (final IOException e) { throw new RetryableException(e); } } + @Override + public void reportJobStart(final ReportJobStartInput input) { + try { + final Job job = jobPersistence.getJob(input.getJobId()); + jobTracker.trackSync(job, JobState.STARTED); + } catch (final IOException e) { + throw new RetryableException(e); + } + } + + private void trackCompletion(final Job job, final io.airbyte.workers.JobStatus status) { + jobTracker.trackSync(job, Enums.convertTo(status, JobState.class)); + } + } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/state/WorkflowState.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/state/WorkflowState.java index c5b0242c62372..496c68b3f5c04 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/state/WorkflowState.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/state/WorkflowState.java @@ -27,6 +27,8 @@ public WorkflowState(final UUID id, final WorkflowStateChangedListener stateChan private boolean skipScheduling = false; private boolean updated = false; private boolean cancelled = false; + 
private boolean failed = false; + private boolean resetConnection = false; public void setRunning(final boolean running) { final ChangedStateEvent event = new ChangedStateEvent( @@ -68,12 +70,30 @@ public void setCancelled(final boolean cancelled) { this.cancelled = cancelled; } + public void setFailed(final boolean failed) { + final ChangedStateEvent event = new ChangedStateEvent( + StateField.FAILED, + failed); + stateChangedListener.addEvent(id, event); + this.failed = failed; + } + + public void setResetConnection(final boolean resetConnection) { + final ChangedStateEvent event = new ChangedStateEvent( + StateField.RESET, + resetConnection); + stateChangedListener.addEvent(id, event); + this.resetConnection = resetConnection; + } + public void reset() { this.setRunning(false); this.setDeleted(false); this.setSkipScheduling(false); this.setUpdated(false); this.setCancelled(false); + this.setFailed(false); + this.setResetConnection(false); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/state/listener/WorkflowStateChangedListener.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/state/listener/WorkflowStateChangedListener.java index 0e7301e975335..b84be34603ce9 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/state/listener/WorkflowStateChangedListener.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/state/listener/WorkflowStateChangedListener.java @@ -27,7 +27,9 @@ enum StateField { DELETED, RUNNING, SKIPPED_SCHEDULING, - UPDATED + UPDATED, + FAILED, + RESET } @Value diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/worker_run/TemporalWorkerRunFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/worker_run/TemporalWorkerRunFactory.java index af68888144044..7fb2fbe3b49bb 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/worker_run/TemporalWorkerRunFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/worker_run/TemporalWorkerRunFactory.java @@ -59,6 +59,10 @@ public ManualSyncSubmissionResult startNewCancelation(final UUID connectionId) { return temporalClient.startNewCancelation(connectionId); } + public ManualSyncSubmissionResult resetConnection(final UUID connectionId) { + return temporalClient.resetConnection(connectionId); + } + public void deleteConnection(final UUID connectionId) { temporalClient.deleteConnection(connectionId); } @@ -72,12 +76,6 @@ public CheckedSupplier, Exception> createSupplier(fin final UUID connectionId = UUID.fromString(job.getScope()); return switch (job.getConfigType()) { case SYNC -> () -> { - - if (featureFlags.usesNewScheduler()) { - temporalClient.submitConnectionUpdaterAsync(connectionId); - - return toOutputAndStatusConnector(); - } final TemporalResponse output = temporalClient.submitSync(job.getId(), attemptId, job.getConfig().getSync(), connectionId); return toOutputAndStatus(output); @@ -96,7 +94,6 @@ public CheckedSupplier, Exception> createSupplier(fin .withOperationSequence(resetConnection.getOperationSequence()) .withResourceRequirements(resetConnection.getResourceRequirements()); - // TODO: Signal method? 
final TemporalResponse output = temporalClient.submitSync(job.getId(), attemptId, config, connectionId); return toOutputAndStatus(output); }; diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java index 33a70b82e8e86..1266b48332c9d 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java @@ -124,7 +124,8 @@ public void runSuccess() { 1, false, 1, - workflowState); + workflowState, + false); WorkflowClient.start(workflow::run, input); testEnv.sleep(Duration.ofSeconds(124L)); @@ -156,7 +157,8 @@ public void retryAfterFail() { 1, true, 1, - workflowState); + workflowState, + false); WorkflowClient.start(workflow::run, input); testEnv.sleep(Duration.ofSeconds(50L)); @@ -187,7 +189,8 @@ public void manualRun() { 1, false, 1, - workflowState); + workflowState, + false); WorkflowClient.start(workflow::run, input); testEnv.sleep(Duration.ofSeconds(30L)); @@ -228,7 +231,8 @@ public void updatedSignalRecieved() { 1, false, 1, - workflowState); + workflowState, + false); WorkflowClient.start(workflow::run, input); testEnv.sleep(Duration.ofSeconds(30L)); @@ -269,7 +273,8 @@ public void cancelNonRunning() { 1, false, 1, - workflowState); + workflowState, + false); WorkflowClient.start(workflow::run, input); testEnv.sleep(Duration.ofSeconds(30L)); @@ -310,7 +315,8 @@ public void deleteSync() { 1, false, 1, - workflowState); + workflowState, + false); WorkflowClient.start(workflow::run, input); testEnv.sleep(Duration.ofSeconds(30L)); @@ -378,7 +384,8 @@ public void manualRun() { 1, false, 1, - workflowState); + workflowState, + false); WorkflowClient.start(workflow::run, input); testEnv.sleep(Duration.ofMinutes(2L)); @@ -407,7 +414,8 @@ public void cancelRunning() { 1, false, 1, - workflowState); + workflowState, + false); WorkflowClient.start(workflow::run, input); workflow.submitManualSync(); diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java index 05a280574aac1..6cb059dee1187 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java @@ -5,11 +5,18 @@ package io.airbyte.workers.temporal.scheduling.activities; import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.JobOutput; +import io.airbyte.config.StandardSyncOutput; +import io.airbyte.config.StandardSyncSummary; +import io.airbyte.config.StandardSyncSummary.ReplicationStatus; import io.airbyte.config.helpers.LogClientSingleton; import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.models.Job; +import io.airbyte.scheduler.persistence.JobNotifier; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.job_factory.SyncJobFactory; +import io.airbyte.scheduler.persistence.job_tracker.JobTracker; +import io.airbyte.scheduler.persistence.job_tracker.JobTracker.JobState; import 
io.airbyte.workers.temporal.exception.RetryableException; import io.airbyte.workers.temporal.scheduling.activities.JobCreationAndStatusUpdateActivity.AttemptCreationInput; import io.airbyte.workers.temporal.scheduling.activities.JobCreationAndStatusUpdateActivity.AttemptCreationOutput; @@ -53,12 +60,22 @@ public class JobCreationAndStatusUpdateActivityTest { @Mock private LogConfigs mLogConfigs; + @Mock + private JobNotifier mJobNotifier; + + @Mock + private JobTracker mJobtracker; + @InjectMocks private JobCreationAndStatusUpdateActivityImpl jobCreationAndStatusUpdateActivity; private static final UUID CONNECTION_ID = UUID.randomUUID(); private static final long JOB_ID = 123L; private static final int ATTEMPT_ID = 321; + private static final StandardSyncOutput standardSyncOutput = new StandardSyncOutput() + .withStandardSyncSummary( + new StandardSyncSummary() + .withStatus(ReplicationStatus.COMPLETED)); @Nested class Creation { @@ -69,7 +86,7 @@ public void createJob() { Mockito.when(mJobFactory.create(CONNECTION_ID)) .thenReturn(JOB_ID); - final JobCreationOutput output = jobCreationAndStatusUpdateActivity.createNewJob(new JobCreationInput(CONNECTION_ID)); + final JobCreationOutput output = jobCreationAndStatusUpdateActivity.createNewJob(new JobCreationInput(CONNECTION_ID, false)); Assertions.assertThat(output.getJobId()).isEqualTo(JOB_ID); } @@ -129,9 +146,13 @@ class Update { @Test public void setJobSuccess() throws IOException { - jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID)); + jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID, standardSyncOutput)); + final JobOutput jobOutput = new JobOutput().withSync(standardSyncOutput); + Mockito.verify(mJobPersistence).writeOutput(JOB_ID, ATTEMPT_ID, jobOutput); Mockito.verify(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_ID); + Mockito.verify(mJobNotifier).successJob(Mockito.any()); + Mockito.verify(mJobtracker).trackSync(Mockito.any(), Mockito.eq(JobState.SUCCEEDED)); } @Test @@ -139,16 +160,17 @@ public void setJobSuccessWrapException() throws IOException { Mockito.doThrow(new IOException()) .when(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_ID); - Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID))) + Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID, null))) .isInstanceOf(RetryableException.class) .hasCauseInstanceOf(IOException.class); } @Test public void setJobFailure() throws IOException { - jobCreationAndStatusUpdateActivity.jobFailure(new JobFailureInput(JOB_ID)); + jobCreationAndStatusUpdateActivity.jobFailure(new JobFailureInput(JOB_ID, "reason")); Mockito.verify(mJobPersistence).failJob(JOB_ID); + Mockito.verify(mJobNotifier).failJob(Mockito.eq("reason"), Mockito.any()); } @Test @@ -156,7 +178,7 @@ public void setJobFailureWrapException() throws IOException { Mockito.doThrow(new IOException()) .when(mJobPersistence).failJob(JOB_ID); - Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.jobFailure(new JobFailureInput(JOB_ID))) + Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.jobFailure(new JobFailureInput(JOB_ID, ""))) .isInstanceOf(RetryableException.class) .hasCauseInstanceOf(IOException.class); } diff --git a/docker-compose.yaml b/docker-compose.yaml index 6b4e623882394..0eccd58cc660d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -71,6 +71,7 @@ 
services: - LOCAL_ROOT=${LOCAL_ROOT} - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT} - LOG_LEVEL=${LOG_LEVEL} + - NEW_SCHEDULER=${NEW_SCHEDULER} - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} - SYNC_JOB_MAX_ATTEMPTS=${SYNC_JOB_MAX_ATTEMPTS} - SYNC_JOB_MAX_TIMEOUT_DAYS=${SYNC_JOB_MAX_TIMEOUT_DAYS} @@ -81,8 +82,6 @@ services: - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT} - WORKSPACE_ROOT=${WORKSPACE_ROOT} - # TODO: Remove before merge - # - NEW_SCHEDULER=valuedoesntmatter volumes: - data:${CONFIG_ROOT} - workspace:${WORKSPACE_ROOT} @@ -93,6 +92,7 @@ services: container_name: airbyte-worker restart: unless-stopped environment: + - AIRBYTE_VERSION=${VERSION} - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-} - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-} - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} @@ -116,6 +116,7 @@ services: - SYNC_JOB_MAX_TIMEOUT_DAYS=${SYNC_JOB_MAX_TIMEOUT_DAYS} - TEMPORAL_HOST=${TEMPORAL_HOST} - TRACKING_STRATEGY=${TRACKING_STRATEGY} + - WEBAPP_URL=${WEBAPP_URL} - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT} - WORKSPACE_ROOT=${WORKSPACE_ROOT} @@ -145,13 +146,13 @@ services: - JOB_MAIN_CONTAINER_MEMORY_REQUEST=${JOB_MAIN_CONTAINER_MEMORY_REQUEST} - JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=${JOBS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION:-} - LOG_LEVEL=${LOG_LEVEL} + - NEW_SCHEDULER=${NEW_SCHEDULER} - SECRET_PERSISTENCE=${SECRET_PERSISTENCE} - TEMPORAL_HOST=${TEMPORAL_HOST} - TRACKING_STRATEGY=${TRACKING_STRATEGY} - WEBAPP_URL=${WEBAPP_URL} - WORKER_ENVIRONMENT=${WORKER_ENVIRONMENT} - WORKSPACE_ROOT=${WORKSPACE_ROOT} - # - NEW_SCHEDULER=valuedoesntmatter ports: - 8001:8001 volumes: diff --git a/tools/bin/acceptance_test_with_new_scheduler.sh b/tools/bin/acceptance_test_with_new_scheduler.sh new file mode 100755 index 0000000000000..b9338db047f85 --- /dev/null +++ b/tools/bin/acceptance_test_with_new_scheduler.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -e + +. tools/lib/lib.sh + +assert_root + +echo "Starting app..." 
+ +# Detach so we can run subsequent commands +VERSION=dev TRACKING_STRATEGY=logging NEW_SCHEDULER=true docker-compose up -d +trap "echo 'docker-compose logs:' && docker-compose logs -t --tail 1000 && docker-compose down -v" EXIT + +echo "Waiting for services to begin" +while [[ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/api/v1/health)" != "200" ]]; do echo "Waiting for docker deployment.."; sleep 5; done + +echo "Running e2e tests via gradle" +SUB_BUILD=PLATFORM USE_EXTERNAL_DEPLOYMENT=true ./gradlew :airbyte-tests:acceptanceTests --rerun-tasks --scan From 16133cf5e7c90808b0d5cc66e962c37a6dbfeb2e Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Wed, 19 Jan 2022 18:56:34 -0800 Subject: [PATCH 164/215] Rm flaky test (#9628) --- .github/workflows/gradle.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 533668cdf38a8..c5c53c331a1b9 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -261,9 +261,6 @@ jobs: - name: Run End-to-End Acceptance Tests run: ./tools/bin/acceptance_test.sh - - name: Run End-to-End Acceptance Tests with the new scheduler - run: ./tools/bin/acceptance_test_with_new_scheduler.sh - - name: Automatic Migration Acceptance Test run: SUB_BUILD=PLATFORM ./gradlew :airbyte-tests:automaticMigrationAcceptanceTest --scan -i From 3aa071665e7d6d399d32b8e0a3817ba5a19c5053 Mon Sep 17 00:00:00 2001 From: Octavia Squidington III <90398440+octavia-squidington-iii@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:09:15 +0800 Subject: [PATCH 165/215] Bump Airbyte version from 0.35.5-alpha to 0.35.6-alpha (#9630) Co-authored-by: benmoriceau --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-bootloader/Dockerfile | 4 ++-- airbyte-container-orchestrator/Dockerfile | 6 +++--- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 4 ++-- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 10 +++++----- charts/airbyte/values.yaml | 10 +++++----- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 12 ++++++------ kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 12 ++++++------ 17 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 1b8fb7e71e393..598d64116a47d 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.5-alpha +current_version = 0.35.6-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? 
diff --git a/.env b/.env index 0f43c07e94ebf..03d63b19bfa95 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.5-alpha +VERSION=0.35.6-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index cc06fe86c6683..e481d8e8cef43 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -5,6 +5,6 @@ ENV APPLICATION airbyte-bootloader WORKDIR /app -ADD bin/${APPLICATION}-0.35.5-alpha.tar /app +ADD bin/${APPLICATION}-0.35.6-alpha.tar /app -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index 1fe6b714a4fb6..b805172795d8d 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -19,12 +19,12 @@ RUN add-apt-repository \ RUN apt-get update && apt-get install -y docker-ce-cli jq ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.5-alpha.tar /app +ADD bin/${APPLICATION}-0.35.6-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 92eb38ba59023..e452774f31aa7 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.5-alpha.tar /app +ADD bin/${APPLICATION}-0.35.6-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index a5b4e3ecb0127..0c65522957ebd 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.35.5-alpha.tar /app +ADD bin/${APPLICATION}-0.35.6-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 929778b2c117f..25d3848a54641 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.5-alpha", + "version": "0.35.6-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": "0.35.5-alpha", + "version": "0.35.6-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index f8e99c4f9f898..3a0ed25c36035 100644 --- a/airbyte-webapp/package.json +++ 
b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.5-alpha", + "version": "0.35.6-alpha", "private": true, "engines": { "node": ">=16.0.0" diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index abc336fec7ebb..ce3aff6106938 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.5-alpha.tar /app +ADD bin/${APPLICATION}-0.35.6-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.5-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index d031e09ad5854..8695d030dc10d 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.5-alpha" +appVersion: "0.35.6-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index ba04b7cb037d1..141fe87e20b59 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -55,7 +55,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -86,7 +86,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.35.6-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -120,7 +120,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -148,7 +148,7 @@ | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.5-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 42221b185c4a3..6d113532487d1 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.5-alpha + tag: 0.35.6-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -140,7 +140,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.5-alpha + tag: 0.35.6-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -245,7 +245,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.5-alpha + tag: 0.35.6-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -357,7 +357,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.5-alpha + tag: 0.35.6-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## @@ -446,7 +446,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.35.5-alpha + tag: 0.35.6-alpha ## @section Temporal parameters ## TODO: Move to consuming temporal from a dedicated helm chart diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 5b640eb3c218e..3edc0edc0e70c 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -101,7 +101,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.5-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.6-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index b5cc97f7e2584..041ebcb9c50bf 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.5-alpha +AIRBYTE_VERSION=0.35.6-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index b466956a7624b..0293dcff62822 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/bootloader - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/scheduler - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/server - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/webapp - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/worker - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index b5cc97f7e2584..041ebcb9c50bf 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.5-alpha +AIRBYTE_VERSION=0.35.6-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 32de4f039f260..4b4fe415b6ee4 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/bootloader - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/scheduler - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/server - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/webapp - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: airbyte/worker - newTag: 0.35.5-alpha + newTag: 0.35.6-alpha - name: temporalio/auto-setup newTag: 1.7.0 From b8cd77247c845785cf3300b9fe2d8656c2b5096e Mon Sep 17 00:00:00 2001 From: Augustin Date: Thu, 20 Jan 2022 11:18:58 +0100 Subject: [PATCH 166/215] Documentation: More details about using an external database (#9611) --- docs/operator-guides/configuring-airbyte-db.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/operator-guides/configuring-airbyte-db.md b/docs/operator-guides/configuring-airbyte-db.md index 6374abcd970d0..22593a228dc36 100644 --- a/docs/operator-guides/configuring-airbyte-db.md +++ b/docs/operator-guides/configuring-airbyte-db.md @@ -3,7 +3,9 @@ Airbyte uses different objects to store internal state and metadata. 
This data is stored and manipulated by the various Airbyte components, but you have the ability to manage the deployment of this database in the following two ways: * Using the default Postgres database that Airbyte spins-up as part of the Docker service described in the `docker-compose.yml` file: `airbyte/db`. -* Through a dedicated custom Postgres instance \(the `airbyte/db` is in this case unused, and can therefore be removed or de-activated from the `docker-compose.yml` file\). +* Through a dedicated custom Postgres instance \(the `airbyte/db` is in this case unused, and can therefore be removed or de-activated from the `docker-compose.yml` file\). It's not a good practice to deploy mission-critical databases on Docker or Kubernetes. +Using a dedicated instance will provide more reliability to your Airbyte deployment. +Moreover, using a Cloud-managed Postgres instance (such as AWS RDS our GCP Cloud SQL), you will benefit from automatic backup and fine-grained sizing. You can start with a pretty small instance, but according to your Airbyte usage, the job database might grow and require more storage if you are not truncating the job history. The various entities are persisted in two internal databases: From e142a4f65ae5aba5abd90047ffbbd1ba90bbcf83 Mon Sep 17 00:00:00 2001 From: ycherniaiev <94798230+ycherniaiev@users.noreply.github.com> Date: Thu, 20 Jan 2022 15:12:37 +0200 Subject: [PATCH 167/215] Update fields in source-connectors specifications: lever-hiring (#9214) Signed-off-by: Sergey Chvalyuk Co-authored-by: Sergey Chvalyuk --- .../3981c999-bd7d-4afc-849b-e53dea90c948.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 143 +++++++++++++----- .../connectors/source-lever-hiring/Dockerfile | 2 +- .../integration_tests/spec.json | 8 +- .../source_lever_hiring/spec.json | 8 +- docs/integrations/sources/lever-hiring.md | 1 + 7 files changed, 119 insertions(+), 47 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/3981c999-bd7d-4afc-849b-e53dea90c948.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/3981c999-bd7d-4afc-849b-e53dea90c948.json index e7972a73124a8..eb95b7628d0c4 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/3981c999-bd7d-4afc-849b-e53dea90c948.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/3981c999-bd7d-4afc-849b-e53dea90c948.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "3981c999-bd7d-4afc-849b-e53dea90c948", "name": "Lever Hiring", "dockerRepository": "airbyte/source-lever-hiring", - "dockerImageTag": "0.1.0", + "dockerImageTag": "0.1.2", "documentationUrl": "https://docs.airbyte.io/integrations/sources/lever-hiring", "icon": "leverhiring.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 95fa5edb2d484..724e3cf8f2f19 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -362,7 +362,7 @@ - name: Lever Hiring sourceDefinitionId: 3981c999-bd7d-4afc-849b-e53dea90c948 dockerRepository: airbyte/source-lever-hiring - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.2 documentationUrl: https://docs.airbyte.io/integrations/sources/lever-hiring icon: leverhiring.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml 
b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index dee1594ebbed3..07ec4b040181c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3574,63 +3574,134 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-lever-hiring:0.1.0" +- dockerImage: "airbyte/source-lever-hiring:0.1.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring" changelogUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring#changelog" connectionSpecification: - title: "Lever Hiring Spec" + $schema: "http://json-schema.org/draft-07/schema#" + title: "Lever Hiring Source Spec" type: "object" + required: + - "start_date" + additionalProperties: true properties: - client_id: - title: "Client Id" - description: "The client application id as provided when registering the\ - \ application with Lever." - type: "string" - client_secret: - title: "Client Secret" - description: "The application secret as provided when registering the application\ - \ with Lever." - airbyte_secret: true - type: "string" - refresh_token: - title: "Refresh Token" - description: "The refresh token your application will need to submit to\ - \ get a new access token after it's expired." + credentials: + order: 3 + title: "Authentication Mechanism" + description: "Choose how to authenticate to Lever Hiring." + type: "object" + oneOf: + - type: "object" + title: "Authenticate via Lever (OAuth)" + required: + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your Lever Hiring developer application." + client_secret: + title: "Client Secret" + type: "string" + description: "The Client Secret of your Lever Hiring developer application." + airbyte_secret: true + option_title: + type: "string" + title: "Credentials Title" + description: "OAuth Credentials" + const: "OAuth Credentials" + refresh_token: + type: "string" + title: "Refresh Token" + description: "The token for obtaining new access token." + airbyte_secret: true + start_date: + order: 0 type: "string" + title: "Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated. Note that it will be used\ + \ only in the following incremental streams: comments, commits, and issues." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" environment: + order: 1 + type: "string" title: "Environment" - description: "Sandbox or Production environment." - default: "Production" + description: "The environment in which you'd like to replicate data for\ + \ Lever. This is used to determine which Lever API endpoint to use." + default: "Sandbox" enum: - - "Sandbox" - "Production" - type: "string" - start_date: - title: "Start Date" - description: "UTC date and time in the format 2019-02-25T00:00:00Z. Any\ - \ data before this date will not be replicated." 
- pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2021-04-25T00:00:00Z" - type: "string" - required: - - "client_id" - - "client_secret" - - "refresh_token" - - "start_date" + - "Sandbox" supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] authSpecification: auth_type: "oauth2.0" oauth2Specification: - rootObject: [] + rootObject: + - "credentials" + - "0" oauthFlowInitParameters: - - "client_id" - - "client_secret" + oauthFlowOutputParameters: - - "refresh_token" - oauthFlowOutputParameters: [] + advanced_auth: + auth_flow_type: "oauth2.0" + predicate_key: + - "credentials" + - "auth_type" + predicate_value: "Client" + oauth_config_specification: + oauth_user_input_from_connector_config_specification: + type: "object" + properties: + environment: + type: "string" + path_in_connector_config: + - "environment" + complete_oauth_output_specification: + type: "object" + additionalProperties: false + properties: + refresh_token: + type: "string" + path_in_connector_config: + - "credentials" + - "refresh_token" + complete_oauth_server_input_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + client_secret: + type: "string" + complete_oauth_server_output_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + path_in_connector_config: + - "credentials" + - "client_id" + client_secret: + type: "string" + path_in_connector_config: + - "credentials" + - "client_secret" - dockerImage: "airbyte/source-linkedin-ads:0.1.5" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/linkedin-ads" diff --git a/airbyte-integrations/connectors/source-lever-hiring/Dockerfile b/airbyte-integrations/connectors/source-lever-hiring/Dockerfile index 979d6b21308ef..a5b4e9402e03f 100644 --- a/airbyte-integrations/connectors/source-lever-hiring/Dockerfile +++ b/airbyte-integrations/connectors/source-lever-hiring/Dockerfile @@ -34,5 +34,5 @@ COPY source_lever_hiring ./source_lever_hiring ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/source-lever-hiring diff --git a/airbyte-integrations/connectors/source-lever-hiring/integration_tests/spec.json b/airbyte-integrations/connectors/source-lever-hiring/integration_tests/spec.json index 9aca720bc46b6..4df7073f2dc7b 100644 --- a/airbyte-integrations/connectors/source-lever-hiring/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-lever-hiring/integration_tests/spec.json @@ -29,12 +29,12 @@ "client_id": { "title": "Client ID", "type": "string", - "description": "The Client ID of your application" + "description": "The Client ID of your Lever Hiring developer application." 
}, "client_secret": { "title": "Client Secret", "type": "string", - "description": "The client secret of your application", + "description": "The Client Secret of your Lever Hiring developer application.", "airbyte_secret": true }, "option_title": { @@ -46,7 +46,7 @@ "refresh_token": { "type": "string", "title": "Refresh Token", - "description": "OAuth access token", + "description": "The token for obtaining new access token.", "airbyte_secret": true } } @@ -57,7 +57,7 @@ "order": 0, "type": "string", "title": "Start Date", - "description": "The date from which you'd like to replicate data for Lever in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated. Note that it will be used only in the following incremental streams: comments, commits and issues.", + "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated. Note that it will be used only in the following incremental streams: comments, commits, and issues.", "examples": ["2021-03-01T00:00:00Z"], "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" }, diff --git a/airbyte-integrations/connectors/source-lever-hiring/source_lever_hiring/spec.json b/airbyte-integrations/connectors/source-lever-hiring/source_lever_hiring/spec.json index 9aca720bc46b6..4df7073f2dc7b 100644 --- a/airbyte-integrations/connectors/source-lever-hiring/source_lever_hiring/spec.json +++ b/airbyte-integrations/connectors/source-lever-hiring/source_lever_hiring/spec.json @@ -29,12 +29,12 @@ "client_id": { "title": "Client ID", "type": "string", - "description": "The Client ID of your application" + "description": "The Client ID of your Lever Hiring developer application." }, "client_secret": { "title": "Client Secret", "type": "string", - "description": "The client secret of your application", + "description": "The Client Secret of your Lever Hiring developer application.", "airbyte_secret": true }, "option_title": { @@ -46,7 +46,7 @@ "refresh_token": { "type": "string", "title": "Refresh Token", - "description": "OAuth access token", + "description": "The token for obtaining new access token.", "airbyte_secret": true } } @@ -57,7 +57,7 @@ "order": 0, "type": "string", "title": "Start Date", - "description": "The date from which you'd like to replicate data for Lever in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated. Note that it will be used only in the following incremental streams: comments, commits and issues.", + "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated. 
Note that it will be used only in the following incremental streams: comments, commits, and issues.", "examples": ["2021-03-01T00:00:00Z"], "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" }, diff --git a/docs/integrations/sources/lever-hiring.md b/docs/integrations/sources/lever-hiring.md index ef8900d761e19..5c881dcec3ace 100644 --- a/docs/integrations/sources/lever-hiring.md +++ b/docs/integrations/sources/lever-hiring.md @@ -43,6 +43,7 @@ The Lever Hiring connector should not run into Lever Hiring API limitations unde | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.2 | 2021-12-30 | [9214](https://github.com/airbytehq/airbyte/pull/9214) | Update title and descriptions | | 0.1.1 | 2021-12-16 | [7677](https://github.com/airbytehq/airbyte/pull/7677) | OAuth Automated Authentication | | 0.1.0 | 2021-09-22 | [6141](https://github.com/airbytehq/airbyte/pull/6141) | Add Lever Hiring Source Connector | From 9afbbff454978f64849b7dc910f013c660418eb2 Mon Sep 17 00:00:00 2001 From: Juozas V Date: Thu, 20 Jan 2022 16:03:12 +0200 Subject: [PATCH 168/215] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20Flexport?= =?UTF-8?q?=20(#8777)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../a4444d7b-c9ee-4d99-8d50-78e71abe7174.json | 8 + .../src/main/resources/icons/flexport.svg | 6 + .../resources/seed/source_definitions.yaml | 7 + airbyte-integrations/builds.md | 1 + .../connectors/source-flexport/.dockerignore | 6 + .../connectors/source-flexport/Dockerfile | 38 + .../connectors/source-flexport/README.md | 132 ++ .../acceptance-test-config.yml | 42 + .../source-flexport/acceptance-test-docker.sh | 16 + .../connectors/source-flexport/bootstrap.md | 14 + .../connectors/source-flexport/build.gradle | 9 + .../integration_tests/__init__.py | 3 + .../integration_tests/abnormal_state.json | 5 + .../integration_tests/acceptance.py | 14 + .../integration_tests/catalog.json | 36 + .../configured_catalog_companies.json | 197 +++ .../configured_catalog_invoices.json | 504 +++++++ .../configured_catalog_locations.json | 141 ++ .../configured_catalog_products.json | 162 +++ .../configured_catalog_shipments.json | 1287 +++++++++++++++++ .../integration_tests/invalid_config.json | 4 + .../integration_tests/sample_config.json | 4 + .../integration_tests/sample_state.json | 5 + .../connectors/source-flexport/main.py | 13 + .../source-flexport/requirements.txt | 2 + .../connectors/source-flexport/setup.py | 30 + .../source_flexport/__init__.py | 8 + .../source_flexport/schemas/companies.json | 3 + .../source_flexport/schemas/invoices.json | 3 + .../source_flexport/schemas/locations.json | 3 + .../source_flexport/schemas/products.json | 3 + .../schemas/shared/address.json | 52 + .../schemas/shared/air/shipment.json | 28 + .../schemas/shared/api/refs/collection.json | 20 + .../schemas/shared/api/refs/object.json | 24 + .../schemas/shared/company_entity.json | 42 + .../shared/company_entity/vat_number.json | 20 + .../schemas/shared/credit_memo.json | 29 + .../schemas/shared/hs_code.json | 24 + .../schemas/shared/invoice.json | 90 ++ .../schemas/shared/invoice/quantity.json | 20 + .../schemas/shared/invoice/rate.json | 20 + .../schemas/shared/invoice_item.json | 50 + .../source_flexport/schemas/shared/money.json | 20 + .../schemas/shared/network/company.json | 47 + .../schemas/shared/network/location.json | 44 + .../schemas/shared/ocean/shipment.json | 36 + .../schemas/shared/product.json | 73 + 
.../shared/product/classification.json | 23 + .../schemas/shared/product/property.json | 20 + .../schemas/shared/product/supplier.json | 21 + .../schemas/shared/quantity/volume.json | 21 + .../schemas/shared/quantity/weight.json | 21 + .../schemas/shared/shipment.json | 289 ++++ .../shared/shipment/dangerous_goods.json | 18 + .../schemas/shared/shipment_item.json | 36 + .../source_flexport/schemas/shipments.json | 3 + .../source-flexport/source_flexport/source.py | 41 + .../source-flexport/source_flexport/spec.json | 24 + .../source_flexport/streams.py | 174 +++ .../source-flexport/unit_tests/__init__.py | 3 + .../unit_tests/test_incremental_streams.py | 124 ++ .../source-flexport/unit_tests/test_source.py | 41 + .../unit_tests/test_streams.py | 87 ++ docs/SUMMARY.md | 1 + docs/integrations/README.md | 1 + docs/integrations/sources/flexport.md | 49 + 67 files changed, 4342 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/a4444d7b-c9ee-4d99-8d50-78e71abe7174.json create mode 100644 airbyte-config/init/src/main/resources/icons/flexport.svg create mode 100644 airbyte-integrations/connectors/source-flexport/.dockerignore create mode 100644 airbyte-integrations/connectors/source-flexport/Dockerfile create mode 100644 airbyte-integrations/connectors/source-flexport/README.md create mode 100644 airbyte-integrations/connectors/source-flexport/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-flexport/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-flexport/bootstrap.md create mode 100644 airbyte-integrations/connectors/source-flexport/build.gradle create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_companies.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_invoices.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_locations.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_products.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_shipments.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-flexport/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-flexport/main.py create mode 100644 airbyte-integrations/connectors/source-flexport/requirements.txt create mode 100644 airbyte-integrations/connectors/source-flexport/setup.py create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/__init__.py create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/companies.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/invoices.json create mode 
100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/locations.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/products.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/address.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/air/shipment.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/api/refs/collection.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/api/refs/object.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/company_entity.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/company_entity/vat_number.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/credit_memo.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/hs_code.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice/quantity.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice/rate.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice_item.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/money.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/network/company.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/network/location.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/ocean/shipment.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/classification.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/property.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/supplier.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/quantity/volume.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/quantity/weight.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment/dangerous_goods.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment_item.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shipments.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/source.py create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/spec.json create mode 100644 airbyte-integrations/connectors/source-flexport/source_flexport/streams.py create mode 100644 
airbyte-integrations/connectors/source-flexport/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-flexport/unit_tests/test_incremental_streams.py create mode 100644 airbyte-integrations/connectors/source-flexport/unit_tests/test_source.py create mode 100644 airbyte-integrations/connectors/source-flexport/unit_tests/test_streams.py create mode 100644 docs/integrations/sources/flexport.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/a4444d7b-c9ee-4d99-8d50-78e71abe7174.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/a4444d7b-c9ee-4d99-8d50-78e71abe7174.json new file mode 100644 index 0000000000000..7e548380395f1 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/a4444d7b-c9ee-4d99-8d50-78e71abe7174.json @@ -0,0 +1,8 @@ +{ + "sourceDefinitionId": "a4444d7b-c9ee-4d99-8d50-78e71abe7174", + "name": "Flexport", + "dockerRepository": "airbyte/source-flexport", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/flexport", + "icon": "flexport.svg" +} diff --git a/airbyte-config/init/src/main/resources/icons/flexport.svg b/airbyte-config/init/src/main/resources/icons/flexport.svg new file mode 100644 index 0000000000000..248bc9e143342 --- /dev/null +++ b/airbyte-config/init/src/main/resources/icons/flexport.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 724e3cf8f2f19..2b10e51ed7c23 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -200,6 +200,13 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/file icon: file.svg sourceType: file +- name: Flexport + sourceDefinitionId: a4444d7b-c9ee-4d99-8d50-78e71abe7174 + dockerRepository: airbyte/source-flexport + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/flexport + icon: flexport.svg + sourceType: api - name: Freshdesk sourceDefinitionId: ec4b9503-13cb-48ab-a4ab-6ade4be46567 dockerRepository: airbyte/source-freshdesk diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index c1adc5af18e50..93437d64b079e 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -28,6 +28,7 @@ | Exchange Rates API | [![source-exchange-rates](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-exchange-rates%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-exchange-rates) | | Facebook Marketing | [![source-facebook-marketing](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-facebook-marketing%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-facebook-marketing) | | Files | [![source-file](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-file%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-file) | +| Flexport | [![source-file](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-flexport%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-flexport) | | Freshdesk | 
[![source-freshdesk](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-freshdesk%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-freshdesk) | | Freshsales | [![source-freshsales](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-freshsales%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-freshsales) | | Freshservice | [![source-service](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-freshservice%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-freshservice) | diff --git a/airbyte-integrations/connectors/source-flexport/.dockerignore b/airbyte-integrations/connectors/source-flexport/.dockerignore new file mode 100644 index 0000000000000..0cae3269b942f --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_flexport +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-flexport/Dockerfile b/airbyte-integrations/connectors/source-flexport/Dockerfile new file mode 100644 index 0000000000000..f111db41ddb4a --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_flexport ./source_flexport + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-flexport diff --git a/airbyte-integrations/connectors/source-flexport/README.md b/airbyte-integrations/connectors/source-flexport/README.md new file mode 100644 index 0000000000000..4ffa1faead1d9 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/README.md @@ -0,0 +1,132 @@ +# Flexport Source + +This is the repository for the Flexport source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/flexport). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. 
To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-flexport:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/flexport) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_flexport/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source flexport test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-flexport:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-flexport:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-flexport:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-flexport:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-flexport:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-flexport:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. 
+First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration Tests +Place custom tests inside the `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize the `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside `integration_tests/acceptance.py`. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with Docker, run `./acceptance-test-docker.sh` from the connector root. + +### Using Gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-flexport:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-flexport:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies into two groups: +* dependencies required for your connector to work go in the `MAIN_REQUIREMENTS` list. +* dependencies required for testing go in the `TEST_REQUIREMENTS` list. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million-dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
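The connector's `bootstrap.md` (added later in this patch) describes Flexport as a uniformly paginated CRUD REST API: requests authenticate with a pre-created API token, and collections are paged with a page-number/items-per-page strategy. A minimal sketch of that request pattern is shown below; the base URL, auth header, query parameter names, and response envelope are assumptions made for illustration and are not taken from the connector code.
```
# Illustrative sketch only -- endpoint, header, parameter names, and response
# envelope are assumptions, not the connector's actual implementation.
import requests

API_BASE = "https://api.flexport.com"  # assumed base URL
API_TOKEN = "your-pre-created-flexport-api-token"


def fetch_all(resource: str, per_page: int = 100):
    """Page through a Flexport collection using page-number pagination."""
    page = 1
    while True:
        response = requests.get(
            f"{API_BASE}/{resource}",
            headers={"Authorization": f"Bearer {API_TOKEN}"},  # token auth; header form assumed
            params={"page": page, "per": per_page},  # parameter names assumed
        )
        response.raise_for_status()
        records = response.json().get("data", [])  # assumed envelope: a list of records
        if not records:
            break
        yield from records
        page += 1


# Example: iterate over every company in the account (resource path assumed).
for company in fetch_all("network/companies"):
    print(company["id"], company.get("name"))
```
The connector's actual request handling is defined in `source_flexport/streams.py` and `source_flexport/source.py`, which this patch also adds; the sketch only illustrates the pagination strategy called out in the bootstrap notes.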
diff --git a/airbyte-integrations/connectors/source-flexport/acceptance-test-config.yml b/airbyte-integrations/connectors/source-flexport/acceptance-test-config.yml new file mode 100644 index 0000000000000..c4cb1c4d10eba --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/acceptance-test-config.yml @@ -0,0 +1,42 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-flexport:dev +tests: + spec: + - spec_path: "source_flexport/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_companies.json" + empty_streams: [] + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_locations.json" + empty_streams: [] + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_products.json" + empty_streams: [] + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_shipments.json" + empty_streams: [] + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_invoices.json" + empty_streams: [] + incremental: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_shipments.json" + future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_companies.json" + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_locations.json" + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_products.json" + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog_invoices.json" diff --git a/airbyte-integrations/connectors/source-flexport/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-flexport/acceptance-test-docker.sh new file mode 100644 index 0000000000000..c51577d10690c --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-flexport/bootstrap.md b/airbyte-integrations/connectors/source-flexport/bootstrap.md new file mode 100644 index 0000000000000..8c4fbffc79e8a --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/bootstrap.md @@ -0,0 +1,14 @@ +# Flexport + +Flexport is a straightforward CRUD REST [API](https://developers.flexport.com/s/api). It consists of some REST resources like Company, Location, Product, etc., each of which is uniformly paginated. 
Each resource has a link to a related resource or resource collection. All relations might be optionally embedded within the resource instance. The `id` property identifies each resource. + +API documentation is either outdated or incomplete. The issues are as follows: + +1) Some resources that get embedded by default are not documented at all. However, since the schema of all resources follows the same pattern, their schema can be easily deduced too. +2) The documentation doesn't specify which properties are nullable - trial and error is the only way to learn that. +3) Some properties' types are ambiguous, e.g., the `create` action specifies a property as required while `read` returns a nullable value. +4) The type of some properties is mislabeled, e.g., `integer` instead of an actual `string` type. + +Authentication uses a pre-created API token, which can be [created in the UI](https://apidocs.flexport.com/reference/authentication). + +Pagination uses a page-number and items-per-page strategy. diff --git a/airbyte-integrations/connectors/source-flexport/build.gradle b/airbyte-integrations/connectors/source-flexport/build.gradle new file mode 100644 index 0000000000000..c3d63f1d0f908 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_flexport' +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/__init__.py b/airbyte-integrations/connectors/source-flexport/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-flexport/integration_tests/abnormal_state.json new file mode 100644 index 0000000000000..2ecb4887fc3df --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "shipments": { + "updated_at": "2050-12-01" + } +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-flexport/integration_tests/acceptance.py new file mode 100644 index 0000000000000..0347f2a0b143d --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/catalog.json b/airbyte-integrations/connectors/source-flexport/integration_tests/catalog.json new file mode 100644 index 0000000000000..ba12986b30c71 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/catalog.json @@ -0,0 +1,36 @@ +{ + "streams": [ + { + "name": "companies", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + { + "name": "locations", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + { + "name": "products", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + { + "name": "invoices", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + { + "name": "shipments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updated_at"], + "source_defined_primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_companies.json b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_companies.json new file mode 100644 index 0000000000000..f41b9382d95fc --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_companies.json @@ -0,0 +1,197 @@ +{ + "streams": [ + { + "stream": { + "name": "companies", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /network/company for this object.", + "pattern": "^/network/company$" + }, + "id": { + "type": "string", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company." + }, + "ref": { + "type": "string", + "description": "Your ref for this company entity, as set in the Network tab." + }, + "editable": { + "type": "boolean", + "description": "Whether or not you manage this company and can edit its attributes." + }, + "entities": { + "type": "array", + "description": "Array of company entities that belong to this company.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." + }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } + } + } + } + } + }, + "locations": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." + } + } + }, + "contacts": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." + } + } + }, + "metadata": { + "type": "object", + "description": "User defined metadata attached to the company." 
+ } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_invoices.json b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_invoices.json new file mode 100644 index 0000000000000..1b1fc0277a14f --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_invoices.json @@ -0,0 +1,504 @@ +{ + "streams": [ + { + "stream": { + "name": "invoices", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "Type of the object. Always /invoice for this object.", + "pattern": "^/invoice$" + }, + "id": { + "type": "string", + "description": "Unique ID for the invoice" + }, + "name": { + "type": "string", + "description": "Unique name for the invoice" + }, + "issued_at": { + "type": "string", + "description": "When the invoice was issued. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + }, + "due_date": { + "type": "string", + "description": "Due date of the invoice. Calendar date only, no time.", + "format": "date" + }, + "total": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /money for this object.", + "pattern": "^/money$" + }, + "amount": { + "type": "string", + "description": "A string representing the decimal amount" + }, + "currency_code": { + "type": "string", + "description": "The currency of the amount" + } + } + }, + "balance": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /money for this object.", + "pattern": "^/money$" + }, + "amount": { + "type": "string", + "description": "A string representing the decimal amount" + }, + "currency_code": { + "type": "string", + "description": "The currency of the amount" + } + } + }, + "status": { + "type": "string", + "description": "Status of the invoice. One of:", + "enum": [ + "outstanding", + "past_due", + "void", + "paid", + "payment_pending" + ] + }, + "type": { + "type": ["null", "string"], + "description": "What the invoice is being issued for. Shipment related charges are type Shipment and non-shipment related charges are type Client" + }, + "voided_at": { + "type": ["null", "string"], + "description": "When the invoice was voided (if applicable). In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + }, + "last_updated_at": { + "type": ["null", "string"], + "description": "Time of the last adjustment to in invoice. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + }, + "recipient": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." 
+ }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." + }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } + } + } + } + }, + "issuer": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." 
+ }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." + }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } + } + } + } + }, + "items": { + "type": "array", + "description": "Inlined list of InvoiceItems", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /invoice_item for this object.", + "pattern": "^/invoice_item$" + }, + "name": { + "type": "string", + "description": "Line item name. Example: FCL 40' HQ" + }, + "slug": { + "type": "string", + "description": "Line item code. Example: fcl_40_hq" + }, + "category": { + "type": "string", + "description": "Category of the charge. Example freight" + }, + "amount": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /money for this object.", + "pattern": "^/money$" + }, + "amount": { + "type": "string", + "description": "A string representing the decimal amount" + }, + "currency_code": { + "type": "string", + "description": "The currency of the amount" + } + } + }, + "rate": { + "description": "Describes the rate per unit used to calculate the price of a line item", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /invoice/rate for this object.", + "pattern": "^/invoice/rate$" + }, + "value": { + "type": "string", + "description": "Numeric value described by qualifier" + }, + "qualifier": { + "type": "string", + "description": "Describes what the value represents ('USD', '% Drayage Base', etc)" + } + } + } + ] + }, + "quantity": { + "description": "Describes the number of units used to calculate the price of a line item", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /invoice/quantity for this object.", + "pattern": "^/invoice/quantity$" + }, + "value": { + "type": "string", + "description": "Numeric value described by qualifier" + }, + "qualifier": { + "type": "string", + "description": "Describes what the value represents ('kg', 'bill of lading', 'USD')" + } + } + } + ] + } + } + } + }, + "credit_memos": { + "type": "array", + "description": "Inlined list of CreditMemos", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /credit_memo for this object.", + "pattern": "^/credit_memo$" + }, + "amount": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /money for this object.", + "pattern": "^/money$" + }, + "amount": { + "type": "string", + "description": "A string representing the decimal amount" + }, + "currency_code": { + "type": "string", + "description": "The currency of the amount" + } + } + }, + "category": { + "type": "string", + "description": "Category of the credit memo" + }, + "reason": { + "type": "string", + "description": "Why the credit was applied to the invoice" + }, + "credited_at": { + "type": "string", + "description": "When the credit was applied. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + } + } + } + }, + "notes": { + "type": "array", + "description": "Memoranda relevant to the invoice", + "items": { + "type": "string" + } + }, + "shipments": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." 
+ } + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_locations.json b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_locations.json new file mode 100644 index 0000000000000..b56c516c84c27 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_locations.json @@ -0,0 +1,141 @@ +{ + "streams": [ + { + "stream": { + "name": "locations", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /network/location for this object.", + "pattern": "^/network/location$" + }, + "id": { + "type": "string", + "description": "Unique identifier for the location." + }, + "name": { + "type": ["null", "string"], + "description": "Name of the location." + }, + "address": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + }, + "editable": { + "type": "boolean", + "description": "Whether or not you manage this location and can edit its attributes." + }, + "contacts": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." + } + } + }, + "company": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /api/refs/object for this object.", + "pattern": "^/api/refs/object$" + }, + "link": { + "type": "string", + "description": "API end point that points to a resource." + }, + "ref_type": { + "type": "string", + "description": "The _object value of the object that the link points to." + }, + "id": { + "type": ["integer", "string"], + "description": "The id value of the object that the link points to." + } + } + }, + "ref": { + "type": "string", + "description": "Your custom ref to this location." + }, + "metadata": { + "type": "object", + "description": "User defined metadata attached to the location." + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_products.json b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_products.json new file mode 100644 index 0000000000000..bd224ac2b7f96 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_products.json @@ -0,0 +1,162 @@ +{ + "streams": [ + { + "stream": { + "name": "products", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product for this object.", + "pattern": "^/product$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of product." + }, + "description": { + "type": ["null", "string"], + "description": "Description of product." + }, + "sku": { + "type": ["null", "string"], + "description": "SKU of product." + }, + "product_category": { + "type": ["null", "string"], + "description": "The category of the product" + }, + "country_of_origin": { + "type": ["null", "string"], + "description": "Nation in which the product is manufactured" + }, + "client_verified": { + "type": "boolean", + "description": "Indicates whether Client has verified this Product." + }, + "product_properties": { + "type": "array", + "description": "An array of product properties, represented by ProductProperty", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product/property for this object.", + "pattern": "^/product/property$" + }, + "type": { + "type": "string", + "description": "The property describing the product" + }, + "value": { + "type": "string", + "description": "The value associated with the product" + } + } + } + }, + "hs_codes": { + "type": "array", + "description": "DEPRECATED - HS codes can be found in the classifications array", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /hs_code for this object.", + "pattern": "^/hs_code$" + }, + "description": { + "type": "string", + "description": "Description of HS or HTS code classification." + }, + "code": { + "type": "string", + "description": "6 digit (international HS) or 10 digit (US HTS) code." 
+ }, + "country_code": { + "type": "string", + "description": "The country code that the HS code belongs to." + } + } + } + }, + "classifications": { + "type": "array", + "description": "An array of product classifications, represented by Classification", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product/classification for this object.", + "pattern": "^/product/classification$" + }, + "region": { + "type": "string", + "description": "The two letter code for the region of the classification" + }, + "codes": { + "type": "array", + "description": "Array of 6 digit (international HS) or 10 digit (US HTS) code.", + "items": { + "type": "string" + } + } + } + } + }, + "suppliers": { + "type": "array", + "description": "Array of suppliers.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product/supplier for this object.", + "pattern": "^/product/supplier$" + }, + "id": { + "type": "integer" + }, + "ref": { + "type": "string" + }, + "country_of_origin": { + "type": ["null", "string"] + } + } + } + }, + "archived_at": { + "type": ["null", "string"], + "description": "A date string representing when the product was archived. Will be null if product is not archived. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_shipments.json b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_shipments.json new file mode 100644 index 0000000000000..ef376ad9d1201 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/configured_catalog_shipments.json @@ -0,0 +1,1287 @@ +{ + "streams": [ + { + "stream": { + "name": "shipments", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /shipment for this object.", + "pattern": "^/shipment$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the shipment." + }, + "booking": { + "description": "The booking associated with this shipment", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/object for this object.", + "pattern": "^/api/refs/object$" + }, + "link": { + "type": "string", + "description": "API end point that points to a resource." + }, + "ref_type": { + "type": "string", + "description": "The _object value of the object that the link points to." + }, + "id": { + "type": ["integer", "string"], + "description": "The id value of the object that the link points to." 
+ } + } + } + ] + }, + "transportation_mode": { + "type": "string", + "description": "Transportation mode of the main carriage of the shipment. This can be either Ocean or Air." + }, + "freight_type": { + "type": "string", + "description": "The type of freight service provided. One of:", + "enum": [ + "port_to_door", + "port_to_port", + "door_to_door", + "door_to_port" + ] + }, + "updated_at": { + "type": "string", + "description": "Date when the shipment object was last updated. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + }, + "archived_at": { + "type": ["null", "string"], + "description": "Date when the shipment was archived, if applicable. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + }, + "incoterm": { + "type": "string", + "description": "The Incoterm of your shipment. This can be EXW, FCA, FAS, FOB, CPT, CFR, CIF, CIP, DAT, DAP, DDP, or DPU." + }, + "calculated_weight": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /quantity/weight for this object.", + "pattern": "^/quantity/weight" + }, + "value": { + "type": "number", + "description": "Specifies the quantity of units as a float." + }, + "unit": { + "type": "string", + "description": "Specifies the unit of measure for this quantity.", + "enum": ["kg", "lbs"] + } + } + }, + "calculated_volume": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /quantity/volume for this object.", + "pattern": "^/quantity/volume" + }, + "value": { + "type": "number", + "description": "Specifies the quantity of units as a float." + }, + "unit": { + "type": "string", + "description": "Specifies the unit of measure for this quantity.", + "enum": ["cbm", "cbft"] + } + } + }, + "estimated_departure_date": { + "type": ["null", "string"], + "description": "Estimated departure date from the first port of the main voyage. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "actual_departure_date": { + "type": ["null", "string"], + "description": "Actual departure date from the first port of the main voyage. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "target_delivery_date": { + "type": ["null", "string"], + "description": "Target date for when the shipment will be fully delivered. This date is set when the shipment''s booking is confirmed. This value may be different from estimated_delivered_in_full_date, which is updated when there is new information about the progress of a shipment. Date only.", + "format": "date" + }, + "estimated_arrival_date": { + "type": ["null", "string"], + "description": "Estimated arrival date to the last port of the main voyage. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "actual_arrival_date": { + "type": ["null", "string"], + "description": "Actual arrival date to the last port of the main voyage. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "status": { + "type": "string", + "description": "Status of the shipment. 
One of:", + "enum": [ + "seller_location", + "in_transit_to_departure_port", + "departure_port", + "in_transit_to_arrival_port", + "arrival_port", + "in_transit_to_final_destination", + "final_destination" + ] + }, + "priority": { + "type": "string" + }, + "pieces": { + "type": ["null", "integer"], + "description": "Total number of pieces in the shipment." + }, + "it_number": { + "type": ["null", "string"], + "description": "Inbond Transit number used for US Customs" + }, + "created_date": { + "type": "string", + "description": "Date the user has confirmed Flexport quote and cargo is getting ready to ship.", + "format": "date-time" + }, + "estimated_picked_up_in_full_date": { + "type": ["null", "string"], + "description": "Estimated pickup date from the origin location. For ocean shipments with multiple containers, this is the date of last picked up container. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "actual_picked_up_in_full_date": { + "type": ["null", "string"], + "description": "Actual pickup date from the origin location. For ocean shipments with multiple containers, this is the date of last picked up container. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "estimated_delivered_in_full_date": { + "type": ["null", "string"], + "description": "Estimated delivery date to the destination location. For ocean shipments with multiple containers, this is the date of last delivered container. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "actual_delivered_in_full_date": { + "type": ["null", "string"], + "description": "Actual delivery date to the destination location. For ocean shipments with multiple containers, this is the date of last delivered container. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "cargo_ready_date": { + "type": "string", + "description": "Date when cargo is ready for pickup at the origin location. Calendar date only, no time.", + "format": "date" + }, + "wants_freight_management_bco": { + "type": "boolean", + "description": "Determines if a shipment bypasses a freight forwarder and is booked directly with a steamship line." + }, + "wants_flexport_freight": { + "type": "boolean", + "description": "Determines if a shipment is using Flexport services for a combination of import customs clearance or final leg delivery." + }, + "wants_export_customs_service": { + "type": "boolean", + "description": "Determines if the shipment requires Flexport to provide export customs service." + }, + "wants_import_customs_service": { + "type": "boolean", + "description": "Determines if the shipment requires Flexport to provide import customs service." + }, + "visibility_only": { + "type": "boolean", + "description": "Indicates if a shipment was requested for tracking via the Flexport visibility-only service." + }, + "wants_delivery_service": { + "type": "boolean", + "description": "Determines if Flexport is responsible for door delivery on a shipment." + }, + "wants_flexport_insurance": { + "type": "boolean" + }, + "wants_pickup_service": { + "type": "boolean" + }, + "wants_commercial_invoice_transcription": { + "type": "boolean" + }, + "wants_trade_declaration_service": { + "type": ["null", "boolean"] + }, + "air_shipment": { + "description": "Air-specific Shipment information. Available only if this is an air shipment. 
null otherwise.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object`s type. Always /air/shipment for this object.", + "pattern": "^/air/shipment$" + }, + "house_airway_bill": { + "type": "string", + "description": "House air waybill number" + }, + "master_airway_bill": { + "type": "string", + "description": "Master air waybill number" + }, + "chargeable_weight": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /quantity/weight for this object.", + "pattern": "^/quantity/weight" + }, + "value": { + "type": "number", + "description": "Specifies the quantity of units as a float." + }, + "unit": { + "type": "string", + "description": "Specifies the unit of measure for this quantity.", + "enum": ["kg", "lbs"] + } + } + }, + "chargeable_volume": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /quantity/volume for this object.", + "pattern": "^/quantity/volume" + }, + "value": { + "type": "number", + "description": "Specifies the quantity of units as a float." + }, + "unit": { + "type": "string", + "description": "Specifies the unit of measure for this quantity.", + "enum": ["cbm", "cbft"] + } + } + } + } + } + ] + }, + "ocean_shipment": { + "description": "Ocean-specific Shipment information. Available only if this is an ocean shipment. null otherwise.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /ocean/shipment for this object.", + "pattern": "^/ocean/shipment$" + }, + "is_lcl": { + "type": "boolean", + "description": "Flag that indicates whether the object is a LCL shipment." + }, + "house_bill_number": { + "type": "string", + "description": "House bill of lading number." + }, + "master_bill_number": { + "type": ["null", "string"], + "description": "Master bill of lading number." + }, + "carrier_booking_number": { + "type": ["null", "string"], + "description": "Ocean carrier booking reference number." + }, + "confirmed_space_released_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "containers": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." + } + } + } + } + } + ] + }, + "dangerous_goods": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /shipment/dangerous_goods for this object.", + "pattern": "^/shipment/dangerous_goods" + }, + "review_status": { + "type": ["null", "string"] + }, + "classifications": { + "type": "array" + } + } + }, + "shippers": { + "type": "array", + "description": "Array of shippers of the shipment, represented by the CompanyEntity objects.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." + }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } + } + } + } + } + }, + "consignees": { + "type": "array", + "description": "Array of consignees of the shipment, represented by CompanyEntity objects.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." + }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } + } + } + } + } + }, + "buyers": { + "type": "array", + "description": "Array of buyers of the shipment, represented by CompanyEntity objects.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." + }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } + } + } + } + } + }, + "sellers": { + "type": "array", + "description": "Array of sellers of the shipment, represented by CompanyEntity objects.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." 
+ }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." + }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } + } + } + } + } + }, + "importers_of_record": { + "type": "array", + "description": "Array of importers of record of the shipment, represented by CompanyEntity objects.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." + }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." 
+ }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } + } + } + } + } + }, + "items": { + "type": "array", + "description": "Array of products and SKUs in the shipment, represented by ShipmentItem objects.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /shipment_item for this object.", + "pattern": "^/shipment_item$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "total_units": { + "type": "string", + "description": "Total quantity of specific SKU within shipment." + }, + "total_weight": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /quantity/weight for this object.", + "pattern": "^/quantity/weight" + }, + "value": { + "type": "number", + "description": "Specifies the quantity of units as a float." + }, + "unit": { + "type": "string", + "description": "Specifies the unit of measure for this quantity.", + "enum": ["kg", "lbs"] + } + } + }, + "total_volume": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /quantity/volume for this object.", + "pattern": "^/quantity/volume" + }, + "value": { + "type": "number", + "description": "Specifies the quantity of units as a float." + }, + "unit": { + "type": "string", + "description": "Specifies the unit of measure for this quantity.", + "enum": ["cbm", "cbft"] + } + } + }, + "purchase_order_number": { + "type": "string", + "description": "Purchase order number associated with SKU within shipment." + }, + "product": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product for this object.", + "pattern": "^/product$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of product." + }, + "description": { + "type": ["null", "string"], + "description": "Description of product." + }, + "sku": { + "type": ["null", "string"], + "description": "SKU of product." 
+ }, + "product_category": { + "type": ["null", "string"], + "description": "The category of the product" + }, + "country_of_origin": { + "type": ["null", "string"], + "description": "Nation in which the product is manufactured" + }, + "client_verified": { + "type": "boolean", + "description": "Indicates whether Client has verified this Product." + }, + "product_properties": { + "type": "array", + "description": "An array of product properties, represented by ProductProperty", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product/property for this object.", + "pattern": "^/product/property$" + }, + "type": { + "type": "string", + "description": "The property describing the product" + }, + "value": { + "type": "string", + "description": "The value associated with the product" + } + } + } + }, + "hs_codes": { + "type": "array", + "description": "DEPRECATED - HS codes can be found in the classifications array", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /hs_code for this object.", + "pattern": "^/hs_code$" + }, + "description": { + "type": "string", + "description": "Description of HS or HTS code classification." + }, + "code": { + "type": "string", + "description": "6 digit (international HS) or 10 digit (US HTS) code." + }, + "country_code": { + "type": "string", + "description": "The country code that the HS code belongs to." + } + } + } + }, + "classifications": { + "type": "array", + "description": "An array of product classifications, represented by Classification", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product/classification for this object.", + "pattern": "^/product/classification$" + }, + "region": { + "type": "string", + "description": "The two letter code for the region of the classification" + }, + "codes": { + "type": "array", + "description": "Array of 6 digit (international HS) or 10 digit (US HTS) code.", + "items": { + "type": "string" + } + } + } + } + }, + "suppliers": { + "type": "array", + "description": "Array of suppliers.", + "items": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product/supplier for this object.", + "pattern": "^/product/supplier$" + }, + "id": { + "type": "integer" + }, + "ref": { + "type": "string" + }, + "country_of_origin": { + "type": ["null", "string"] + } + } + } + }, + "archived_at": { + "type": ["null", "string"], + "description": "A date string representing when the product was archived. Will be null if product is not archived. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + } + } + } + } + } + }, + "legs": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." + } + } + }, + "customs_entries": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." + } + } + }, + "commercial_invoices": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." + } + } + }, + "documents": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." + } + } + }, + "metadata": { + "type": "object", + "description": "User defined metadata attached to the shipment." + }, + "departure_date": { + "$comment": "deprecated", + "type": ["null", "string"], + "description": "Departure date from the first port of the main voyage.", + "format": "date-time" + }, + "arrival_date": { + "$comment": "deprecated", + "type": ["null", "string"], + "description": "Arrival date to the last port of the main voyage.", + "format": "date-time" + }, + "picked_up_in_full_date": { + "$comment": "deprecated", + "type": ["null", "string"], + "description": "Pickup date from the origin location. For ocean shipments with multiple containers, this is the date of last picked up container.", + "format": "date-time" + }, + "delivered_in_full_date": { + "$comment": "deprecated", + "type": ["null", "string"], + "description": "Delivery date to the destination location. 
For ocean shipments with multiple containers, this is the date of last delivered container.", + "format": "date-time" + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updated_at"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append_dedup" + } + ] +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-flexport/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..aaf245da99a15 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/invalid_config.json @@ -0,0 +1,4 @@ +{ + "api_key": "the_key", + "start_date": "2050-01-01" +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-flexport/integration_tests/sample_config.json new file mode 100644 index 0000000000000..df129091af40b --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/sample_config.json @@ -0,0 +1,4 @@ +{ + "api_key": "the_key", + "start_date": "2021-12-01" +} diff --git a/airbyte-integrations/connectors/source-flexport/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-flexport/integration_tests/sample_state.json new file mode 100644 index 0000000000000..2c8cd024d20f5 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "shipments": { + "updated_at": "2021-12-01" + } +} diff --git a/airbyte-integrations/connectors/source-flexport/main.py b/airbyte-integrations/connectors/source-flexport/main.py new file mode 100644 index 0000000000000..0147afb4bb466 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_flexport import SourceFlexport + +if __name__ == "__main__": + source = SourceFlexport() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-flexport/requirements.txt b/airbyte-integrations/connectors/source-flexport/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-flexport/setup.py b/airbyte-integrations/connectors/source-flexport/setup.py new file mode 100644 index 0000000000000..0c0b777b00118 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/setup.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "requests-mock~=1.9.3", + "source-acceptance-test", +] + +setup( + name="source_flexport", + description="Source implementation for Flexport.", + author="Labanoras Tech", + author_email="jv@labanoras.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/__init__.py b/airbyte-integrations/connectors/source-flexport/source_flexport/__init__.py new file mode 100644 index 0000000000000..656ed2e1debfd --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceFlexport + +__all__ = ["SourceFlexport"] diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/companies.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/companies.json new file mode 100644 index 0000000000000..1804dac46ebd7 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/companies.json @@ -0,0 +1,3 @@ +{ + "$ref": "network/company.json" +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/invoices.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/invoices.json new file mode 100644 index 0000000000000..43c76ebf2d3ae --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/invoices.json @@ -0,0 +1,3 @@ +{ + "$ref": "invoice.json" +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/locations.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/locations.json new file mode 100644 index 0000000000000..2601c886e8381 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/locations.json @@ -0,0 +1,3 @@ +{ + "$ref": "network/location.json" +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/products.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/products.json new file mode 100644 index 0000000000000..8f7a7adb6586e --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/products.json @@ -0,0 +1,3 @@ +{ + "$ref": "product.json" +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/address.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/address.json new file mode 100644 index 0000000000000..2f76374bb28fb --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/address.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /address for this object.", + "pattern": "^/address$" + }, + "street_address": { + "type": "string", + "description": "Address line 1 (Street address/PO Box)." + }, + "street_address2": { + "type": ["null", "string"], + "description": "Address line 2 (Apartment/Suite/Unit/Building)." 
+ }, + "city": { + "type": "string", + "description": "City/Town." + }, + "state": { + "type": ["null", "string"], + "description": "State/County/Province/Region." + }, + "country": { + "type": ["null", "string"], + "description": "Country." + }, + "country_code": { + "type": ["null", "string"], + "description": "Two-letter country code (ISO 3166-1 Alpha-2)." + }, + "zip": { + "type": ["null", "string"], + "description": "ZIP or postal code." + }, + "unlocode": { + "type": ["null", "string"], + "description": "If port, then UN/LOCODE (United Nations Code for Trade and Transport Locations)." + }, + "timezone": { + "type": "string", + "description": "Timezone for this address" + }, + "ref": { + "type": ["null", "string"], + "description": "Your reference for the address, as set in your network tab" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/air/shipment.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/air/shipment.json new file mode 100644 index 0000000000000..747e102fe3904 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/air/shipment.json @@ -0,0 +1,28 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object`s type. Always /air/shipment for this object.", + "pattern": "^/air/shipment$" + }, + "house_airway_bill": { + "type": "string", + "description": "House air waybill number" + }, + "master_airway_bill": { + "type": "string", + "description": "Master air waybill number" + }, + "chargeable_weight": { + "description": "Chargeable weight (kg or lbs) of the shipment.", + "$ref": "../quantity/weight.json" + }, + "chargeable_volume": { + "description": "Chargeable volume (cbm or cft) of the shipment.", + "$ref": "../quantity/volume.json" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/api/refs/collection.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/api/refs/collection.json new file mode 100644 index 0000000000000..98296a6bfb9a8 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/api/refs/collection.json @@ -0,0 +1,20 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/collection for this object.", + "pattern": "^/api/refs/collection$" + }, + "link": { + "type": "string", + "description": "API end point that points to a list of resources" + }, + "ref_type": { + "type": "string", + "description": "The _object value of each individual element of the list that link points to." 
+ } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/api/refs/object.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/api/refs/object.json new file mode 100644 index 0000000000000..3a2509deb2e2a --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/api/refs/object.json @@ -0,0 +1,24 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /api/refs/object for this object.", + "pattern": "^/api/refs/object$" + }, + "link": { + "type": "string", + "description": "API end point that points to a resource." + }, + "ref_type": { + "type": "string", + "description": "The _object value of the object that the link points to." + }, + "id": { + "type": ["integer", "string"], + "description": "The id value of the object that the link points to." + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/company_entity.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/company_entity.json new file mode 100644 index 0000000000000..2e31c55dbb147 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/company_entity.json @@ -0,0 +1,42 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /company_entity for this object.", + "pattern": "^/company_entity$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company entity." + }, + "mailing_address": { + "description": "Address of the company entity.", + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "address.json" + } + ] + }, + "ref": { + "type": "string", + "description": "Your reference for this company entity, as set in the Network tab." + }, + "vat_numbers": { + "type": "array", + "description": "Array of VAT numbers of the company entity.", + "items": { + "$ref": "company_entity/vat_number.json" + } + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/company_entity/vat_number.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/company_entity/vat_number.json new file mode 100644 index 0000000000000..804fb5733cd3a --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/company_entity/vat_number.json @@ -0,0 +1,20 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /company_entity/vat_number for this object.", + "pattern": "^/company_entity/vat_number$" + }, + "country_code": { + "type": "string", + "description": "ISO 3166 two-letter country code of the VAT number's country" + }, + "number": { + "type": "string", + "description": "Full VAT number, including the country code" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/credit_memo.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/credit_memo.json new file mode 100644 index 0000000000000..bd8cbd94dea6a --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/credit_memo.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /credit_memo for this object.", + "pattern": "^/credit_memo$" + }, + "amount": { + "description": "Amount of the credit", + "$ref": "money.json" + }, + "category": { + "type": "string", + "description": "Category of the credit memo" + }, + "reason": { + "type": "string", + "description": "Why the credit was applied to the invoice" + }, + "credited_at": { + "type": "string", + "description": "When the credit was applied. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/hs_code.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/hs_code.json new file mode 100644 index 0000000000000..73349a339048f --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/hs_code.json @@ -0,0 +1,24 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /hs_code for this object.", + "pattern": "^/hs_code$" + }, + "description": { + "type": "string", + "description": "Description of HS or HTS code classification." + }, + "code": { + "type": "string", + "description": "6 digit (international HS) or 10 digit (US HTS) code." + }, + "country_code": { + "type": "string", + "description": "The country code that the HS code belongs to." + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice.json new file mode 100644 index 0000000000000..84748ac13ed78 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice.json @@ -0,0 +1,90 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "Type of the object. Always /invoice for this object.", + "pattern": "^/invoice$" + }, + "id": { + "type": "string", + "description": "Unique ID for the invoice" + }, + "name": { + "type": "string", + "description": "Unique name for the invoice" + }, + "issued_at": { + "type": "string", + "description": "When the invoice was issued. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + }, + "due_date": { + "type": "string", + "description": "Due date of the invoice. 
Calendar date only, no time.",
+      "format": "date"
+    },
+    "total": {
+      "description": "Sum of charges on the invoice",
+      "$ref": "money.json"
+    },
+    "balance": {
+      "description": "Outstanding balance on the invoice",
+      "$ref": "money.json"
+    },
+    "status": {
+      "type": "string",
+      "description": "Status of the invoice. One of:",
+      "enum": ["outstanding", "past_due", "void", "paid", "payment_pending"]
+    },
+    "type": {
+      "type": ["null", "string"],
+      "description": "What the invoice is being issued for. Shipment-related charges are type Shipment and non-shipment-related charges are type Client"
+    },
+    "voided_at": {
+      "type": ["null", "string"],
+      "description": "When the invoice was voided (if applicable). In ISO8601 UTC format with timezone denoted by Z.",
+      "format": "date-time"
+    },
+    "last_updated_at": {
+      "type": ["null", "string"],
+      "description": "Time of the last adjustment to an invoice. In ISO8601 UTC format with timezone denoted by Z.",
+      "format": "date-time"
+    },
+    "recipient": {
+      "description": "Inlined CompanyEntity info for recipient",
+      "$ref": "company_entity.json"
+    },
+    "issuer": {
+      "description": "CompanyEntity receiving payment for this invoice",
+      "$ref": "company_entity.json"
+    },
+    "items": {
+      "type": "array",
+      "description": "Inlined list of InvoiceItems",
+      "items": {
+        "$ref": "invoice_item.json"
+      }
+    },
+    "credit_memos": {
+      "type": "array",
+      "description": "Inlined list of CreditMemos",
+      "items": {
+        "$ref": "credit_memo.json"
+      }
+    },
+    "notes": {
+      "type": "array",
+      "description": "Memoranda relevant to the invoice",
+      "items": {
+        "type": "string"
+      }
+    },
+    "shipments": {
+      "description": "Expandable link to information on related shipments",
+      "$ref": "api/refs/collection.json"
+    }
+  }
+}
diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice/quantity.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice/quantity.json
new file mode 100644
index 0000000000000..456cee19c2db2
--- /dev/null
+++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice/quantity.json
@@ -0,0 +1,20 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "additionalProperties": false,
+  "properties": {
+    "_object": {
+      "type": "string",
+      "description": "String representing the object's type. Always /invoice/quantity for this object.",
+      "pattern": "^/invoice/quantity$"
+    },
+    "value": {
+      "type": "string",
+      "description": "Numeric value described by qualifier"
+    },
+    "qualifier": {
+      "type": "string",
+      "description": "Describes what the value represents ('kg', 'bill of lading', 'USD')"
+    }
+  }
+}
diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice/rate.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice/rate.json
new file mode 100644
index 0000000000000..85104bce88be2
--- /dev/null
+++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice/rate.json
@@ -0,0 +1,20 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "additionalProperties": false,
+  "properties": {
+    "_object": {
+      "type": "string",
+      "description": "String representing the object's type. 
Always /invoice/rate for this object.", + "pattern": "^/invoice/rate$" + }, + "value": { + "type": "string", + "description": "Numeric value described by qualifier" + }, + "qualifier": { + "type": "string", + "description": "Describes what the value represents ('USD', '% Drayage Base', etc)" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice_item.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice_item.json new file mode 100644 index 0000000000000..386942f47e318 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/invoice_item.json @@ -0,0 +1,50 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /invoice_item for this object.", + "pattern": "^/invoice_item$" + }, + "name": { + "type": "string", + "description": "Line item name. Example: FCL 40' HQ" + }, + "slug": { + "type": "string", + "description": "Line item code. Example: fcl_40_hq" + }, + "category": { + "type": "string", + "description": "Category of the charge. Example freight" + }, + "amount": { + "description": "Price of the item", + "$ref": "money.json" + }, + "rate": { + "description": "Describes the rate per unit used to calculate the price of a line item", + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "invoice/rate.json" + } + ] + }, + "quantity": { + "description": "Describes the number of units used to calculate the price of a line item", + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "invoice/quantity.json" + } + ] + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/money.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/money.json new file mode 100644 index 0000000000000..993bdad67c75b --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/money.json @@ -0,0 +1,20 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /money for this object.", + "pattern": "^/money$" + }, + "amount": { + "type": "string", + "description": "A string representing the decimal amount" + }, + "currency_code": { + "type": "string", + "description": "The currency of the amount" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/network/company.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/network/company.json new file mode 100644 index 0000000000000..a68833d746492 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/network/company.json @@ -0,0 +1,47 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /network/company for this object.", + "pattern": "^/network/company$" + }, + "id": { + "type": "string", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the company." 
+    },
+    "ref": {
+      "type": "string",
+      "description": "Your ref for this company entity, as set in the Network tab."
+    },
+    "editable": {
+      "type": "boolean",
+      "description": "Whether or not you manage this company and can edit its attributes."
+    },
+    "entities": {
+      "type": "array",
+      "description": "Array of company entities that belong to this company.",
+      "items": {
+        "$ref": "../company_entity.json"
+      }
+    },
+    "locations": {
+      "description": "Expandable link to the locations of this company.",
+      "$ref": "../api/refs/collection.json"
+    },
+    "contacts": {
+      "description": "Expandable link to the contacts of this company.",
+      "$ref": "../api/refs/collection.json"
+    },
+    "metadata": {
+      "type": "object",
+      "description": "User defined metadata attached to the company."
+    }
+  }
+}
diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/network/location.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/network/location.json
new file mode 100644
index 0000000000000..bfa15aa9c96c5
--- /dev/null
+++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/network/location.json
@@ -0,0 +1,44 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "additionalProperties": false,
+  "properties": {
+    "_object": {
+      "type": "string",
+      "description": "String representing the object's type. Always /network/location for this object.",
+      "pattern": "^/network/location$"
+    },
+    "id": {
+      "type": "string",
+      "description": "Unique identifier for the location."
+    },
+    "name": {
+      "type": ["null", "string"],
+      "description": "Name of the location."
+    },
+    "address": {
+      "description": "Address of the location.",
+      "$ref": "../address.json"
+    },
+    "editable": {
+      "type": "boolean",
+      "description": "Whether or not you manage this location and can edit its attributes."
+    },
+    "contacts": {
+      "description": "Expandable API link to the contacts at this location.",
+      "$ref": "../api/refs/collection.json"
+    },
+    "company": {
+      "description": "Expandable API link to the company that this location belongs to.",
+      "$ref": "../api/refs/object.json"
+    },
+    "ref": {
+      "type": "string",
+      "description": "Your custom ref to this location."
+    },
+    "metadata": {
+      "type": "object",
+      "description": "User defined metadata attached to the location."
+    }
+  }
+}
diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/ocean/shipment.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/ocean/shipment.json
new file mode 100644
index 0000000000000..b96dc6d6d8b19
--- /dev/null
+++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/ocean/shipment.json
@@ -0,0 +1,36 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "additionalProperties": false,
+  "properties": {
+    "_object": {
+      "type": "string",
+      "description": "String representing the object's type. Always /ocean/shipment for this object.",
+      "pattern": "^/ocean/shipment$"
+    },
+    "is_lcl": {
+      "type": "boolean",
+      "description": "Flag that indicates whether the object is a LCL shipment."
+    },
+    "house_bill_number": {
+      "type": "string",
+      "description": "House bill of lading number."
+    },
+    "master_bill_number": {
+      "type": ["null", "string"],
+      "description": "Master bill of lading number."
+    },
+    "carrier_booking_number": {
+      "type": ["null", "string"],
+      "description": "Ocean carrier booking reference number."
+ }, + "confirmed_space_released_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "containers": { + "description": "Expandable API link to the containers of the shipment, represented by the Container object.", + "$ref": "../api/refs/collection.json" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product.json new file mode 100644 index 0000000000000..9e42d19beb2e6 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product.json @@ -0,0 +1,73 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product for this object.", + "pattern": "^/product$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of product." + }, + "description": { + "type": ["null", "string"], + "description": "Description of product." + }, + "sku": { + "type": ["null", "string"], + "description": "SKU of product." + }, + "product_category": { + "type": ["null", "string"], + "description": "The category of the product" + }, + "country_of_origin": { + "type": ["null", "string"], + "description": "Nation in which the product is manufactured" + }, + "client_verified": { + "type": "boolean", + "description": "Indicates whether Client has verified this Product." + }, + "product_properties": { + "type": "array", + "description": "An array of product properties, represented by ProductProperty", + "items": { + "$ref": "product/property.json" + } + }, + "hs_codes": { + "type": "array", + "description": "DEPRECATED - HS codes can be found in the classifications array", + "items": { + "$ref": "hs_code.json" + } + }, + "classifications": { + "type": "array", + "description": "An array of product classifications, represented by Classification", + "items": { + "$ref": "product/classification.json" + } + }, + "suppliers": { + "type": "array", + "description": "Array of suppliers.", + "items": { + "$ref": "product/supplier.json" + } + }, + "archived_at": { + "type": ["null", "string"], + "description": "A date string representing when the product was archived. Will be null if product is not archived. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/classification.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/classification.json new file mode 100644 index 0000000000000..cfe9b2bfc79ed --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/classification.json @@ -0,0 +1,23 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. 
Always /product/classification for this object.", + "pattern": "^/product/classification$" + }, + "region": { + "type": "string", + "description": "The two letter code for the region of the classification" + }, + "codes": { + "type": "array", + "description": "Array of 6 digit (international HS) or 10 digit (US HTS) code.", + "items": { + "type": "string" + } + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/property.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/property.json new file mode 100644 index 0000000000000..904ce583e6b51 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/property.json @@ -0,0 +1,20 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product/property for this object.", + "pattern": "^/product/property$" + }, + "type": { + "type": "string", + "description": "The property describing the product" + }, + "value": { + "type": "string", + "description": "The value associated with the product" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/supplier.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/supplier.json new file mode 100644 index 0000000000000..e5f8ce599c9ed --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/product/supplier.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /product/supplier for this object.", + "pattern": "^/product/supplier$" + }, + "id": { + "type": "integer" + }, + "ref": { + "type": "string" + }, + "country_of_origin": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/quantity/volume.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/quantity/volume.json new file mode 100644 index 0000000000000..ef86a371afe55 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/quantity/volume.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /quantity/volume for this object.", + "pattern": "^/quantity/volume" + }, + "value": { + "type": "number", + "description": "Specifies the quantity of units as a float." 
+ }, + "unit": { + "type": "string", + "description": "Specifies the unit of measure for this quantity.", + "enum": ["cbm", "cbft"] + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/quantity/weight.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/quantity/weight.json new file mode 100644 index 0000000000000..a9e876424bbbb --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/quantity/weight.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /quantity/weight for this object.", + "pattern": "^/quantity/weight" + }, + "value": { + "type": "number", + "description": "Specifies the quantity of units as a float." + }, + "unit": { + "type": "string", + "description": "Specifies the unit of measure for this quantity.", + "enum": ["kg", "lbs"] + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment.json new file mode 100644 index 0000000000000..a59f2008eed97 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment.json @@ -0,0 +1,289 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /shipment for this object.", + "pattern": "^/shipment$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "name": { + "type": "string", + "description": "Name of the shipment." + }, + "booking": { + "description": "The booking associated with this shipment", + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "api/refs/object.json" + } + ] + }, + "transportation_mode": { + "type": "string", + "description": "Transportation mode of the main carriage of the shipment. This can be either Ocean or Air." + }, + "freight_type": { + "type": "string", + "description": "The type of freight service provided. One of:", + "enum": ["port_to_door", "port_to_port", "door_to_door", "door_to_port"] + }, + "updated_at": { + "type": "string", + "description": "Date when the shipment object was last updated. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + }, + "archived_at": { + "type": ["null", "string"], + "description": "Date when the shipment was archived, if applicable. In ISO8601 UTC format with timezone denoted by Z.", + "format": "date-time" + }, + "incoterm": { + "type": "string", + "description": "The Incoterm of your shipment. This can be EXW, FCA, FAS, FOB, CPT, CFR, CIF, CIP, DAT, DAP, DDP, or DPU." + }, + "calculated_weight": { + "description": "Total weight (kg or lbs) of the shipment, calculated from individual pieces if package dimensions are known.", + "$ref": "quantity/weight.json" + }, + "calculated_volume": { + "description": "Total volume (cbm or cft) of the shipment, calculated from individual pieces if package dimensions are known.", + "$ref": "quantity/volume.json" + }, + "estimated_departure_date": { + "type": ["null", "string"], + "description": "Estimated departure date from the first port of the main voyage. 
In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "actual_departure_date": { + "type": ["null", "string"], + "description": "Actual departure date from the first port of the main voyage. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "target_delivery_date": { + "type": ["null", "string"], + "description": "Target date for when the shipment will be fully delivered. This date is set when the shipment''s booking is confirmed. This value may be different from estimated_delivered_in_full_date, which is updated when there is new information about the progress of a shipment. Date only.", + "format": "date" + }, + "estimated_arrival_date": { + "type": ["null", "string"], + "description": "Estimated arrival date to the last port of the main voyage. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "actual_arrival_date": { + "type": ["null", "string"], + "description": "Actual arrival date to the last port of the main voyage. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "status": { + "type": "string", + "description": "Status of the shipment. One of:", + "enum": [ + "seller_location", + "in_transit_to_departure_port", + "departure_port", + "in_transit_to_arrival_port", + "arrival_port", + "in_transit_to_final_destination", + "final_destination" + ] + }, + "priority": { + "type": "string" + }, + "pieces": { + "type": ["null", "integer"], + "description": "Total number of pieces in the shipment." + }, + "it_number": { + "type": ["null", "string"], + "description": "Inbond Transit number used for US Customs" + }, + "created_date": { + "type": "string", + "description": "Date the user has confirmed Flexport quote and cargo is getting ready to ship.", + "format": "date-time" + }, + "estimated_picked_up_in_full_date": { + "type": ["null", "string"], + "description": "Estimated pickup date from the origin location. For ocean shipments with multiple containers, this is the date of last picked up container. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "actual_picked_up_in_full_date": { + "type": ["null", "string"], + "description": "Actual pickup date from the origin location. For ocean shipments with multiple containers, this is the date of last picked up container. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "estimated_delivered_in_full_date": { + "type": ["null", "string"], + "description": "Estimated delivery date to the destination location. For ocean shipments with multiple containers, this is the date of last delivered container. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "actual_delivered_in_full_date": { + "type": ["null", "string"], + "description": "Actual delivery date to the destination location. For ocean shipments with multiple containers, this is the date of last delivered container. In ISO8601 format with timezone denoted by +/-HH:MM.", + "format": "date-time" + }, + "cargo_ready_date": { + "type": "string", + "description": "Date when cargo is ready for pickup at the origin location. Calendar date only, no time.", + "format": "date" + }, + "wants_freight_management_bco": { + "type": "boolean", + "description": "Determines if a shipment bypasses a freight forwarder and is booked directly with a steamship line." 
+ }, + "wants_flexport_freight": { + "type": "boolean", + "description": "Determines if a shipment is using Flexport services for a combination of import customs clearance or final leg delivery." + }, + "wants_export_customs_service": { + "type": "boolean", + "description": "Determines if the shipment requires Flexport to provide export customs service." + }, + "wants_import_customs_service": { + "type": "boolean", + "description": "Determines if the shipment requires Flexport to provide import customs service." + }, + "visibility_only": { + "type": "boolean", + "description": "Indicates if a shipment was requested for tracking via the Flexport visibility-only service." + }, + "wants_delivery_service": { + "type": "boolean", + "description": "Determines if Flexport is responsible for door delivery on a shipment." + }, + "wants_flexport_insurance": { + "type": "boolean" + }, + "wants_pickup_service": { + "type": "boolean" + }, + "wants_commercial_invoice_transcription": { + "type": "boolean" + }, + "wants_trade_declaration_service": { + "type": ["null", "boolean"] + }, + "air_shipment": { + "description": "Air-specific Shipment information. Available only if this is an air shipment. null otherwise.", + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "air/shipment.json" + } + ] + }, + "ocean_shipment": { + "description": "Ocean-specific Shipment information. Available only if this is an ocean shipment. null otherwise.", + "oneOf": [ + { + "type": "null" + }, + { + "$ref": "ocean/shipment.json" + } + ] + }, + "dangerous_goods": { + "description": "", + "$ref": "shipment/dangerous_goods.json" + }, + "shippers": { + "type": "array", + "description": "Array of shippers of the shipment, represented by the CompanyEntity objects.", + "items": { + "$ref": "company_entity.json" + } + }, + "consignees": { + "type": "array", + "description": "Array of consignees of the shipment, represented by CompanyEntity objects.", + "items": { + "$ref": "company_entity.json" + } + }, + "buyers": { + "type": "array", + "description": "Array of buyers of the shipment, represented by CompanyEntity objects.", + "items": { + "$ref": "company_entity.json" + } + }, + "sellers": { + "type": "array", + "description": "Array of sellers of the shipment, represented by CompanyEntity objects.", + "items": { + "$ref": "company_entity.json" + } + }, + "importers_of_record": { + "type": "array", + "description": "Array of importers of record of the shipment, represented by CompanyEntity objects.", + "items": { + "$ref": "company_entity.json" + } + }, + "items": { + "type": "array", + "description": "Array of products and SKUs in the shipment, represented by ShipmentItem objects.", + "items": { + "$ref": "shipment_item.json" + } + }, + "legs": { + "description": "Expandable API link to the legs of the shipment, represented by ShipmentLeg objects.", + "$ref": "api/refs/collection.json" + }, + "customs_entries": { + "description": "Expandable API link to the customs entries for this shipment, represented by CustomsEntry objects.", + "$ref": "api/refs/collection.json" + }, + "commercial_invoices": { + "description": "Expandable API link to the customs entries for this shipment, represented by CommercialInvoice objects.", + "$ref": "api/refs/collection.json" + }, + "documents": { + "description": "Expandable API link to the documents for this shipment, represented by Document objects.", + "$ref": "api/refs/collection.json" + }, + "metadata": { + "type": "object", + "description": "User defined metadata attached to the shipment." 
+ }, + "departure_date": { + "$comment": "deprecated", + "type": ["null", "string"], + "description": "Departure date from the first port of the main voyage.", + "format": "date-time" + }, + "arrival_date": { + "$comment": "deprecated", + "type": ["null", "string"], + "description": "Arrival date to the last port of the main voyage.", + "format": "date-time" + }, + "picked_up_in_full_date": { + "$comment": "deprecated", + "type": ["null", "string"], + "description": "Pickup date from the origin location. For ocean shipments with multiple containers, this is the date of last picked up container.", + "format": "date-time" + }, + "delivered_in_full_date": { + "$comment": "deprecated", + "type": ["null", "string"], + "description": "Delivery date to the destination location. For ocean shipments with multiple containers, this is the date of last delivered container.", + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment/dangerous_goods.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment/dangerous_goods.json new file mode 100644 index 0000000000000..4cbbcb19412a2 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment/dangerous_goods.json @@ -0,0 +1,18 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /shipment/dangerous_goods for this object.", + "pattern": "^/shipment/dangerous_goods" + }, + "review_status": { + "type": ["null", "string"] + }, + "classifications": { + "type": "array" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment_item.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment_item.json new file mode 100644 index 0000000000000..053b39ae0bbb3 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shared/shipment_item.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": false, + "properties": { + "_object": { + "type": "string", + "description": "String representing the object's type. Always /shipment_item for this object.", + "pattern": "^/shipment_item$" + }, + "id": { + "type": "integer", + "description": "Unique identifier for the object." + }, + "total_units": { + "type": "string", + "description": "Total quantity of specific SKU within shipment." + }, + "total_weight": { + "description": "Total weight of specific SKU within shipment.", + "$ref": "quantity/weight.json" + }, + "total_volume": { + "description": "Total volume of specific SKU within shipment.", + "$ref": "quantity/volume.json" + }, + "purchase_order_number": { + "type": "string", + "description": "Purchase order number associated with SKU within shipment." 
+ }, + "product": { + "description": "Name and SKU of product, represented by the Product object.", + "$ref": "product.json" + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shipments.json b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shipments.json new file mode 100644 index 0000000000000..731f7330b00ca --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/schemas/shipments.json @@ -0,0 +1,3 @@ +{ + "$ref": "shipment.json" +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/source.py b/airbyte-integrations/connectors/source-flexport/source_flexport/source.py new file mode 100644 index 0000000000000..81e3d2637fc52 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/source.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from typing import Any, List, Mapping, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator +from source_flexport.streams import Companies, FlexportError, FlexportStream, Invoices, Locations, Products, Shipments + + +class SourceFlexport(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + headers = {"Authorization": f"Bearer {config['api_key']}"} + response = requests.get(f"{FlexportStream.url_base}network/companies?page=1&per=1", headers=headers) + + try: + response.raise_for_status() + except Exception as exc: + try: + error = response.json()["errors"][0] + if error: + return False, FlexportError(f"{error['code']}: {error['message']}") + return False, exc + except Exception: + return False, exc + + return True, None + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = TokenAuthenticator(token=config["api_key"]) + return [ + Companies(authenticator=auth), + Locations(authenticator=auth), + Products(authenticator=auth), + Invoices(authenticator=auth), + Shipments(authenticator=auth, start_date=config["start_date"]), + ] diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/spec.json b/airbyte-integrations/connectors/source-flexport/source_flexport/spec.json new file mode 100644 index 0000000000000..23ae4f8adb9ec --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/spec.json @@ -0,0 +1,24 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/flexport", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Flexport Spec", + "type": "object", + "required": ["api_key", "start_date"], + "additionalProperties": false, + "properties": { + "api_key": { + "order": 0, + "type": "string", + "title": "API Key", + "airbyte_secret": true + }, + "start_date": { + "order": 1, + "title": "Start Date", + "type": "string", + "format": "date-time" + } + } + } +} diff --git a/airbyte-integrations/connectors/source-flexport/source_flexport/streams.py b/airbyte-integrations/connectors/source-flexport/source_flexport/streams.py new file mode 100644 index 0000000000000..df7157e8bcd51 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/source_flexport/streams.py @@ -0,0 +1,174 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from abc import ABC +from typing import Any, Iterable, Mapping, MutableMapping, Optional, Union +from urllib.parse import parse_qsl, urlparse + +import pendulum +import requests +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth.core import HttpAuthenticator +from requests.auth import AuthBase + + +class FlexportError(Exception): + pass + + +class FlexportStream(HttpStream, ABC): + url_base = "https://api.flexport.com/" + raise_on_http_errors = False + primary_key = "id" + page_size = 500 + + def __init__(self, authenticator: Union[AuthBase, HttpAuthenticator] = None, start_date: str = None): + super().__init__(authenticator=authenticator) + + self._authenticator = authenticator + self.start_date = start_date + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + # https://apidocs.flexport.com/reference/pagination + # All list endpoints return paginated responses. The response object contains + # elements of the current page, and links to the previous and next pages. + data = response.json()["data"] + next = data.get("next") + + if next: + url = urlparse(next) + qs = dict(parse_qsl(url.query)) + + return { + "page": qs["page"], + "per": qs["per"], + } + + def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: + if next_page_token: + return next_page_token + + return { + "page": 1, + "per": self.page_size, + } + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + # https://apidocs.flexport.com/reference/response-layout + json = response.json() + + http_error = None + try: + response.raise_for_status() + except requests.HTTPError as exc: + http_error = exc + + flexport_error = None + try: + flexport_error = json.get("error") + except AttributeError: + raise FlexportError("Unexpected response") from http_error + + if flexport_error: + try: + if "code" in flexport_error and "message" in flexport_error: + raise FlexportError(f"{flexport_error['code']}: {flexport_error['message']}") from http_error + except TypeError: + pass + + raise FlexportError(f"Unexpected error: {flexport_error}") from http_error + + if http_error: + raise http_error + + yield from json["data"]["data"] + + +class IncrementalFlexportStream(FlexportStream, ABC): + epoch_start = pendulum.from_timestamp(0, tz="UTC").to_iso8601_string() + + @property + def cursor_field(self) -> str: + return [] + + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: + current = current_stream_state.get(self.cursor_field, self.epoch_start) + latest = latest_record.get(self.cursor_field, self.epoch_start) + + return { + self.cursor_field: max(latest, current), + } + + def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, any]]]: + if not stream_state: + stream_state = {} + + from_date = pendulum.parse(stream_state.get(self.cursor_field, self.start_date)) + end_date = max(from_date, pendulum.tomorrow("UTC")) + + date_diff = end_date - from_date + if date_diff.years > 0: + interval = pendulum.duration(months=1) + elif date_diff.months > 0: + interval = pendulum.duration(weeks=1) + elif date_diff.weeks > 0: + interval = pendulum.duration(days=1) + else: + interval = pendulum.duration(hours=1) + + while True: + to_date = min(from_date + interval, end_date) + yield {"from": from_date.isoformat(), "to": to_date.add(seconds=1).isoformat()} 
+ from_date = to_date + if from_date >= end_date: + break + + +class Companies(FlexportStream): + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return "network/companies" + + +class Locations(FlexportStream): + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return "network/locations" + + +class Products(FlexportStream): + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return "products" + + +class Invoices(FlexportStream): + page_size = 100 + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return "invoices" + + +class Shipments(IncrementalFlexportStream): + cursor_field = "updated_at" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return "shipments" + + def request_params(self, stream_slice: Mapping[str, any] = None, **kwargs) -> MutableMapping[str, Any]: + return { + **super().request_params(stream_slice=stream_slice, **kwargs), + "sort": self.cursor_field, + "direction": "asc", + "f.updated_at.gt": stream_slice["from"], + "f.updated_at.lt": stream_slice["to"], + } diff --git a/airbyte-integrations/connectors/source-flexport/unit_tests/__init__.py b/airbyte-integrations/connectors/source-flexport/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-flexport/unit_tests/test_incremental_streams.py b/airbyte-integrations/connectors/source-flexport/unit_tests/test_incremental_streams.py new file mode 100644 index 0000000000000..e9072f76e0d08 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/unit_tests/test_incremental_streams.py @@ -0,0 +1,124 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pendulum +import pytest +from source_flexport.streams import IncrementalFlexportStream, Shipments + + +@pytest.fixture +def patch_incremental_base_class(mocker): + # Mock abstract methods to enable instantiating abstract class + mocker.patch.object(IncrementalFlexportStream, "path", "v0/example_endpoint") + mocker.patch.object(IncrementalFlexportStream, "primary_key", "test_primary_key") + mocker.patch.object(IncrementalFlexportStream, "__abstractmethods__", set()) + mocker.patch.object(IncrementalFlexportStream, "cursor_field", "test_cursor") + + +@pytest.mark.parametrize( + ("stream_class", "cursor"), + [ + (Shipments, "updated_at"), + ], +) +def test_cursor_field(patch_incremental_base_class, stream_class, cursor): + stream = stream_class() + expected_cursor_field = cursor + assert stream.cursor_field == expected_cursor_field + + +@pytest.mark.parametrize( + ("current", "latest", "expected"), + [ + ({"test_cursor": "2021-01-01"}, {}, {"test_cursor": "2021-01-01"}), + ({}, {"test_cursor": "2021-01-01"}, {"test_cursor": "2021-01-01"}), + ({"test_cursor": "2021-01-01"}, {"test_cursor": "2050-01-01"}, {"test_cursor": "2050-01-01"}), + ({"test_cursor": "2050-01-01"}, {"test_cursor": "2021-01-01"}, {"test_cursor": "2050-01-01"}), + ], +) +def test_get_updated_state(patch_incremental_base_class, current, latest, expected): + stream = IncrementalFlexportStream(start_date="2021-01-02") + inputs = {"current_stream_state": current, "latest_record": latest} + assert stream.get_updated_state(**inputs) == expected + + +def date(*args): + return pendulum.datetime(*args).isoformat() + + +@pytest.mark.parametrize( + ("now", "stream_state", "slice_count", "expected_from_date", "expected_to_date"), + [ + (None, None, 24, date(2050, 1, 1), date(2050, 1, 2, 0, 0, 1)), + (date(2050, 1, 2), None, 48, date(2050, 1, 1), date(2050, 1, 3, 0, 0, 1)), + (None, {"test_cursor": date(2050, 1, 4)}, 1, date(2050, 1, 4), date(2050, 1, 4, 0, 0, 1)), + ( + date(2050, 1, 5), + {"test_cursor": date(2050, 1, 4)}, + 48, + date(2050, 1, 4), + date(2050, 1, 6, 0, 0, 1), + ), + ( + # Yearly + date(2052, 1, 1), + {"test_cursor": date(2050, 1, 1)}, + 25, + date(2050, 1, 1), + date(2052, 1, 2, 0, 0, 1), + ), + ( + # Monthly + date(2050, 4, 1), + {"test_cursor": date(2050, 1, 1)}, + 13, + date(2050, 1, 1), + date(2050, 4, 2, 0, 0, 1), + ), + ( + # Weekly + date(2050, 1, 31), + {"test_cursor": date(2050, 1, 1)}, + 5, + date(2050, 1, 1), + date(2050, 2, 1, 0, 0, 1), + ), + ( + # Daily + date(2050, 1, 1, 23, 59, 59), + {"test_cursor": date(2050, 1, 1)}, + 24, + date(2050, 1, 1), + date(2050, 1, 2, 0, 0, 1), + ), + ], +) +def test_stream_slices(patch_incremental_base_class, now, stream_state, slice_count, expected_from_date, expected_to_date): + start_date = date(2050, 1, 1) + pendulum.set_test_now(pendulum.parse(now if now else start_date)) + + stream = IncrementalFlexportStream(start_date=start_date) + stream_slices = list(stream.stream_slices(stream_state)) + + assert len(stream_slices) == slice_count + assert stream_slices[0]["from"] == expected_from_date + assert stream_slices[-1]["to"] == expected_to_date + + +def test_supports_incremental(patch_incremental_base_class, mocker): + mocker.patch.object(IncrementalFlexportStream, "cursor_field", "dummy_field") + stream = IncrementalFlexportStream() + assert stream.supports_incremental + + +def test_source_defined_cursor(patch_incremental_base_class): + stream = IncrementalFlexportStream() + assert stream.source_defined_cursor + + +def 
test_stream_checkpoint_interval(patch_incremental_base_class): + stream = IncrementalFlexportStream() + expected_checkpoint_interval = None + assert stream.state_checkpoint_interval == expected_checkpoint_interval diff --git a/airbyte-integrations/connectors/source-flexport/unit_tests/test_source.py b/airbyte-integrations/connectors/source-flexport/unit_tests/test_source.py new file mode 100644 index 0000000000000..83d005cabeae9 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/unit_tests/test_source.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import re +from unittest.mock import MagicMock + +import pytest +from source_flexport.source import SourceFlexport +from source_flexport.streams import FlexportStream + + +@pytest.mark.parametrize( + ("status_code", "response", "expected"), + [ + (200, {}, (True, None)), + (401, {}, (False, "401 Client Error")), + (401, {"errors": [{"code": "server_error", "message": "Server error"}]}, (False, "server_error: Server error")), + ], +) +def test_check_connection(mocker, requests_mock, status_code, response, expected): + expected_ok, expected_error = expected + requests_mock.get(FlexportStream.url_base + "network/companies?page=1&per=1", status_code=status_code, json=response) + + source = SourceFlexport() + logger_mock, config_mock = MagicMock(), MagicMock() + + ok, error = source.check_connection(logger_mock, config_mock) + assert ok == expected_ok + if isinstance(expected_error, str): + assert re.match(expected_error, str(error)) + else: + assert error == expected_error + + +def test_streams(mocker): + source = SourceFlexport() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 5 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-flexport/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-flexport/unit_tests/test_streams.py new file mode 100644 index 0000000000000..5b89deb298877 --- /dev/null +++ b/airbyte-integrations/connectors/source-flexport/unit_tests/test_streams.py @@ -0,0 +1,87 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import pytest +import requests +from requests.exceptions import HTTPError +from source_flexport.source import FlexportError +from source_flexport.streams import FlexportStream + + +@pytest.fixture +def patch_base_class(mocker): + # Mock abstract methods to enable instantiating abstract class + mocker.patch.object(FlexportStream, "path", "v0/example_endpoint") + mocker.patch.object(FlexportStream, "primary_key", "test_primary_key") + mocker.patch.object(FlexportStream, "__abstractmethods__", set()) + + +@pytest.mark.parametrize( + ("next_page_token", "expected"), + [ + (None, {"page": 1, "per": FlexportStream.page_size}), + ({"page": 2, "per": 50}, {"page": 2, "per": 50}), + ], +) +def test_request_params(patch_base_class, next_page_token, expected): + stream = FlexportStream() + assert stream.request_params(next_page_token=next_page_token) == expected + + +@pytest.mark.parametrize( + ("response", "expected"), + [ + ({"data": {"next": None}}, None), + ({"data": {"next": "/endpoint"}}, KeyError("page")), + ({"data": {"next": "/endpoint?page=2"}}, KeyError("per")), + ({"data": {"next": "/endpoint?page=2&per=42"}}, {"page": "2", "per": "42"}), + ], +) +def test_next_page_token(patch_base_class, requests_mock, response, expected): + url = "http://dummy" + requests_mock.get(url, json=response) + response = requests.get(url) + + stream = FlexportStream() + + if isinstance(expected, Exception): + with pytest.raises(type(expected), match=str(expected)): + stream.next_page_token(response) + else: + assert stream.next_page_token(response) == expected + + +@pytest.mark.parametrize( + ("status_code", "response", "expected"), + [ + (200, None, Exception()), + (400, None, Exception()), + (200, "string_response", FlexportError("Unexpected response")), + (401, "string_response", FlexportError("Unexpected response")), + (200, {"error": None}, KeyError("data")), + (402, {"error": None}, HTTPError("402 Client Error")), + (200, {"error": {}}, KeyError("data")), + (403, {"error": {}}, HTTPError("403 Client Error")), + (200, {"error": "unexpected_error_type"}, FlexportError("Unexpected error: unexpected_error_type")), + (404, {"error": "unexpected_error_type"}, FlexportError("Unexpected error: unexpected_error_type")), + (200, {"error": {"code": "error_code", "message": "Error message"}}, FlexportError("error_code: Error message")), + (405, {"error": {"code": "error_code", "message": "Error message"}}, FlexportError("error_code: Error message")), + (200, {"error": None, "data": "unexpected_data_type"}, TypeError("string indices must be integers")), + (200, {"error": None, "data": {"data": None}}, TypeError("'NoneType' object is not iterable")), + (200, {"error": None, "data": {"data": "hello"}}, ["h", "e", "l", "l", "o"]), + (200, {"error": None, "data": {"data": ["record_1", "record_2"]}}, ["record_1", "record_2"]), + ], +) +def test_parse_response(patch_base_class, requests_mock, status_code, response, expected): + url = "http://dummy" + requests_mock.get(url, status_code=status_code, json=response) + response = requests.get(url) + + stream = FlexportStream() + + if isinstance(expected, Exception): + with pytest.raises(type(expected), match=str(expected)): + list(stream.parse_response(response)) + else: + assert list(stream.parse_response(response)) == expected diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index f37b28be05589..ae380d54c6de6 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -65,6 +65,7 @@ * [Facebook Marketing](integrations/sources/facebook-marketing.md) * [Facebook 
Pages](integrations/sources/facebook-pages.md) * [Files](integrations/sources/file.md) + * [Flexport](integrations/sources/flexport.md) * [Freshdesk](integrations/sources/freshdesk.md) * [Freshsales](integrations/sources/freshsales.md) * [Freshservice](integrations/sources/freshservice.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 91b6163db9d6b..ed4b9618e65a9 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -42,6 +42,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [Facebook Marketing](sources/facebook-marketing.md) | Beta | | [Facebook Pages](sources/facebook-pages.md) | Alpha | | [Files](sources/file.md) | Certified | +| [Flexport](sources/flexport.md) | Alpha | | [Freshdesk](sources/freshdesk.md) | Certified | | [GitHub](sources/github.md) | Beta | | [GitLab](sources/gitlab.md) | Beta | diff --git a/docs/integrations/sources/flexport.md b/docs/integrations/sources/flexport.md new file mode 100644 index 0000000000000..a15ee0c247f35 --- /dev/null +++ b/docs/integrations/sources/flexport.md @@ -0,0 +1,49 @@ +# Flexport + +## Sync overview + +Flexport source uses [Flexport API](https://developers.flexport.com/s/api) to extract data from Flexport. + +### Output schema + +This Source is capable of syncing the following data as streams: + +- [Companies](https://apidocs.flexport.com/reference/company) +- [Locations](https://apidocs.flexport.com/reference/location) +- [Products](https://apidocs.flexport.com/reference/product) +- [Invoices](https://apidocs.flexport.com/reference/invoices) +- [Shipments](https://apidocs.flexport.com/reference/shipment) + +### Data type mapping + +| Integration Type | Airbyte Type | Notes | +| :--- | :--- | :--- | +| `number` | `number` | float number | +| `integer` | `integer` | whole number | +| `date` | `string` | FORMAT YYYY-MM-DD | +| `datetime` | `string` | FORMAT YYYY-MM-DDThh:mm:ss | +| `array` | `array` | | +| `boolean` | `boolean` | True/False | +| `string` | `string` | | + +### Features + +| Feature | Supported?\(Yes/No\) | Notes | +| :--- | :--- | :--- | +| Full Refresh Overwrite Sync | Yes | | +| Full Refresh Append Sync | Yes | | +| Incremental - Append Sync | Yes | | +| Incremental - Append + Deduplication Sync | Yes | | +| Namespaces | No | | + +## Getting started + +### Authentication + +Authentication uses a pre-created API token which can be [created in the UI](https://apidocs.flexport.com/reference/authentication). + +## Changelog + +| Version | Date | Pull Request | Subject | +| :--- | :--- | :--- | :--- | +| 0.1.0 | 2021-12-14 | [8777](https://github.com/airbytehq/airbyte/pull/8777) | New Source: Flexport | From db4093277fc3a358314aca4de441d81f02dea598 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 20 Jan 2022 07:56:06 -0800 Subject: [PATCH 169/215] async container launch kubernetes "process" (#9242) * add misc todos * save work so far * configure async pod processing * remove comment * fmt * working except logging propagation? 
* add comment * add logging and misc configuration fixes * add output propagation * fix state reading * logging is working (but background highlighting is not) * fix log highlighting * use sys instead of ctx * comment * clean up and test state management * clean up orchestrator app construction * unify launcher workers and handle resuming * respond to comments * misc * disable * fix comment * respond to comments --- .env | 1 - airbyte-commons/src/main/resources/log4j2.xml | 2 +- .../java/io/airbyte/config/EnvConfigs.java | 30 +- .../storage/DefaultGcsClientFactory.java | 14 +- .../storage/DefaultS3ClientFactory.java | 2 + .../config/storage/MinioS3ClientFactory.java | 2 + airbyte-container-orchestrator/Dockerfile | 7 + .../AsyncStateManager.java | 40 ++ .../ContainerOrchestratorApp.java | 150 ++++++-- .../DbtJobOrchestrator.java | 11 +- .../DefaultAsyncStateManager.java | 86 +++++ .../JobOrchestrator.java | 48 ++- .../NoOpOrchestrator.java | 33 ++ .../NormalizationJobOrchestrator.java | 10 +- .../ReplicationJobOrchestrator.java | 18 +- .../DefaultAsyncStateManagerTest.java | 108 ++++++ .../java/io/airbyte/workers/WorkerApp.java | 113 +++--- .../workers/process/AsyncKubePodStatus.java | 14 + .../process/AsyncOrchestratorPodProcess.java | 347 ++++++++++++++++++ .../io/airbyte/workers/process/KubePod.java | 21 ++ .../airbyte/workers/process/KubePodInfo.java | 7 + .../workers/process/KubePodProcess.java | 47 ++- .../workers/process/KubeProcessFactory.java | 20 +- .../airbyte/workers/storage/StateClients.java | 30 ++ .../airbyte/workers/storage/WorkerStore.java | 54 --- .../temporal/sync/DbtLauncherWorker.java | 129 +------ .../sync/DbtTransformationActivityImpl.java | 16 +- .../workers/temporal/sync/LauncherWorker.java | 183 +++++++++ .../sync/NormalizationActivityImpl.java | 16 +- .../sync/NormalizationLauncherWorker.java | 127 +------ .../temporal/sync/OrchestratorConstants.java | 28 +- .../sync/ReplicationActivityImpl.java | 22 +- .../sync/ReplicationLauncherWorker.java | 152 +------- .../resources/entrypoints/{ => sync}/check.sh | 0 .../resources/entrypoints/{ => sync}/main.sh | 0 ...OrchestratorPodProcessIntegrationTest.java | 149 ++++++++ .../workers/storage/WorkerStoreTest.java | 69 ---- charts/airbyte/templates/env-configmap.yaml | 2 + .../airbyte/templates/worker/deployment.yaml | 10 + kube/overlays/dev-integration-test/.env | 6 +- kube/overlays/dev-integration-test/.secrets | 2 + kube/overlays/dev/.env | 5 +- kube/overlays/dev/.secrets | 2 + .../overlays/stable-with-resource-limits/.env | 6 +- .../stable-with-resource-limits/.secrets | 3 +- kube/overlays/stable/.env | 6 +- kube/overlays/stable/.secrets | 2 + kube/resources/worker.yaml | 26 ++ 48 files changed, 1496 insertions(+), 680 deletions(-) create mode 100644 airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/AsyncStateManager.java create mode 100644 airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DefaultAsyncStateManager.java create mode 100644 airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NoOpOrchestrator.java create mode 100644 airbyte-container-orchestrator/src/test/java/io/airbyte/container_orchestrator/DefaultAsyncStateManagerTest.java create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/process/AsyncKubePodStatus.java create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/process/AsyncOrchestratorPodProcess.java create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/process/KubePod.java 
create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodInfo.java create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/storage/StateClients.java delete mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/storage/WorkerStore.java create mode 100644 airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java rename airbyte-workers/src/main/resources/entrypoints/{ => sync}/check.sh (100%) rename airbyte-workers/src/main/resources/entrypoints/{ => sync}/main.sh (100%) create mode 100644 airbyte-workers/src/test-integration/java/io/airbyte/workers/process/AsyncOrchestratorPodProcessIntegrationTest.java delete mode 100644 airbyte-workers/src/test/java/io/airbyte/workers/storage/WorkerStoreTest.java diff --git a/.env b/.env index 03d63b19bfa95..4e4bd18d3cc47 100644 --- a/.env +++ b/.env @@ -50,7 +50,6 @@ CONFIG_DATABASE_PASSWORD= CONFIG_DATABASE_URL= CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION=0.35.1.001 - ### AIRBYTE SERVICES ### TEMPORAL_HOST=airbyte-temporal:7233 INTERNAL_API_HOST=airbyte-server:8001 diff --git a/airbyte-commons/src/main/resources/log4j2.xml b/airbyte-commons/src/main/resources/log4j2.xml index ed578354eb798..70cabfd000a74 100644 --- a/airbyte-commons/src/main/resources/log4j2.xml +++ b/airbyte-commons/src/main/resources/log4j2.xml @@ -128,7 +128,7 @@ s3AwsKey="${s3-aws-key}" s3AwsSecret="${s3-aws-secret}" s3ServiceEndpoint="${s3-minio-endpoint}" s3PathStyleAccess="${s3-path-style-access}" gcpStorageBucket="${gcs-log-bucket}" gcpStorageBlobNamePrefix="job-logging${ctx:cloud_job_log_path}"> - + diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index 065c50963f902..540bc7b05b74f 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -89,16 +89,16 @@ public class EnvConfigs implements Configs { private static final String JOBS_DATABASE_INITIALIZATION_TIMEOUT_MS = "JOBS_DATABASE_INITIALIZATION_TIMEOUT_MS"; private static final String CONTAINER_ORCHESTRATOR_ENABLED = "CONTAINER_ORCHESTRATOR_ENABLED"; - private static final String STATE_STORAGE_S3_BUCKET_NAME = "STATE_STORAGE_S3_BUCKET_NAME"; - private static final String STATE_STORAGE_S3_REGION = "STATE_STORAGE_S3_REGION"; - private static final String STATE_STORAGE_S3_ACCESS_KEY = "STATE_STORAGE_S3_ACCESS_KEY"; - private static final String STATE_STORAGE_S3_SECRET_ACCESS_KEY = "STATE_STORAGE_S3_SECRET_ACCESS_KEY"; - private static final String STATE_STORAGE_MINIO_BUCKET_NAME = "STATE_STORAGE_MINIO_BUCKET_NAME"; - private static final String STATE_STORAGE_MINIO_ENDPOINT = "STATE_STORAGE_MINIO_ENDPOINT"; - private static final String STATE_STORAGE_MINIO_ACCESS_KEY = "STATE_STORAGE_MINIO_ACCESS_KEY"; - private static final String STATE_STORAGE_MINIO_SECRET_ACCESS_KEY = "STATE_STORAGE_MINIO_SECRET_ACCESS_KEY"; - private static final String STATE_STORAGE_GCS_BUCKET_NAME = "STATE_STORAGE_GCS_BUCKET_NAME"; - private static final String STATE_STORAGE_GCS_APPLICATION_CREDENTIALS = "STATE_STORAGE_GCS_APPLICATION_CREDENTIALS"; + public static final String STATE_STORAGE_S3_BUCKET_NAME = "STATE_STORAGE_S3_BUCKET_NAME"; + public static final String STATE_STORAGE_S3_REGION = "STATE_STORAGE_S3_REGION"; + public static final String STATE_STORAGE_S3_ACCESS_KEY = "STATE_STORAGE_S3_ACCESS_KEY"; + public static final String STATE_STORAGE_S3_SECRET_ACCESS_KEY = 
"STATE_STORAGE_S3_SECRET_ACCESS_KEY"; + public static final String STATE_STORAGE_MINIO_BUCKET_NAME = "STATE_STORAGE_MINIO_BUCKET_NAME"; + public static final String STATE_STORAGE_MINIO_ENDPOINT = "STATE_STORAGE_MINIO_ENDPOINT"; + public static final String STATE_STORAGE_MINIO_ACCESS_KEY = "STATE_STORAGE_MINIO_ACCESS_KEY"; + public static final String STATE_STORAGE_MINIO_SECRET_ACCESS_KEY = "STATE_STORAGE_MINIO_SECRET_ACCESS_KEY"; + public static final String STATE_STORAGE_GCS_BUCKET_NAME = "STATE_STORAGE_GCS_BUCKET_NAME"; + public static final String STATE_STORAGE_GCS_APPLICATION_CREDENTIALS = "STATE_STORAGE_GCS_APPLICATION_CREDENTIALS"; // defaults private static final String DEFAULT_SPEC_CACHE_BUCKET = "io-airbyte-cloud-spec-cache"; @@ -172,18 +172,18 @@ private Optional getStateStorageConfiguration() { if (getEnv(STATE_STORAGE_GCS_BUCKET_NAME) != null) { return Optional.of(CloudStorageConfigs.gcs(new GcsConfig( getEnvOrDefault(STATE_STORAGE_GCS_BUCKET_NAME, ""), - getEnvOrDefault(LogClientSingleton.GOOGLE_APPLICATION_CREDENTIALS, "")))); + getEnvOrDefault(STATE_STORAGE_GCS_APPLICATION_CREDENTIALS, "")))); } else if (getEnv(STATE_STORAGE_MINIO_ENDPOINT) != null) { return Optional.of(CloudStorageConfigs.minio(new MinioConfig( getEnvOrDefault(STATE_STORAGE_MINIO_BUCKET_NAME, ""), - getEnvOrDefault(LogClientSingleton.AWS_ACCESS_KEY_ID, ""), - getEnvOrDefault(LogClientSingleton.AWS_SECRET_ACCESS_KEY, ""), + getEnvOrDefault(STATE_STORAGE_MINIO_ACCESS_KEY, ""), + getEnvOrDefault(STATE_STORAGE_MINIO_SECRET_ACCESS_KEY, ""), getEnvOrDefault(STATE_STORAGE_MINIO_ENDPOINT, "")))); } else if (getEnv(STATE_STORAGE_S3_REGION) != null) { return Optional.of(CloudStorageConfigs.s3(new S3Config( getEnvOrDefault(STATE_STORAGE_S3_BUCKET_NAME, ""), - getEnvOrDefault(LogClientSingleton.AWS_ACCESS_KEY_ID, ""), - getEnvOrDefault(LogClientSingleton.AWS_SECRET_ACCESS_KEY, ""), + getEnvOrDefault(STATE_STORAGE_S3_ACCESS_KEY, ""), + getEnvOrDefault(STATE_STORAGE_S3_SECRET_ACCESS_KEY, ""), getEnvOrDefault(STATE_STORAGE_S3_REGION, "")))); } else { return Optional.empty(); diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultGcsClientFactory.java b/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultGcsClientFactory.java index 0c242c9098ec7..dbd4b7b6f3e4e 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultGcsClientFactory.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultGcsClientFactory.java @@ -5,9 +5,12 @@ package io.airbyte.config.storage; import com.google.api.client.util.Preconditions; +import com.google.auth.oauth2.ServiceAccountCredentials; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; import io.airbyte.config.storage.CloudStorageConfigs.GcsConfig; +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; import java.util.function.Supplier; /** @@ -16,8 +19,11 @@ */ public class DefaultGcsClientFactory implements Supplier { + private final GcsConfig config; + public DefaultGcsClientFactory(final GcsConfig config) { validate(config); + this.config = config; } private static void validate(final GcsConfig config) { @@ -27,7 +33,13 @@ private static void validate(final GcsConfig config) { @Override public Storage get() { - return StorageOptions.getDefaultInstance().getService(); + try { + final var credentialsByteStream = new ByteArrayInputStream(config.getGoogleApplicationCredentials().getBytes(StandardCharsets.UTF_8)); + final var credentials = 
ServiceAccountCredentials.fromStream(credentialsByteStream); + return StorageOptions.newBuilder().setCredentials(credentials).build().getService(); + } catch (Exception e) { + throw new RuntimeException(e); + } } } diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultS3ClientFactory.java b/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultS3ClientFactory.java index 62b0237009b86..897cc68cd02a9 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultS3ClientFactory.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultS3ClientFactory.java @@ -8,6 +8,7 @@ import io.airbyte.config.storage.CloudStorageConfigs.S3ApiWorkerStorageConfig; import io.airbyte.config.storage.CloudStorageConfigs.S3Config; import java.util.function.Supplier; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; @@ -41,6 +42,7 @@ static void validateBase(final S3ApiWorkerStorageConfig s3BaseConfig) { @Override public S3Client get() { final var builder = S3Client.builder(); + builder.credentialsProvider(() -> AwsBasicCredentials.create(s3Config.getAwsAccessKey(), s3Config.getAwsSecretAccessKey())); builder.region(Region.of(s3Config.getRegion())); return builder.build(); } diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/storage/MinioS3ClientFactory.java b/airbyte-config/models/src/main/java/io/airbyte/config/storage/MinioS3ClientFactory.java index 31cd86e71db74..5cef8b97ac2f3 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/storage/MinioS3ClientFactory.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/storage/MinioS3ClientFactory.java @@ -9,6 +9,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.function.Supplier; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; @@ -40,6 +41,7 @@ public S3Client get() { final var minioEndpoint = minioConfig.getMinioEndpoint(); try { final var minioUri = new URI(minioEndpoint); + builder.credentialsProvider(() -> AwsBasicCredentials.create(minioConfig.getAwsAccessKey(), minioConfig.getAwsSecretAccessKey())); builder.endpointOverride(minioUri); builder.region(Region.US_EAST_1); // Although this is not used, the S3 client will error out if this is not set. Set a stub value. } catch (final URISyntaxException e) { diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index b805172795d8d..4fba8521e0287 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -18,6 +18,13 @@ RUN add-apt-repository \ stable" RUN apt-get update && apt-get install -y docker-ce-cli jq +# Install kubectl for copying files to kube pods. Eventually should be replaced with a kube java client. +# See https://github.com/airbytehq/airbyte/issues/8643 for more information on why we are using kubectl for copying. 
+# The following commands were taken from https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/#install-using-native-package-management +RUN curl -fsSLo /usr/share/keyrings/kubernetes-archive-keyring.gpg https://packages.cloud.google.com/apt/doc/apt-key.gpg +RUN echo "deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main" | tee /etc/apt/sources.list.d/kubernetes.list +RUN apt-get update && apt-get install -y kubectl + ENV APPLICATION airbyte-container-orchestrator ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}" diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/AsyncStateManager.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/AsyncStateManager.java new file mode 100644 index 0000000000000..d05d2f408e1a6 --- /dev/null +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/AsyncStateManager.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.container_orchestrator; + +import io.airbyte.workers.process.AsyncKubePodStatus; +import io.airbyte.workers.process.KubePodInfo; + +/** + * The state manager writes the "truth" for states of the async pod process. If the store isn't + * updated by the underlying pod, it will appear as failed. + * + * It doesn't have a single value for a state. Instead, in a location on cloud storage or disk, it + * writes every state it's encountered. + */ +public interface AsyncStateManager { + + /** + * Writes a file containing a string value to a location designated by the input status. + */ + void write(final KubePodInfo kubePodInfo, final AsyncKubePodStatus status, final String value); + + /** + * Writes an empty file to a location designated by the input status. + */ + void write(final KubePodInfo kubePodInfo, final AsyncKubePodStatus status); + + /** + * Interprets the state given all written state messages for the pod. + */ + AsyncKubePodStatus getStatus(final KubePodInfo kubePodInfo); + + /** + * @return the output stored in the success file. This can be an empty string. 
+ * @throws IllegalArgumentException if no success file exists + */ + String getOutput(final KubePodInfo kubePodInfo) throws IllegalArgumentException; + +} diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java index c75b0e3a65d64..476f049d608f6 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java @@ -4,16 +4,25 @@ package io.airbyte.container_orchestrator; -import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.logging.LoggingHelper; +import io.airbyte.commons.logging.MdcScope; import io.airbyte.config.Configs; import io.airbyte.config.EnvConfigs; +import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.scheduler.models.JobRunConfig; import io.airbyte.workers.WorkerApp; import io.airbyte.workers.WorkerConfigs; +import io.airbyte.workers.WorkerUtils; +import io.airbyte.workers.process.AsyncKubePodStatus; +import io.airbyte.workers.process.AsyncOrchestratorPodProcess; import io.airbyte.workers.process.DockerProcessFactory; +import io.airbyte.workers.process.KubePodInfo; +import io.airbyte.workers.process.KubePodProcess; import io.airbyte.workers.process.KubePortManagerSingleton; import io.airbyte.workers.process.KubeProcessFactory; import io.airbyte.workers.process.ProcessFactory; import io.airbyte.workers.process.WorkerHeartbeatServer; +import io.airbyte.workers.storage.StateClients; import io.airbyte.workers.temporal.sync.DbtLauncherWorker; import io.airbyte.workers.temporal.sync.NormalizationLauncherWorker; import io.airbyte.workers.temporal.sync.OrchestratorConstants; @@ -22,9 +31,9 @@ import io.fabric8.kubernetes.client.KubernetesClient; import java.io.IOException; import java.net.InetAddress; -import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; +import java.util.Optional; import lombok.extern.slf4j.Slf4j; /** @@ -41,54 +50,129 @@ @Slf4j public class ContainerOrchestratorApp { - public static void main(final String[] args) throws Exception { - WorkerHeartbeatServer heartbeatServer = null; + private final String application; + private final Map envMap; + private final JobRunConfig jobRunConfig; + private final KubePodInfo kubePodInfo; + private final Configs configs; - try { - // read files that contain all necessary configuration - final String application = Files.readString(Path.of(OrchestratorConstants.INIT_FILE_APPLICATION)); - final Map envMap = - (Map) Jsons.deserialize(Files.readString(Path.of(OrchestratorConstants.INIT_FILE_ENV_MAP)), Map.class); + public ContainerOrchestratorApp( + final String application, + final Map envMap, + final JobRunConfig jobRunConfig, + final KubePodInfo kubePodInfo) { + this.application = application; + this.envMap = envMap; + this.jobRunConfig = jobRunConfig; + this.kubePodInfo = kubePodInfo; + this.configs = new EnvConfigs(envMap); + } - final Configs configs = new EnvConfigs(envMap); + private void configureLogging() { + for (String envVar : OrchestratorConstants.ENV_VARS_TO_TRANSFER) { + if (envMap.containsKey(envVar)) { + System.setProperty(envVar, envMap.get(envVar)); + } + } - heartbeatServer = new WorkerHeartbeatServer(WorkerApp.KUBE_HEARTBEAT_PORT); - heartbeatServer.startBackground(); + final var logClient = 
LogClientSingleton.getInstance(); + logClient.setJobMdc( + configs.getWorkerEnvironment(), + configs.getLogConfigs(), + WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId())); + } + + /** + * Handles state updates (including writing failures) and running the job orchestrator. As much of + * the initialization as possible should go in here so it's logged properly and the state storage is + * updated appropriately. + */ + private void runInternal(final DefaultAsyncStateManager asyncStateManager) { + try { + asyncStateManager.write(kubePodInfo, AsyncKubePodStatus.INITIALIZING); final WorkerConfigs workerConfigs = new WorkerConfigs(configs); final ProcessFactory processFactory = getProcessBuilderFactory(configs, workerConfigs); final JobOrchestrator jobOrchestrator = getJobOrchestrator(configs, workerConfigs, processFactory, application); - log.info("Starting {} orchestrator...", jobOrchestrator.getOrchestratorName()); - jobOrchestrator.runJob(); - log.info("{} orchestrator complete!", jobOrchestrator.getOrchestratorName()); - } finally { - if (heartbeatServer != null) { - log.info("Shutting down heartbeat server..."); - heartbeatServer.stop(); + if (jobOrchestrator == null) { + throw new IllegalStateException("Could not find job orchestrator for application: " + application); } + + final var heartbeatServer = new WorkerHeartbeatServer(WorkerApp.KUBE_HEARTBEAT_PORT); + heartbeatServer.startBackground(); + + asyncStateManager.write(kubePodInfo, AsyncKubePodStatus.RUNNING); + + final Optional output = jobOrchestrator.runJob(); + + asyncStateManager.write(kubePodInfo, AsyncKubePodStatus.SUCCEEDED, output.orElse("")); + + // required to kill clients with thread pools + System.exit(0); + } catch (Throwable t) { + asyncStateManager.write(kubePodInfo, AsyncKubePodStatus.FAILED); + System.exit(1); } + } - // required to kill kube client - log.info("Runner closing..."); - System.exit(0); + /** + * Configures logging/mdc scope, and creates all objects necessary to handle state updates. + * Everything else is delegated to {@link ContainerOrchestratorApp#runInternal}. + */ + public void run() { + configureLogging(); + + // set mdc scope for the remaining execution + try (final var mdcScope = new MdcScope.Builder() + .setLogPrefix(application) + .setPrefixColor(LoggingHelper.Color.CYAN_BACKGROUND) + .build()) { + + // IMPORTANT: Changing the storage location will orphan already existing kube pods when the new + // version is deployed! 
+ final var documentStoreClient = StateClients.create(configs.getStateStorageCloudConfigs(), WorkerApp.STATE_STORAGE_PREFIX); + final var asyncStateManager = new DefaultAsyncStateManager(documentStoreClient); + + runInternal(asyncStateManager); + } + } + + public static void main(final String[] args) { + try { + // wait for config files to be copied + final var successFile = Path.of(KubePodProcess.CONFIG_DIR, KubePodProcess.SUCCESS_FILE_NAME); + + while (!successFile.toFile().exists()) { + log.info("Waiting for config file transfers to complete..."); + Thread.sleep(1000); + } + + final var applicationName = JobOrchestrator.readApplicationName(); + final var envMap = JobOrchestrator.readEnvMap(); + final var jobRunConfig = JobOrchestrator.readJobRunConfig(); + final var kubePodInfo = JobOrchestrator.readKubePodInfo(); + + final var app = new ContainerOrchestratorApp(applicationName, envMap, jobRunConfig, kubePodInfo); + app.run(); + } catch (Throwable t) { + log.info("Orchestrator failed...", t); + System.exit(1); + } } private static JobOrchestrator getJobOrchestrator(final Configs configs, final WorkerConfigs workerConfigs, final ProcessFactory processFactory, final String application) { - if (application.equals(ReplicationLauncherWorker.REPLICATION)) { - return new ReplicationJobOrchestrator(configs, workerConfigs, processFactory); - } else if (application.equals(NormalizationLauncherWorker.NORMALIZATION)) { - return new NormalizationJobOrchestrator(configs, workerConfigs, processFactory); - } else if (application.equals(DbtLauncherWorker.DBT)) { - return new DbtJobOrchestrator(configs, workerConfigs, processFactory); - } else { - log.error("Runner failed", new IllegalStateException("Unexpected value: " + application)); - System.exit(1); - throw new IllegalStateException(); // should never be reached, but necessary to compile - } + + return switch (application) { + case ReplicationLauncherWorker.REPLICATION -> new ReplicationJobOrchestrator(configs, workerConfigs, processFactory); + case NormalizationLauncherWorker.NORMALIZATION -> new NormalizationJobOrchestrator(configs, workerConfigs, processFactory); + case DbtLauncherWorker.DBT -> new DbtJobOrchestrator(configs, workerConfigs, processFactory); + case AsyncOrchestratorPodProcess.NO_OP -> new NoOpOrchestrator(); + default -> null; + }; } /** diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java index fc426adb5ed22..a7b043f75a1ea 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DbtJobOrchestrator.java @@ -13,9 +13,11 @@ import io.airbyte.workers.WorkerConfigs; import io.airbyte.workers.WorkerUtils; import io.airbyte.workers.normalization.NormalizationRunnerFactory; +import io.airbyte.workers.process.KubePodProcess; import io.airbyte.workers.process.ProcessFactory; import io.airbyte.workers.temporal.sync.ReplicationLauncherWorker; import java.nio.file.Path; +import java.util.Optional; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -42,12 +44,13 @@ public Class getInputClass() { } @Override - public void runJob() throws Exception { - final JobRunConfig jobRunConfig = readJobRunConfig(); + public Optional runJob() throws Exception { + final JobRunConfig jobRunConfig = JobOrchestrator.readJobRunConfig(); final OperatorDbtInput 
dbtInput = readInput(); final IntegrationLauncherConfig destinationLauncherConfig = JobOrchestrator.readAndDeserializeFile( - ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG, IntegrationLauncherConfig.class); + Path.of(KubePodProcess.CONFIG_DIR, ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG), + IntegrationLauncherConfig.class); log.info("Setting up dbt worker..."); final DbtTransformationWorker worker = new DbtTransformationWorker( @@ -65,6 +68,8 @@ public void runJob() throws Exception { log.info("Running dbt worker..."); final Path jobRoot = WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); worker.run(dbtInput, jobRoot); + + return Optional.empty(); } } diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DefaultAsyncStateManager.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DefaultAsyncStateManager.java new file mode 100644 index 0000000000000..212d9adef5786 --- /dev/null +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/DefaultAsyncStateManager.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.container_orchestrator; + +import io.airbyte.workers.process.AsyncKubePodStatus; +import io.airbyte.workers.process.KubePodInfo; +import io.airbyte.workers.storage.DocumentStoreClient; +import java.util.List; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class DefaultAsyncStateManager implements AsyncStateManager { + + private static final List STATUS_CHECK_ORDER = List.of( + // terminal states first + AsyncKubePodStatus.FAILED, + AsyncKubePodStatus.SUCCEEDED, + + // then check in progress state + AsyncKubePodStatus.RUNNING, + + // then check for initialization state + AsyncKubePodStatus.INITIALIZING); + + private final DocumentStoreClient documentStoreClient; + + public DefaultAsyncStateManager(final DocumentStoreClient documentStoreClient) { + this.documentStoreClient = documentStoreClient; + } + + @Override + public void write(final KubePodInfo kubePodInfo, final AsyncKubePodStatus status, final String value) { + final var key = getDocumentStoreKey(kubePodInfo, status); + log.info("Writing async status {} for {}...", status, kubePodInfo); + documentStoreClient.write(key, value); + } + + @Override + public void write(final KubePodInfo kubePodInfo, final AsyncKubePodStatus status) { + write(kubePodInfo, status, ""); + } + + /** + * Checks terminal states first, then running, then initialized. Defaults to not started. + * + * The order matters here! + */ + @Override + public AsyncKubePodStatus getStatus(KubePodInfo kubePodInfo) { + for (AsyncKubePodStatus status : STATUS_CHECK_ORDER) { + if (statusFileExists(kubePodInfo, status)) { + return status; + } + } + + return AsyncKubePodStatus.NOT_STARTED; + } + + @Override + public String getOutput(KubePodInfo kubePodInfo) throws IllegalArgumentException { + final var key = getDocumentStoreKey(kubePodInfo, AsyncKubePodStatus.SUCCEEDED); + final var output = documentStoreClient.read(key); + + if (output.isPresent()) { + return output.get(); + } else { + throw new IllegalArgumentException("Expected to retrieve output from a successfully completed pod!"); + } + } + + /** + * IMPORTANT: Changing the storage location will orphan already existing kube pods when the new + * version is deployed! 
+ */ + public static String getDocumentStoreKey(final KubePodInfo kubePodInfo, final AsyncKubePodStatus status) { + return kubePodInfo.namespace() + "/" + kubePodInfo.name() + "/" + status.name(); + } + + private boolean statusFileExists(final KubePodInfo kubePodInfo, final AsyncKubePodStatus status) { + final var key = getDocumentStoreKey(kubePodInfo, status); + return documentStoreClient.read(key).isPresent(); + } + +} diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/JobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/JobOrchestrator.java index d207763a4f28f..8c572cd97bf6b 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/JobOrchestrator.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/JobOrchestrator.java @@ -6,10 +6,15 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.scheduler.models.JobRunConfig; +import io.airbyte.workers.process.AsyncOrchestratorPodProcess; +import io.airbyte.workers.process.KubePodInfo; +import io.airbyte.workers.process.KubePodProcess; import io.airbyte.workers.temporal.sync.OrchestratorConstants; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Map; +import java.util.Optional; /** * The job orchestrator helps abstract over container launcher application differences across @@ -27,19 +32,46 @@ public interface JobOrchestrator { // reads input from a file that was copied to the container launcher default INPUT readInput() throws IOException { - return readAndDeserializeFile(OrchestratorConstants.INIT_FILE_INPUT, getInputClass()); + return readAndDeserializeFile(Path.of(KubePodProcess.CONFIG_DIR, OrchestratorConstants.INIT_FILE_INPUT), getInputClass()); } - // reads the job run config from a file that was copied to the container launcher - default JobRunConfig readJobRunConfig() throws IOException { - return readAndDeserializeFile(OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, JobRunConfig.class); + /** + * reads the application name from a file that was copied to the container launcher + */ + static String readApplicationName() throws IOException { + return Files.readString(Path.of(KubePodProcess.CONFIG_DIR, OrchestratorConstants.INIT_FILE_APPLICATION)); } - // the unique logic that belongs to each type of job belongs here - void runJob() throws Exception; + /** + * reads the environment variable map from a file that was copied to the container launcher + */ + static Map readEnvMap() throws IOException { + return (Map) readAndDeserializeFile(Path.of(KubePodProcess.CONFIG_DIR, OrchestratorConstants.INIT_FILE_ENV_MAP), Map.class); + } + + /** + * reads the job run config from a file that was copied to the container launcher + */ + static JobRunConfig readJobRunConfig() throws IOException { + return readAndDeserializeFile(Path.of(KubePodProcess.CONFIG_DIR, OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG), JobRunConfig.class); + } + + /** + * reads the kube pod info from a file that was copied to the container launcher + */ + static KubePodInfo readKubePodInfo() throws IOException { + return readAndDeserializeFile(Path.of(KubePodProcess.CONFIG_DIR, AsyncOrchestratorPodProcess.KUBE_POD_INFO), KubePodInfo.class); + } + + /** + * Contains the unique logic that belongs to each type of job. + * + * @return an optional output value to place within the output document store item. 
+   */
+  Optional<String> runJob() throws Exception;
 
-  static <T> T readAndDeserializeFile(String path, Class<T> type) throws IOException {
-    return Jsons.deserialize(Files.readString(Path.of(path)), type);
+  static <T> T readAndDeserializeFile(Path path, Class<T> type) throws IOException {
+    return Jsons.deserialize(Files.readString(path), type);
   }
 
 }
diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NoOpOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NoOpOrchestrator.java
new file mode 100644
index 0000000000000..77181e8176440
--- /dev/null
+++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NoOpOrchestrator.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.container_orchestrator;
+
+import io.airbyte.workers.process.AsyncOrchestratorPodProcess;
+import java.util.Optional;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * For testing only.
+ */
+@Slf4j
+public class NoOpOrchestrator implements JobOrchestrator<String> {
+
+  @Override
+  public String getOrchestratorName() {
+    return AsyncOrchestratorPodProcess.NO_OP;
+  }
+
+  @Override
+  public Class<String> getInputClass() {
+    return String.class;
+  }
+
+  @Override
+  public Optional<String> runJob() throws Exception {
+    log.info("Running no-op job.");
+    return Optional.empty();
+  }
+
+}
diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java
index 6d8251e28947f..0462be271ca81 100644
--- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java
+++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/NormalizationJobOrchestrator.java
@@ -13,9 +13,11 @@
 import io.airbyte.workers.WorkerConfigs;
 import io.airbyte.workers.WorkerUtils;
 import io.airbyte.workers.normalization.NormalizationRunnerFactory;
+import io.airbyte.workers.process.KubePodProcess;
 import io.airbyte.workers.process.ProcessFactory;
 import io.airbyte.workers.temporal.sync.ReplicationLauncherWorker;
 import java.nio.file.Path;
+import java.util.Optional;
 import lombok.extern.slf4j.Slf4j;
 
 @Slf4j
@@ -42,12 +44,13 @@ public Class<NormalizationInput> getInputClass() {
   }
 
   @Override
-  public void runJob() throws Exception {
-    final JobRunConfig jobRunConfig = readJobRunConfig();
+  public Optional<String> runJob() throws Exception {
+    final JobRunConfig jobRunConfig = JobOrchestrator.readJobRunConfig();
     final NormalizationInput normalizationInput = readInput();
 
     final IntegrationLauncherConfig destinationLauncherConfig = JobOrchestrator.readAndDeserializeFile(
-        ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG, IntegrationLauncherConfig.class);
+        Path.of(KubePodProcess.CONFIG_DIR, ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG),
+        IntegrationLauncherConfig.class);
 
     log.info("Setting up normalization worker...");
     final NormalizationWorker normalizationWorker = new DefaultNormalizationWorker(
@@ -64,6 +67,7 @@ public void runJob() throws Exception {
     final Path jobRoot = WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId());
 
     normalizationWorker.run(normalizationInput, jobRoot);
+    return Optional.empty();
   }
 
 }
diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java 
b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java index cbcc4338c0562..f7f515ce4d180 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ReplicationJobOrchestrator.java @@ -17,6 +17,7 @@ import io.airbyte.workers.WorkerUtils; import io.airbyte.workers.process.AirbyteIntegrationLauncher; import io.airbyte.workers.process.IntegrationLauncher; +import io.airbyte.workers.process.KubePodProcess; import io.airbyte.workers.process.ProcessFactory; import io.airbyte.workers.protocols.airbyte.AirbyteMessageTracker; import io.airbyte.workers.protocols.airbyte.AirbyteSource; @@ -26,6 +27,7 @@ import io.airbyte.workers.protocols.airbyte.NamespacingMapper; import io.airbyte.workers.temporal.sync.ReplicationLauncherWorker; import java.nio.file.Path; +import java.util.Optional; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -52,15 +54,17 @@ public Class getInputClass() { } @Override - public void runJob() throws Exception { - final JobRunConfig jobRunConfig = readJobRunConfig(); + public Optional runJob() throws Exception { + final JobRunConfig jobRunConfig = JobOrchestrator.readJobRunConfig(); final StandardSyncInput syncInput = readInput(); final IntegrationLauncherConfig sourceLauncherConfig = JobOrchestrator.readAndDeserializeFile( - ReplicationLauncherWorker.INIT_FILE_SOURCE_LAUNCHER_CONFIG, IntegrationLauncherConfig.class); + Path.of(KubePodProcess.CONFIG_DIR, ReplicationLauncherWorker.INIT_FILE_SOURCE_LAUNCHER_CONFIG), + IntegrationLauncherConfig.class); final IntegrationLauncherConfig destinationLauncherConfig = JobOrchestrator.readAndDeserializeFile( - ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG, IntegrationLauncherConfig.class); + Path.of(KubePodProcess.CONFIG_DIR, ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG), + IntegrationLauncherConfig.class); log.info("Setting up source launcher..."); final IntegrationLauncher sourceLauncher = new AirbyteIntegrationLauncher( @@ -97,10 +101,8 @@ public void runJob() throws Exception { final Path jobRoot = WorkerUtils.getJobRoot(configs.getWorkspaceRoot(), jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); final ReplicationOutput replicationOutput = replicationWorker.run(syncInput, jobRoot); - log.info("Sending output..."); - // this uses stdout directly because it shouldn't have the logging related prefix - // the replication output is read from the container that launched the runner - System.out.println(Jsons.serialize(replicationOutput)); + log.info("Returning output..."); + return Optional.of(Jsons.serialize(replicationOutput)); } } diff --git a/airbyte-container-orchestrator/src/test/java/io/airbyte/container_orchestrator/DefaultAsyncStateManagerTest.java b/airbyte-container-orchestrator/src/test/java/io/airbyte/container_orchestrator/DefaultAsyncStateManagerTest.java new file mode 100644 index 0000000000000..c543d3d6a8238 --- /dev/null +++ b/airbyte-container-orchestrator/src/test/java/io/airbyte/container_orchestrator/DefaultAsyncStateManagerTest.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.container_orchestrator; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.*; +import static org.mockito.Mockito.when; + +import io.airbyte.workers.process.AsyncKubePodStatus; +import io.airbyte.workers.process.KubePodInfo; +import io.airbyte.workers.storage.DocumentStoreClient; +import java.util.Optional; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class DefaultAsyncStateManagerTest { + + private static final KubePodInfo KUBE_POD_INFO = new KubePodInfo("default", "pod1"); + + private DocumentStoreClient documentStore; + private AsyncStateManager stateManager; + + @BeforeEach + void setup() { + documentStore = mock(DocumentStoreClient.class); + stateManager = new DefaultAsyncStateManager(documentStore); + } + + @Test + void testEmptyWrite() { + stateManager.write(KUBE_POD_INFO, AsyncKubePodStatus.INITIALIZING); + + // test for overwrite (which should be allowed) + stateManager.write(KUBE_POD_INFO, AsyncKubePodStatus.INITIALIZING); + + final var key = getKey(AsyncKubePodStatus.INITIALIZING); + verify(documentStore, times(2)).write(key, ""); + } + + @Test + void testContentfulWrite() { + stateManager.write(KUBE_POD_INFO, AsyncKubePodStatus.SUCCEEDED, "some output value"); + + final var key = getKey(AsyncKubePodStatus.SUCCEEDED); + verify(documentStore, times(1)).write(key, "some output value"); + } + + @Test + void testReadingOutputWhenItExists() { + final var key = getKey(AsyncKubePodStatus.SUCCEEDED); + when(documentStore.read(key)).thenReturn(Optional.of("some output value")); + assertEquals("some output value", stateManager.getOutput(KUBE_POD_INFO)); + } + + @Test + void testReadingOutputWhenItDoesNotExist() { + // getting the output should throw an exception when there is no record in the document store + assertThrows(IllegalArgumentException.class, () -> { + stateManager.getOutput(KUBE_POD_INFO); + }); + } + + @Test + void testSuccessfulStatusRetrievalLifecycle() { + when(documentStore.read(getKey(AsyncKubePodStatus.INITIALIZING))).thenReturn(Optional.empty()); + final var beforeInitializingStatus = stateManager.getStatus(KUBE_POD_INFO); + assertEquals(AsyncKubePodStatus.NOT_STARTED, beforeInitializingStatus); + + when(documentStore.read(getKey(AsyncKubePodStatus.INITIALIZING))).thenReturn(Optional.of("")); + final var initializingStatus = stateManager.getStatus(KUBE_POD_INFO); + assertEquals(AsyncKubePodStatus.INITIALIZING, initializingStatus); + + when(documentStore.read(getKey(AsyncKubePodStatus.RUNNING))).thenReturn(Optional.of("")); + final var runningStatus = stateManager.getStatus(KUBE_POD_INFO); + assertEquals(AsyncKubePodStatus.RUNNING, runningStatus); + + when(documentStore.read(getKey(AsyncKubePodStatus.SUCCEEDED))).thenReturn(Optional.of("output")); + final var succeededStatus = stateManager.getStatus(KUBE_POD_INFO); + assertEquals(AsyncKubePodStatus.SUCCEEDED, succeededStatus); + } + + @Test + void testFailureStatusRetrievalLifecycle() { + when(documentStore.read(getKey(AsyncKubePodStatus.INITIALIZING))).thenReturn(Optional.empty()); + final var beforeInitializingStatus = stateManager.getStatus(KUBE_POD_INFO); + assertEquals(AsyncKubePodStatus.NOT_STARTED, beforeInitializingStatus); + + when(documentStore.read(getKey(AsyncKubePodStatus.INITIALIZING))).thenReturn(Optional.of("")); + final var initializingStatus = stateManager.getStatus(KUBE_POD_INFO); + assertEquals(AsyncKubePodStatus.INITIALIZING, 
initializingStatus); + + when(documentStore.read(getKey(AsyncKubePodStatus.RUNNING))).thenReturn(Optional.of("")); + final var runningStatus = stateManager.getStatus(KUBE_POD_INFO); + assertEquals(AsyncKubePodStatus.RUNNING, runningStatus); + + when(documentStore.read(getKey(AsyncKubePodStatus.FAILED))).thenReturn(Optional.of("output")); + final var failedStatus = stateManager.getStatus(KUBE_POD_INFO); + assertEquals(AsyncKubePodStatus.FAILED, failedStatus); + } + + private static String getKey(final AsyncKubePodStatus status) { + return DefaultAsyncStateManager.getDocumentStoreKey(KUBE_POD_INFO, status); + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java index 5bf166662218f..bc9e89b01e69b 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java @@ -39,6 +39,8 @@ import io.airbyte.workers.process.KubeProcessFactory; import io.airbyte.workers.process.ProcessFactory; import io.airbyte.workers.process.WorkerHeartbeatServer; +import io.airbyte.workers.storage.DocumentStoreClient; +import io.airbyte.workers.storage.StateClients; import io.airbyte.workers.temporal.TemporalClient; import io.airbyte.workers.temporal.TemporalJobType; import io.airbyte.workers.temporal.TemporalUtils; @@ -84,9 +86,12 @@ public class WorkerApp { private static final Logger LOGGER = LoggerFactory.getLogger(WorkerApp.class); public static final int KUBE_HEARTBEAT_PORT = 9000; + // IMPORTANT: Changing the storage location will orphan already existing kube pods when the new + // version is deployed! + public static final Path STATE_STORAGE_PREFIX = Path.of("/state"); + private final Path workspaceRoot; private final ProcessFactory jobProcessFactory; - private final ProcessFactory orchestratorProcessFactory; private final SecretsHydrator secretsHydrator; private final WorkflowServiceStubs temporalService; private final ConfigRepository configRepository; @@ -103,7 +108,7 @@ public class WorkerApp { private final TemporalWorkerRunFactory temporalWorkerRunFactory; private final Configs configs; private final ConnectionHelper connectionHelper; - private final boolean containerOrchestratorEnabled; + private final Optional containerOrchestratorConfig; private final JobNotifier jobNotifier; private final JobTracker jobTracker; @@ -147,10 +152,9 @@ public void start() { final NormalizationActivityImpl normalizationActivity = new NormalizationActivityImpl( - containerOrchestratorEnabled, + containerOrchestratorConfig, workerConfigs, jobProcessFactory, - orchestratorProcessFactory, secretsHydrator, workspaceRoot, workerEnvironment, @@ -161,10 +165,9 @@ public void start() { airbyteVersion); final DbtTransformationActivityImpl dbtTransformationActivity = new DbtTransformationActivityImpl( - containerOrchestratorEnabled, + containerOrchestratorConfig, workerConfigs, jobProcessFactory, - orchestratorProcessFactory, secretsHydrator, workspaceRoot, workerEnvironment, @@ -177,10 +180,9 @@ public void start() { final PersistStateActivityImpl persistStateActivity = new PersistStateActivityImpl(workspaceRoot, configRepository); final Worker syncWorker = factory.newWorker(TemporalJobType.SYNC.name(), getWorkerOptions(maxWorkers.getMaxSyncWorkers())); final ReplicationActivityImpl replicationActivity = getReplicationActivityImpl( - containerOrchestratorEnabled, + containerOrchestratorConfig, workerConfigs, jobProcessFactory, - orchestratorProcessFactory, 
secretsHydrator, workspaceRoot, workerEnvironment, @@ -226,10 +228,9 @@ public void start() { * launching or not. */ private ReplicationActivityImpl getReplicationActivityImpl( - final boolean containerOrchestratorEnabled, + final Optional containerOrchestratorConfig, final WorkerConfigs workerConfigs, final ProcessFactory jobProcessFactory, - final ProcessFactory orchestratorProcessFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, final WorkerEnvironment workerEnvironment, @@ -238,33 +239,19 @@ private ReplicationActivityImpl getReplicationActivityImpl( final String databasePassword, final String databaseUrl, final String airbyteVersion) { - if (containerOrchestratorEnabled) { - return new ReplicationActivityImpl( - containerOrchestratorEnabled, - workerConfigs, - orchestratorProcessFactory, - secretsHydrator, - workspaceRoot, - workerEnvironment, - logConfigs, - databaseUser, - databasePassword, - databaseUrl, - airbyteVersion); - } else { - return new ReplicationActivityImpl( - containerOrchestratorEnabled, - workerConfigs, - jobProcessFactory, - secretsHydrator, - workspaceRoot, - workerEnvironment, - logConfigs, - databaseUser, - databasePassword, - databaseUrl, - airbyteVersion); - } + + return new ReplicationActivityImpl( + containerOrchestratorConfig, + workerConfigs, + jobProcessFactory, + secretsHydrator, + workspaceRoot, + workerEnvironment, + logConfigs, + databaseUser, + databasePassword, + databaseUrl, + airbyteVersion); } private static ProcessFactory getJobProcessFactory(final Configs configs) throws IOException { @@ -287,36 +274,34 @@ private static ProcessFactory getJobProcessFactory(final Configs configs) throws } } - private static ProcessFactory getOrchestratorProcessFactory(final Configs configs) throws IOException { - final WorkerConfigs workerConfigs = new WorkerConfigs(configs); - - if (configs.getWorkerEnvironment() == Configs.WorkerEnvironment.KUBERNETES) { - final KubernetesClient fabricClient = new DefaultKubernetesClient(); - final String localIp = InetAddress.getLocalHost().getHostAddress(); - final String kubeHeartbeatUrl = localIp + ":" + KUBE_HEARTBEAT_PORT; - LOGGER.info("Using Kubernetes namespace: {}", configs.getJobKubeNamespace()); - return new KubeProcessFactory(workerConfigs, configs.getJobKubeNamespace(), fabricClient, kubeHeartbeatUrl, true); - } else { - return new DockerProcessFactory( - workerConfigs, - configs.getWorkspaceRoot(), - configs.getWorkspaceDockerMount(), - configs.getLocalDockerMount(), - - // this needs to point at the Docker network Airbyte is running on, not the host network or job - // runner network, otherwise it can't talk with the db/minio - "airbyte_default", - - true); - } - } - private static WorkerOptions getWorkerOptions(final int max) { return WorkerOptions.newBuilder() .setMaxConcurrentActivityExecutionSize(max) .build(); } + public static record ContainerOrchestratorConfig( + String namespace, + DocumentStoreClient documentStoreClient, + KubernetesClient kubernetesClient) {} + + static Optional getContainerOrchestratorConfig(Configs configs) { + if (configs.getContainerOrchestratorEnabled()) { + final var kubernetesClient = new DefaultKubernetesClient(); + + final DocumentStoreClient documentStoreClient = StateClients.create( + configs.getStateStorageCloudConfigs(), + STATE_STORAGE_PREFIX); + + return Optional.of(new ContainerOrchestratorConfig( + configs.getJobKubeNamespace(), + documentStoreClient, + kubernetesClient)); + } else { + return Optional.empty(); + } + } + public static void 
main(final String[] args) throws IOException, InterruptedException { final Configs configs = new EnvConfigs(); @@ -336,7 +321,6 @@ public static void main(final String[] args) throws IOException, InterruptedExce } final ProcessFactory jobProcessFactory = getJobProcessFactory(configs); - final ProcessFactory orchestratorProcessFactory = getOrchestratorProcessFactory(configs); final WorkflowServiceStubs temporalService = TemporalUtils.createTemporalService(temporalHost); @@ -390,6 +374,8 @@ public static void main(final String[] args) throws IOException, InterruptedExce workspaceHelper, workerConfigs); + final Optional containerOrchestratorConfig = getContainerOrchestratorConfig(configs); + final JobNotifier jobNotifier = new JobNotifier( configs.getWebappUrl(), configRepository, @@ -401,7 +387,6 @@ public static void main(final String[] args) throws IOException, InterruptedExce new WorkerApp( workspaceRoot, jobProcessFactory, - orchestratorProcessFactory, secretsHydrator, temporalService, configRepository, @@ -418,7 +403,7 @@ public static void main(final String[] args) throws IOException, InterruptedExce temporalWorkerRunFactory, configs, connectionHelper, - configs.getContainerOrchestratorEnabled(), + containerOrchestratorConfig, jobNotifier, jobTracker).start(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/AsyncKubePodStatus.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/AsyncKubePodStatus.java new file mode 100644 index 0000000000000..58a4e7de1c226 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/AsyncKubePodStatus.java @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.process; + +public enum AsyncKubePodStatus { + NOT_STARTED, // Pod hasn't been started yet. + INITIALIZING, // On-start container started but not completed + RUNNING, // Main container posted running + FAILED, // Reported status was "failed" or pod was in Error (or other terminal state) without a reported + // status. + SUCCEEDED; // Reported status was "success" so both main and on-start succeeded. +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/AsyncOrchestratorPodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/AsyncOrchestratorPodProcess.java new file mode 100644 index 0000000000000..0e0ea440d8788 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/AsyncOrchestratorPodProcess.java @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.workers.process; + +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.ResourceRequirements; +import io.airbyte.workers.WorkerApp; +import io.airbyte.workers.storage.DocumentStoreClient; +import io.fabric8.kubernetes.api.model.ContainerBuilder; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodBuilder; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeBuilder; +import io.fabric8.kubernetes.api.model.VolumeMount; +import io.fabric8.kubernetes.api.model.VolumeMountBuilder; +import io.fabric8.kubernetes.client.KubernetesClient; +import java.io.IOException; +import java.nio.file.Path; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; + +/** + * This process allows creating and managing a pod outside the lifecycle of the launching + * application. Unlike {@link KubePodProcess} there is no heartbeat mechanism that requires the + * launching pod and the launched pod to co-exist for the duration of execution for the launched + * pod. + * + * Instead, this process creates the pod and interacts with a document store on cloud storage to + * understand the state of the created pod. + * + * The document store is considered to be the truth when retrieving the status for an async pod + * process. If the store isn't updated by the underlying pod, it will appear as failed. + */ +@Slf4j +public class AsyncOrchestratorPodProcess implements KubePod { + + public static final String KUBE_POD_INFO = "KUBE_POD_INFO"; + public static final String NO_OP = "NO_OP"; + + private final KubePodInfo kubePodInfo; + private final DocumentStoreClient documentStoreClient; + private final KubernetesClient kubernetesClient; + private final AtomicReference> cachedExitValue; + + public AsyncOrchestratorPodProcess( + final KubePodInfo kubePodInfo, + final DocumentStoreClient documentStoreClient, + final KubernetesClient kubernetesClient) { + this.kubePodInfo = kubePodInfo; + this.documentStoreClient = documentStoreClient; + this.kubernetesClient = kubernetesClient; + this.cachedExitValue = new AtomicReference<>(Optional.empty()); + } + + public Optional getOutput() { + final var possibleOutput = getDocument(AsyncKubePodStatus.SUCCEEDED.name()); + + if (possibleOutput.isPresent() && possibleOutput.get().isBlank()) { + return Optional.empty(); + } else { + return possibleOutput; + } + } + + private int computeExitValue() { + final AsyncKubePodStatus docStoreStatus = getDocStoreStatus(); + + // trust the doc store if it's in a terminal state + if (docStoreStatus.equals(AsyncKubePodStatus.FAILED)) { + return 1; + } else if (docStoreStatus.equals(AsyncKubePodStatus.SUCCEEDED)) { + return 0; + } + + final Pod pod = kubernetesClient.pods() + .inNamespace(getInfo().namespace()) + .withName(getInfo().name()) + .get(); + + // Since the pod creation blocks until the pod is created the first time, + // if the pod no longer exists (and we don't have a success/fail document) + // we must be in a failure state. If it wasn't able to write out its status + // we must assume failure, since the document store is the "truth" for + // async pod status. 
+ if (pod == null) { + return 1; + } + + // If the pod does exist, it may be in a terminal (error or completed) state. + final boolean isTerminal = KubePodProcess.isTerminal(pod); + + if (isTerminal) { + // In case the doc store was updated in between when we pulled it and when + // we read the status from the Kubernetes API, we need to check the doc store again. + final AsyncKubePodStatus secondDocStoreStatus = getDocStoreStatus(); + if (secondDocStoreStatus.equals(AsyncKubePodStatus.FAILED)) { + return 1; + } else if (secondDocStoreStatus.equals(AsyncKubePodStatus.SUCCEEDED)) { + return 0; + } else { + // otherwise, the actual pod is terminal when the doc store says it shouldn't be. + return 1; + } + } + + // Otherwise, throw an exception because this is still running, which will be caught in hasExited + switch (docStoreStatus) { + case NOT_STARTED -> throw new IllegalThreadStateException("Pod hasn't started yet."); + case INITIALIZING -> throw new IllegalThreadStateException("Pod is initializing."); + default -> throw new IllegalThreadStateException("Pod is running."); + } + } + + @Override + public int exitValue() { + final var optionalCached = cachedExitValue.get(); + + if (optionalCached.isPresent()) { + return optionalCached.get(); + } else { + final var exitValue = computeExitValue(); + cachedExitValue.set(Optional.of(exitValue)); + return exitValue; + } + } + + @Override + public void destroy() { + final var wasDestroyed = kubernetesClient.pods() + .inNamespace(getInfo().namespace()) + .withName(getInfo().name()) + .delete(); + + if (wasDestroyed) { + log.info("Deleted pod {} in namespace {}", getInfo().name(), getInfo().namespace()); + } else { + log.warn("Wasn't able to delete pod {} from namespace {}", getInfo().name(), getInfo().namespace()); + } + } + + // implementation copied from Process.java since this isn't a real Process + public boolean hasExited() { + try { + exitValue(); + return true; + } catch (IllegalThreadStateException e) { + return false; + } + } + + @Override + public boolean waitFor(long timeout, TimeUnit unit) throws InterruptedException { + // implementation copied from Process.java since this isn't a real Process + long remainingNanos = unit.toNanos(timeout); + if (hasExited()) + return true; + if (timeout <= 0) + return false; + + long deadline = System.nanoTime() + remainingNanos; + do { + Thread.sleep(Math.min(TimeUnit.NANOSECONDS.toMillis(remainingNanos) + 1, 100)); + if (hasExited()) + return true; + remainingNanos = deadline - System.nanoTime(); + } while (remainingNanos > 0); + + return false; + } + + @Override + public int waitFor() throws InterruptedException { + boolean exited = waitFor(10, TimeUnit.DAYS); + + if (exited) { + return exitValue(); + } else { + throw new InterruptedException("Pod did not complete within timeout."); + } + } + + @Override + public KubePodInfo getInfo() { + return kubePodInfo; + } + + private Optional getDocument(final String key) { + return documentStoreClient.read(getInfo().namespace() + "/" + getInfo().name() + "/" + key); + } + + private boolean checkStatus(final AsyncKubePodStatus status) { + return getDocument(status.name()).isPresent(); + } + + /** + * Checks terminal states first, then running, then initialized. Defaults to not started. + * + * The order matters here! 
+   */
+  public AsyncKubePodStatus getDocStoreStatus() {
+    if (checkStatus(AsyncKubePodStatus.FAILED)) {
+      return AsyncKubePodStatus.FAILED;
+    } else if (checkStatus(AsyncKubePodStatus.SUCCEEDED)) {
+      return AsyncKubePodStatus.SUCCEEDED;
+    } else if (checkStatus(AsyncKubePodStatus.RUNNING)) {
+      return AsyncKubePodStatus.RUNNING;
+    } else if (checkStatus(AsyncKubePodStatus.INITIALIZING)) {
+      return AsyncKubePodStatus.INITIALIZING;
+    } else {
+      return AsyncKubePodStatus.NOT_STARTED;
+    }
+  }
+
+  // TODO: determine whether a Docker equivalent of this async pod creation will be needed.
+  public void create(final String airbyteVersion,
+                     final Map<String, String> allLabels,
+                     final ResourceRequirements resourceRequirements,
+                     final Map<String, String> fileMap,
+                     final Map<Integer, Integer> portMap) {
+    final Volume configVolume = new VolumeBuilder()
+        .withName("airbyte-config")
+        .withNewEmptyDir()
+        .withMedium("Memory")
+        .endEmptyDir()
+        .build();
+
+    final VolumeMount configVolumeMount = new VolumeMountBuilder()
+        .withName("airbyte-config")
+        .withMountPath(KubePodProcess.CONFIG_DIR)
+        .build();
+
+    final List<ContainerPort> containerPorts = KubePodProcess.createContainerPortList(portMap);
+
+    final var mainContainer = new ContainerBuilder()
+        .withName("main")
+        .withImage("airbyte/container-orchestrator:" + airbyteVersion)
+        .withResources(KubePodProcess.getResourceRequirementsBuilder(resourceRequirements).build())
+        .withPorts(containerPorts)
+        .withPorts(new ContainerPort(WorkerApp.KUBE_HEARTBEAT_PORT, null, null, null, null))
+        .withVolumeMounts(configVolumeMount)
+        .build();
+
+    final Pod pod = new PodBuilder()
+        .withApiVersion("v1")
+        .withNewMetadata()
+        .withName(getInfo().name())
+        .withNamespace(getInfo().namespace())
+        .withLabels(allLabels)
+        .endMetadata()
+        .withNewSpec()
+        .withServiceAccount("airbyte-admin").withAutomountServiceAccountToken(true)
+        .withRestartPolicy("Never")
+        .withContainers(mainContainer)
+        .withVolumes(configVolume)
+        .endSpec()
+        .build();
+
+    // should only create after the kubernetes API creates the pod
+    final var createdPod = kubernetesClient.pods().createOrReplace(pod);
+
+    log.info("Waiting for pod to be running...");
+    try {
+      kubernetesClient.pods()
+          .inNamespace(kubePodInfo.namespace())
+          .withName(kubePodInfo.name())
+          .waitUntilCondition(p -> {
+            return !p.getStatus().getContainerStatuses().isEmpty() && p.getStatus().getContainerStatuses().get(0).getState().getWaiting() == null;
+          }, 5, TimeUnit.MINUTES);
+    } catch (InterruptedException e) {
+      throw new RuntimeException(e);
+    }
+
+    final var containerState = kubernetesClient.pods()
+        .inNamespace(kubePodInfo.namespace())
+        .withName(kubePodInfo.name())
+        .get()
+        .getStatus()
+        .getContainerStatuses()
+        .get(0)
+        .getState();
+
+    if (containerState.getRunning() == null) {
+      throw new RuntimeException("Pod was not running, state was: " + containerState);
+    }
+
+    final var updatedFileMap = new HashMap<>(fileMap);
+    updatedFileMap.put(KUBE_POD_INFO, Jsons.serialize(kubePodInfo));
+
+    copyFilesToKubeConfigVolumeMain(createdPod, updatedFileMap);
+  }
+
+  public static void copyFilesToKubeConfigVolumeMain(final Pod podDefinition, final Map<String, String> files) {
+    final List<Map.Entry<String, String>> fileEntries = new ArrayList<>(files.entrySet());
+
+    // copy this file last to indicate that the copy has completed
+    fileEntries.add(new AbstractMap.SimpleEntry<>(KubePodProcess.SUCCESS_FILE_NAME, ""));
+
+    Path tmpFile = null;
+    Process proc = null;
+    for (final Map.Entry<String, String> file : fileEntries) {
+      try {
+        tmpFile = Path.of(IOs.writeFileToRandomTmpDir(file.getKey(), file.getValue()));
+
+        log.info("Uploading file: " + file.getKey());
+
final var containerPath = Path.of(KubePodProcess.CONFIG_DIR + "/" + file.getKey()); + + // using kubectl cp directly here, because both fabric and the official kube client APIs have + // several issues with copying files. See https://github.com/airbytehq/airbyte/issues/8643 for + // details. + final String command = String.format("kubectl cp %s %s/%s:%s -c %s", tmpFile, podDefinition.getMetadata().getNamespace(), + podDefinition.getMetadata().getName(), containerPath, "main"); + log.info(command); + + proc = Runtime.getRuntime().exec(command); + log.info("Waiting for kubectl cp to complete"); + final int exitCode = proc.waitFor(); + + if (exitCode != 0) { + throw new IOException("kubectl cp failed with exit code " + exitCode); + } + + log.info("kubectl cp complete, closing process"); + } catch (final IOException | InterruptedException e) { + throw new RuntimeException(e); + } finally { + if (tmpFile != null) { + tmpFile.toFile().delete(); + } + if (proc != null) { + proc.destroy(); + } + } + } + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePod.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePod.java new file mode 100644 index 0000000000000..2cd4640eb185f --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePod.java @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.process; + +import java.util.concurrent.TimeUnit; + +public interface KubePod { + + int exitValue(); + + void destroy(); + + boolean waitFor(final long timeout, final TimeUnit unit) throws InterruptedException; + + int waitFor() throws InterruptedException; + + KubePodInfo getInfo(); + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodInfo.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodInfo.java new file mode 100644 index 0000000000000..0a0ab5949a513 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodInfo.java @@ -0,0 +1,7 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.process; + +public record KubePodInfo(String namespace, String name) {} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index 0274f42126e54..c0b6f83401eb8 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -90,7 +90,7 @@ */ // TODO(Davin): Better test for this. See https://github.com/airbytehq/airbyte/issues/3700. 
-public class KubePodProcess extends Process { +public class KubePodProcess extends Process implements KubePod { private static final Logger LOGGER = LoggerFactory.getLogger(KubePodProcess.class); @@ -105,11 +105,11 @@ public class KubePodProcess extends Process { private static final String STDIN_PIPE_FILE = PIPES_DIR + "/stdin"; private static final String STDOUT_PIPE_FILE = PIPES_DIR + "/stdout"; private static final String STDERR_PIPE_FILE = PIPES_DIR + "/stderr"; - private static final String CONFIG_DIR = "/config"; + public static final String CONFIG_DIR = "/config"; private static final String TERMINATION_DIR = "/termination"; private static final String TERMINATION_FILE_MAIN = TERMINATION_DIR + "/main"; private static final String TERMINATION_FILE_CHECK = TERMINATION_DIR + "/check"; - private static final String SUCCESS_FILE_NAME = "FINISHED_UPLOADING"; + public static final String SUCCESS_FILE_NAME = "FINISHED_UPLOADING"; // 143 is the typical SIGTERM exit code. private static final int KILLED_EXIT_CODE = 143; @@ -182,7 +182,7 @@ private static Container getMain(final String image, // communicates its completion to the heartbeat check via a file and closes itself if the heartbeat // fails - final var mainCommand = MoreResources.readResource("entrypoints/main.sh") + final var mainCommand = MoreResources.readResource("entrypoints/sync/main.sh") .replaceAll("TERMINATION_FILE_CHECK", TERMINATION_FILE_CHECK) .replaceAll("TERMINATION_FILE_MAIN", TERMINATION_FILE_MAIN) .replaceAll("OPTIONAL_STDIN", optionalStdin) @@ -191,11 +191,7 @@ private static Container getMain(final String image, .replaceAll("STDERR_PIPE_FILE", STDERR_PIPE_FILE) .replaceAll("STDOUT_PIPE_FILE", STDOUT_PIPE_FILE); - final List containerPorts = internalToExternalPorts.keySet().stream() - .map(integer -> new ContainerPortBuilder() - .withContainerPort(integer) - .build()) - .collect(Collectors.toList()); + final List containerPorts = createContainerPortList(internalToExternalPorts); final List envVars = envMap.entrySet().stream() .map(entry -> new EnvVar(entry.getKey(), entry.getValue(), null)) @@ -218,9 +214,17 @@ private static Container getMain(final String image, return containerBuilder.build(); } - private static void copyFilesToKubeConfigVolume(final KubernetesClient client, - final Pod podDefinition, - final Map files) { + public static List createContainerPortList(final Map internalToExternalPorts) { + return internalToExternalPorts.keySet().stream() + .map(integer -> new ContainerPortBuilder() + .withContainerPort(integer) + .build()) + .collect(Collectors.toList()); + } + + public static void copyFilesToKubeConfigVolume(final KubernetesClient client, + final Pod podDefinition, + final Map files) { final List> fileEntries = new ArrayList<>(files.entrySet()); // copy this file last to indicate that the copy has completed @@ -262,7 +266,11 @@ private static void copyFilesToKubeConfigVolume(final KubernetesClient client, throw new RuntimeException(e); } finally { if (tmpFile != null) { - tmpFile.toFile().delete(); + try { + tmpFile.toFile().delete(); + } catch (Exception e) { + LOGGER.info("Caught exception when deleting temp file but continuing to allow process deletion.", e); + } } if (proc != null) { proc.destroy(); @@ -429,7 +437,7 @@ public KubePodProcess(final boolean isOrchestrator, // communicates via a file if it isn't able to reach the heartbeating server and succeeds if the // main container completes - final String heartbeatCommand = MoreResources.readResource("entrypoints/check.sh") + final String 
heartbeatCommand = MoreResources.readResource("entrypoints/sync/check.sh") .replaceAll("TERMINATION_FILE_CHECK", TERMINATION_FILE_CHECK) .replaceAll("TERMINATION_FILE_MAIN", TERMINATION_FILE_MAIN) .replaceAll("HEARTBEAT_URL", kubeHeartbeatUrl); @@ -552,7 +560,7 @@ public InputStream getErrorStream() { public int waitFor() throws InterruptedException { final Pod refreshedPod = fabricClient.pods().inNamespace(podDefinition.getMetadata().getNamespace()).withName(podDefinition.getMetadata().getName()).get(); - fabricClient.resource(refreshedPod).waitUntilCondition(this::isTerminal, 10, TimeUnit.DAYS); + fabricClient.resource(refreshedPod).waitUntilCondition(KubePodProcess::isTerminal, 10, TimeUnit.DAYS); wasKilled.set(true); return exitValue(); } @@ -586,6 +594,11 @@ public Info info() { return new KubePodProcessInfo(podDefinition.getMetadata().getName()); } + @Override + public KubePodInfo getInfo() { + return new KubePodInfo(podDefinition.getMetadata().getNamespace(), podDefinition.getMetadata().getName()); + } + /** * Close all open resource in the opposite order of resource creation. * @@ -620,7 +633,7 @@ private void close() { LOGGER.debug("Closed {}", podDefinition.getMetadata().getName()); } - private boolean isTerminal(final Pod pod) { + public static boolean isTerminal(final Pod pod) { if (pod.getStatus() != null) { // Check if "main" container has terminated, as that defines whether the parent process has // terminated. @@ -698,7 +711,7 @@ public int exitValue() { return returnCode; } - private static ResourceRequirementsBuilder getResourceRequirementsBuilder(final ResourceRequirements resourceRequirements) { + public static ResourceRequirementsBuilder getResourceRequirementsBuilder(final ResourceRequirements resourceRequirements) { if (resourceRequirements != null) { final Map requestMap = new HashMap<>(); // if null then use unbounded resource allocation diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessFactory.java index 5323d9e2e4e78..c6b18cfce653c 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubeProcessFactory.java @@ -117,12 +117,7 @@ public Process create(final String jobId, final int stderrLocalPort = KubePortManagerSingleton.getInstance().take(); LOGGER.info("{} stderrLocalPort = {}", podName, stderrLocalPort); - final var allLabels = new HashMap<>(customLabels); - final var generalKubeLabels = Map.of( - JOB_LABEL_KEY, jobId, - ATTEMPT_LABEL_KEY, String.valueOf(attempt), - WORKER_POD_LABEL_KEY, WORKER_POD_LABEL_VALUE); - allLabels.putAll(generalKubeLabels); + final var allLabels = getLabels(jobId, attempt, customLabels); return new KubePodProcess( isOrchestrator, @@ -155,6 +150,19 @@ public Process create(final String jobId, } } + public static Map getLabels(final String jobId, final int attemptId, final Map customLabels) { + final var allLabels = new HashMap<>(customLabels); + + final var generalKubeLabels = Map.of( + JOB_LABEL_KEY, jobId, + ATTEMPT_LABEL_KEY, String.valueOf(attemptId), + WORKER_POD_LABEL_KEY, WORKER_POD_LABEL_VALUE); + + allLabels.putAll(generalKubeLabels); + + return allLabels; + } + /** * Docker image names are by convention separated by slashes. The last portion is the image's name. * This is followed by a colon and a version number. e.g. 
airbyte/scheduler:v1 or diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/storage/StateClients.java b/airbyte-workers/src/main/java/io/airbyte/workers/storage/StateClients.java new file mode 100644 index 0000000000000..ce4be0d5e36ab --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/storage/StateClients.java @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.storage; + +import io.airbyte.config.storage.CloudStorageConfigs; +import java.nio.file.Path; + +public class StateClients { + + public static DocumentStoreClient create(final CloudStorageConfigs cloudStorageConfigs, final Path prefix) { + DocumentStoreClient documentStoreClient = null; + + switch (cloudStorageConfigs.getType()) { + case S3 -> { + documentStoreClient = S3DocumentStoreClient.s3(cloudStorageConfigs.getS3Config(), prefix); + } + case MINIO -> { + documentStoreClient = S3DocumentStoreClient.minio(cloudStorageConfigs.getMinioConfig(), prefix); + } + case GCS -> { + documentStoreClient = GcsDocumentStoreClient.create(cloudStorageConfigs.getGcsConfig(), prefix); + } + } + + return documentStoreClient; + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/storage/WorkerStore.java b/airbyte-workers/src/main/java/io/airbyte/workers/storage/WorkerStore.java deleted file mode 100644 index 2da164ec40086..0000000000000 --- a/airbyte-workers/src/main/java/io/airbyte/workers/storage/WorkerStore.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.workers.storage; - -import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.commons.json.Jsons; -import java.util.Optional; -import java.util.UUID; - -/** - * JSON layer over { @link CloudDocumentStore }. - */ -public class WorkerStore { - - private final DocumentStoreClient documentStoreClient; - - public WorkerStore(final DocumentStoreClient documentStoreClient) { - this.documentStoreClient = documentStoreClient; - } - - /** - * Set the document for an id. Overwrites existing document, if present. - * - * @param id - id to associate document with - * @param document - document to persist - */ - void set(final UUID id, final JsonNode document) { - documentStoreClient.write(id.toString(), Jsons.serialize(document)); - } - - /** - * Fetch previously persisted document. - * - * @param id - id that the document is associated with - * @return returns document if present, otherwise empty - */ - Optional get(final UUID id) { - return documentStoreClient.read(id.toString()).map(Jsons::deserialize); - } - - /** - * Delete persisted document. - * - * @param id - id that the document is associated with - * @return true if actually deletes something, otherwise false. (e.g. false if document doest not - * exist). 
- */ - boolean delete(final UUID id) { - return documentStoreClient.delete(id.toString()); - } - -} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtLauncherWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtLauncherWorker.java index 4a3eb9eb9efb7..46b73467a6d63 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtLauncherWorker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtLauncherWorker.java @@ -4,134 +4,35 @@ package io.airbyte.workers.temporal.sync; -import io.airbyte.commons.io.LineGobbler; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.logging.LoggingHelper; -import io.airbyte.commons.logging.MdcScope; import io.airbyte.config.OperatorDbtInput; import io.airbyte.scheduler.models.IntegrationLauncherConfig; import io.airbyte.scheduler.models.JobRunConfig; -import io.airbyte.workers.Worker; import io.airbyte.workers.WorkerApp; import io.airbyte.workers.WorkerConfigs; -import io.airbyte.workers.WorkerException; -import io.airbyte.workers.WorkerUtils; -import io.airbyte.workers.process.KubeProcessFactory; -import io.airbyte.workers.process.ProcessFactory; -import java.nio.file.Path; import java.util.Map; -import java.util.UUID; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -// todo: DRY the launchers -public class DbtLauncherWorker implements Worker { +public class DbtLauncherWorker extends LauncherWorker { - private static final Logger LOGGER = LoggerFactory.getLogger(DbtLauncherWorker.class); - - private static final MdcScope.Builder LOG_MDC_BUILDER = new MdcScope.Builder() - .setLogPrefix("dbt-orchestrator") - .setPrefixColor(LoggingHelper.Color.CYAN_BACKGROUND); - - public static final String DBT = "dbt"; + public static final String DBT = "dbt-orchestrator"; + private static final String POD_NAME_PREFIX = "orchestrator-dbt"; public static final String INIT_FILE_DESTINATION_LAUNCHER_CONFIG = "destinationLauncherConfig.json"; - private final WorkerConfigs workerConfigs; - private final ProcessFactory processFactory; - private final String airbyteVersion; - private final Path workspaceRoot; - private final IntegrationLauncherConfig destinationLauncherConfig; - private final JobRunConfig jobRunConfig; - - private final AtomicBoolean cancelled = new AtomicBoolean(false); - - private Process process; - - public DbtLauncherWorker( - final Path workspaceRoot, - final IntegrationLauncherConfig destinationLauncherConfig, + public DbtLauncherWorker(final IntegrationLauncherConfig destinationLauncherConfig, final JobRunConfig jobRunConfig, final WorkerConfigs workerConfigs, - final ProcessFactory processFactory, + final WorkerApp.ContainerOrchestratorConfig containerOrchestratorConfig, final String airbyteVersion) { - this.workspaceRoot = workspaceRoot; - this.destinationLauncherConfig = destinationLauncherConfig; - this.jobRunConfig = jobRunConfig; - this.workerConfigs = workerConfigs; - this.processFactory = processFactory; - this.airbyteVersion = airbyteVersion; - } - - @Override - public Void run(OperatorDbtInput operatorDbtInput, Path jobRoot) throws WorkerException { - try { - final Path jobPath = WorkerUtils.getJobRoot(workspaceRoot, jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); - - // we want to filter down to remove secrets, so we aren't writing over a bunch of unnecessary - // secrets - final Map envMap = 
System.getenv().entrySet().stream() - .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - - final Map fileMap = Map.of( - OrchestratorConstants.INIT_FILE_APPLICATION, DBT, - OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, Jsons.serialize(jobRunConfig), - OrchestratorConstants.INIT_FILE_INPUT, Jsons.serialize(operatorDbtInput), - OrchestratorConstants.INIT_FILE_ENV_MAP, Jsons.serialize(envMap), - INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)); - - process = processFactory.create( - "runner-" + UUID.randomUUID().toString().substring(0, 10), - 0, - jobPath, - "airbyte/container-orchestrator:" + airbyteVersion, - false, - fileMap, - null, - workerConfigs.getResourceRequirements(), - Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_RUNNER), - Map.of( - WorkerApp.KUBE_HEARTBEAT_PORT, WorkerApp.KUBE_HEARTBEAT_PORT, - OrchestratorConstants.PORT1, OrchestratorConstants.PORT1, - OrchestratorConstants.PORT2, OrchestratorConstants.PORT2, - OrchestratorConstants.PORT3, OrchestratorConstants.PORT3, - OrchestratorConstants.PORT4, OrchestratorConstants.PORT4)); - - LineGobbler.gobble(process.getInputStream(), LOGGER::info, LOG_MDC_BUILDER); - LineGobbler.gobble(process.getErrorStream(), LOGGER::error, LOG_MDC_BUILDER); - - WorkerUtils.wait(process); - - if (process.exitValue() != 0) { - throw new WorkerException("Non-zero exit code!"); - } - } catch (Exception e) { - if (cancelled.get()) { - throw new WorkerException("Sync was cancelled.", e); - } else { - throw new WorkerException("Running the sync attempt failed", e); - } - } - - return null; - } - - @Override - public void cancel() { - cancelled.set(true); - - if (process == null) { - return; - } - - LOGGER.debug("Closing dbt launcher process"); - WorkerUtils.gentleClose(workerConfigs, process, 1, TimeUnit.MINUTES); - if (process.isAlive() || process.exitValue() != 0) { - LOGGER.error("Dbt launcher process wasn't successful"); - } + super( + DBT, + POD_NAME_PREFIX, + jobRunConfig, + Map.of( + INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)), + containerOrchestratorConfig, + airbyteVersion, + workerConfigs.getResourceRequirements(), + Void.class); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java index 404eaef204125..425a915c69bb9 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/DbtTransformationActivityImpl.java @@ -18,20 +18,20 @@ import io.airbyte.workers.DbtTransformationRunner; import io.airbyte.workers.DbtTransformationWorker; import io.airbyte.workers.Worker; +import io.airbyte.workers.WorkerApp; import io.airbyte.workers.WorkerConfigs; import io.airbyte.workers.normalization.NormalizationRunnerFactory; import io.airbyte.workers.process.ProcessFactory; import io.airbyte.workers.temporal.CancellationHandler; import io.airbyte.workers.temporal.TemporalAttemptExecution; import java.nio.file.Path; +import java.util.Optional; import java.util.function.Supplier; public class DbtTransformationActivityImpl implements DbtTransformationActivity { - private final boolean containerOrchestratorEnabled; private final WorkerConfigs workerConfigs; private final ProcessFactory 
jobProcessFactory; - private final ProcessFactory orchestratorProcessFactory; private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; private final AirbyteConfigValidator validator; @@ -41,11 +41,11 @@ public class DbtTransformationActivityImpl implements DbtTransformationActivity private final String databasePassword; private final String databaseUrl; private final String airbyteVersion; + private final Optional containerOrchestratorConfig; - public DbtTransformationActivityImpl(final boolean containerOrchestratorEnabled, + public DbtTransformationActivityImpl(final Optional containerOrchestratorConfig, final WorkerConfigs workerConfigs, final ProcessFactory jobProcessFactory, - final ProcessFactory orchestratorProcessFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, final WorkerEnvironment workerEnvironment, @@ -54,10 +54,9 @@ public DbtTransformationActivityImpl(final boolean containerOrchestratorEnabled, final String databasePassword, final String databaseUrl, final String airbyteVersion) { - this.containerOrchestratorEnabled = containerOrchestratorEnabled; + this.containerOrchestratorConfig = containerOrchestratorConfig; this.workerConfigs = workerConfigs; this.jobProcessFactory = jobProcessFactory; - this.orchestratorProcessFactory = orchestratorProcessFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; this.validator = new AirbyteConfigValidator(); @@ -85,7 +84,7 @@ public Void run(final JobRunConfig jobRunConfig, final CheckedSupplier, Exception> workerFactory; - if (containerOrchestratorEnabled) { + if (containerOrchestratorConfig.isPresent()) { workerFactory = getContainerLauncherWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig); } else { workerFactory = getLegacyWorkerFactory(destinationLauncherConfig, jobRunConfig, resourceRequirements); @@ -122,11 +121,10 @@ private CheckedSupplier, Exception> getContainerL final IntegrationLauncherConfig destinationLauncherConfig, final JobRunConfig jobRunConfig) { return () -> new DbtLauncherWorker( - workspaceRoot, destinationLauncherConfig, jobRunConfig, workerConfigs, - orchestratorProcessFactory, + containerOrchestratorConfig.get(), airbyteVersion); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java new file mode 100644 index 0000000000000..7481a5ae5b854 --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.temporal.sync; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.ResourceRequirements; +import io.airbyte.scheduler.models.JobRunConfig; +import io.airbyte.workers.Worker; +import io.airbyte.workers.WorkerApp; +import io.airbyte.workers.WorkerException; +import io.airbyte.workers.process.AsyncKubePodStatus; +import io.airbyte.workers.process.AsyncOrchestratorPodProcess; +import io.airbyte.workers.process.KubePodInfo; +import io.airbyte.workers.process.KubeProcessFactory; +import java.nio.file.Path; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; + +/** + * Coordinates configuring and managing the state of an async process. 
This is tied to the (job_id, + * attempt_id) and will attempt to kill off lower attempt ids. + * + * @param a json-serializable input class for the worker + * @param either {@link Void} or a json-serializable output class for the worker + */ +@Slf4j +public class LauncherWorker implements Worker { + + private final String application; + private final String podNamePrefix; + private final JobRunConfig jobRunConfig; + private final Map additionalFileMap; + private final WorkerApp.ContainerOrchestratorConfig containerOrchestratorConfig; + private final String airbyteVersion; + private final ResourceRequirements resourceRequirements; + private final Class outputClass; + + private final AtomicBoolean cancelled = new AtomicBoolean(false); + private AsyncOrchestratorPodProcess process; + + public LauncherWorker( + final String application, + final String podNamePrefix, + final JobRunConfig jobRunConfig, + final Map additionalFileMap, + final WorkerApp.ContainerOrchestratorConfig containerOrchestratorConfig, + final String airbyteVersion, + final ResourceRequirements resourceRequirements, + final Class outputClass) { + this.application = application; + this.podNamePrefix = podNamePrefix; + this.jobRunConfig = jobRunConfig; + this.additionalFileMap = additionalFileMap; + this.containerOrchestratorConfig = containerOrchestratorConfig; + this.airbyteVersion = airbyteVersion; + this.resourceRequirements = resourceRequirements; + this.outputClass = outputClass; + } + + @Override + public OUTPUT run(INPUT input, Path jobRoot) throws WorkerException { + try { + final Map envMap = System.getenv().entrySet().stream() + .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + final Map fileMap = new HashMap<>(additionalFileMap); + fileMap.putAll(Map.of( + OrchestratorConstants.INIT_FILE_APPLICATION, application, + OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, Jsons.serialize(jobRunConfig), + OrchestratorConstants.INIT_FILE_INPUT, Jsons.serialize(input), + OrchestratorConstants.INIT_FILE_ENV_MAP, Jsons.serialize(envMap))); + + final Map portMap = Map.of( + WorkerApp.KUBE_HEARTBEAT_PORT, WorkerApp.KUBE_HEARTBEAT_PORT, + OrchestratorConstants.PORT1, OrchestratorConstants.PORT1, + OrchestratorConstants.PORT2, OrchestratorConstants.PORT2, + OrchestratorConstants.PORT3, OrchestratorConstants.PORT3, + OrchestratorConstants.PORT4, OrchestratorConstants.PORT4); + + final var allLabels = KubeProcessFactory.getLabels( + jobRunConfig.getJobId(), + Math.toIntExact(jobRunConfig.getAttemptId()), + Collections.emptyMap()); + + final var podNameAndJobPrefix = podNamePrefix + "-j-" + jobRunConfig.getJobId() + "-a-"; + killLowerAttemptIdsIfPresent(podNameAndJobPrefix, jobRunConfig.getAttemptId()); + + final var podName = podNameAndJobPrefix + jobRunConfig.getAttemptId(); + final var kubePodInfo = new KubePodInfo(containerOrchestratorConfig.namespace(), podName); + + process = new AsyncOrchestratorPodProcess( + kubePodInfo, + containerOrchestratorConfig.documentStoreClient(), + containerOrchestratorConfig.kubernetesClient()); + + if (process.getDocStoreStatus().equals(AsyncKubePodStatus.NOT_STARTED)) { + process.create( + airbyteVersion, + allLabels, + resourceRequirements, + fileMap, + portMap); + } + + // this waitFor can resume if the activity is re-run + process.waitFor(); + + if (process.exitValue() != 0) { + throw new WorkerException("Non-zero exit code!"); + } + + final var output = process.getOutput(); + + if 
(output.isPresent()) { + return Jsons.deserialize(output.get(), outputClass); + } else { + throw new WorkerException("Running the " + application + " launcher resulted in no readable output!"); + } + } catch (Exception e) { + if (cancelled.get()) { + throw new WorkerException("Launcher " + application + " was cancelled.", e); + } else { + throw new WorkerException("Running the launcher " + application + " failed", e); + } + } + } + + /** + * If the sync workflow has advanced to the next attempt, we don't want to leave a zombie of the + * older job running (if it exists). In order to ensure a consistent state, we should kill the older + * versions. + */ + private void killLowerAttemptIdsIfPresent(final String podNameAndJobPrefix, final long currentAttempt) { + for (long previousAttempt = currentAttempt - 1; previousAttempt >= 0; previousAttempt--) { + final var podName = podNameAndJobPrefix + previousAttempt; + final var kubePodInfo = new KubePodInfo(containerOrchestratorConfig.namespace(), podName); + final var oldProcess = new AsyncOrchestratorPodProcess( + kubePodInfo, + containerOrchestratorConfig.documentStoreClient(), + containerOrchestratorConfig.kubernetesClient()); + + try { + oldProcess.destroy(); + log.info("Found and destroyed a previous attempt: " + previousAttempt); + } catch (Exception e) { + log.warn("Wasn't able to find and destroy a previous attempt: " + previousAttempt); + } + } + } + + @Override + public void cancel() { + cancelled.set(true); + + if (process == null) { + return; + } + + log.debug("Closing sync runner process"); + process.destroy(); + + if (process.hasExited()) { + log.info("Successfully cancelled process."); + } else { + // try again + process.destroy(); + + if (process.hasExited()) { + log.info("Successfully cancelled process."); + } else { + log.error("Unable to cancel process"); + } + } + } + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java index ee01e9075e676..eb82b2c730eb8 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java @@ -16,20 +16,20 @@ import io.airbyte.scheduler.models.JobRunConfig; import io.airbyte.workers.DefaultNormalizationWorker; import io.airbyte.workers.Worker; +import io.airbyte.workers.WorkerApp; import io.airbyte.workers.WorkerConfigs; import io.airbyte.workers.normalization.NormalizationRunnerFactory; import io.airbyte.workers.process.ProcessFactory; import io.airbyte.workers.temporal.CancellationHandler; import io.airbyte.workers.temporal.TemporalAttemptExecution; import java.nio.file.Path; +import java.util.Optional; import java.util.function.Supplier; public class NormalizationActivityImpl implements NormalizationActivity { - private final boolean containerOrchestratorEnabled; private final WorkerConfigs workerConfigs; private final ProcessFactory jobProcessFactory; - private final ProcessFactory orchestratorProcessFactory; private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; private final AirbyteConfigValidator validator; @@ -39,11 +39,11 @@ public class NormalizationActivityImpl implements NormalizationActivity { private final String databasePassword; private final String databaseUrl; private final String airbyteVersion; + private final Optional containerOrchestratorConfig; - public 
NormalizationActivityImpl(final boolean containerOrchestratorEnabled, + public NormalizationActivityImpl(final Optional containerOrchestratorConfig, final WorkerConfigs workerConfigs, final ProcessFactory jobProcessFactory, - final ProcessFactory orchestratorProcessFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, final WorkerEnvironment workerEnvironment, @@ -52,10 +52,9 @@ public NormalizationActivityImpl(final boolean containerOrchestratorEnabled, final String databasePassword, final String databaseUrl, final String airbyteVersion) { - this.containerOrchestratorEnabled = containerOrchestratorEnabled; + this.containerOrchestratorConfig = containerOrchestratorConfig; this.workerConfigs = workerConfigs; this.jobProcessFactory = jobProcessFactory; - this.orchestratorProcessFactory = orchestratorProcessFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; this.validator = new AirbyteConfigValidator(); @@ -82,7 +81,7 @@ public Void normalize(final JobRunConfig jobRunConfig, final CheckedSupplier, Exception> workerFactory; - if (containerOrchestratorEnabled) { + if (containerOrchestratorConfig.isPresent()) { workerFactory = getContainerLauncherWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig); } else { workerFactory = getLegacyWorkerFactory(workerConfigs, destinationLauncherConfig, jobRunConfig); @@ -118,11 +117,10 @@ private CheckedSupplier, Exception> getContaine final IntegrationLauncherConfig destinationLauncherConfig, final JobRunConfig jobRunConfig) { return () -> new NormalizationLauncherWorker( - workspaceRoot, destinationLauncherConfig, jobRunConfig, workerConfigs, - orchestratorProcessFactory, + containerOrchestratorConfig.get(), airbyteVersion); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationLauncherWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationLauncherWorker.java index ade742b135ad6..d353f46c52af1 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationLauncherWorker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationLauncherWorker.java @@ -4,132 +4,35 @@ package io.airbyte.workers.temporal.sync; -import io.airbyte.commons.io.LineGobbler; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.logging.LoggingHelper; -import io.airbyte.commons.logging.MdcScope; import io.airbyte.config.NormalizationInput; import io.airbyte.scheduler.models.IntegrationLauncherConfig; import io.airbyte.scheduler.models.JobRunConfig; -import io.airbyte.workers.Worker; import io.airbyte.workers.WorkerApp; import io.airbyte.workers.WorkerConfigs; -import io.airbyte.workers.WorkerException; -import io.airbyte.workers.WorkerUtils; -import io.airbyte.workers.process.KubeProcessFactory; -import io.airbyte.workers.process.ProcessFactory; -import java.nio.file.Path; import java.util.Map; -import java.util.UUID; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -public class NormalizationLauncherWorker implements Worker { +public class NormalizationLauncherWorker extends LauncherWorker { - private static final Logger LOGGER = LoggerFactory.getLogger(NormalizationLauncherWorker.class); - - private static final MdcScope.Builder LOG_MDC_BUILDER = new MdcScope.Builder() - .setLogPrefix("normalization-orchestrator") - 
.setPrefixColor(LoggingHelper.Color.CYAN_BACKGROUND); - - public static final String NORMALIZATION = "normalization"; + public static final String NORMALIZATION = "normalization-orchestrator"; + private static final String POD_NAME_PREFIX = "orchestrator-norm"; public static final String INIT_FILE_DESTINATION_LAUNCHER_CONFIG = "destinationLauncherConfig.json"; - private final WorkerConfigs workerConfigs; - private final ProcessFactory processFactory; - private final String airbyteVersion; - private final AtomicBoolean cancelled = new AtomicBoolean(false); - private final Path workspaceRoot; - private final IntegrationLauncherConfig destinationLauncherConfig; - private final JobRunConfig jobRunConfig; - - private Process process; - - public NormalizationLauncherWorker( - final Path workspaceRoot, - final IntegrationLauncherConfig destinationLauncherConfig, + public NormalizationLauncherWorker(final IntegrationLauncherConfig destinationLauncherConfig, final JobRunConfig jobRunConfig, final WorkerConfigs workerConfigs, - final ProcessFactory processFactory, + final WorkerApp.ContainerOrchestratorConfig containerOrchestratorConfig, final String airbyteVersion) { - this.workspaceRoot = workspaceRoot; - this.destinationLauncherConfig = destinationLauncherConfig; - this.jobRunConfig = jobRunConfig; - this.workerConfigs = workerConfigs; - this.processFactory = processFactory; - this.airbyteVersion = airbyteVersion; - } - - @Override - public Void run(NormalizationInput normalizationInput, Path jobRoot) throws WorkerException { - try { - final Path jobPath = WorkerUtils.getJobRoot(workspaceRoot, jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); - - // we want to filter down to remove secrets, so we aren't writing over a bunch of unnecessary - // secrets - final Map envMap = System.getenv().entrySet().stream() - .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - - final Map fileMap = Map.of( - OrchestratorConstants.INIT_FILE_APPLICATION, NORMALIZATION, - OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, Jsons.serialize(jobRunConfig), - OrchestratorConstants.INIT_FILE_INPUT, Jsons.serialize(normalizationInput), - OrchestratorConstants.INIT_FILE_ENV_MAP, Jsons.serialize(envMap), - INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)); - - process = processFactory.create( - "runner-" + UUID.randomUUID().toString().substring(0, 10), - 0, - jobPath, - "airbyte/container-orchestrator:" + airbyteVersion, - false, - fileMap, - null, - workerConfigs.getResourceRequirements(), - Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_RUNNER), - Map.of( - WorkerApp.KUBE_HEARTBEAT_PORT, WorkerApp.KUBE_HEARTBEAT_PORT, - OrchestratorConstants.PORT1, OrchestratorConstants.PORT1, - OrchestratorConstants.PORT2, OrchestratorConstants.PORT2, - OrchestratorConstants.PORT3, OrchestratorConstants.PORT3, - OrchestratorConstants.PORT4, OrchestratorConstants.PORT4)); - - LineGobbler.gobble(process.getInputStream(), LOGGER::info, LOG_MDC_BUILDER); - LineGobbler.gobble(process.getErrorStream(), LOGGER::error, LOG_MDC_BUILDER); - - WorkerUtils.wait(process); - - if (process.exitValue() != 0) { - throw new WorkerException("Non-zero exit code!"); - } - } catch (Exception e) { - if (cancelled.get()) { - throw new WorkerException("Sync was cancelled.", e); - } else { - throw new WorkerException("Running the sync attempt failed", e); - } - } - - return null; - } - - @Override - public void 
cancel() { - cancelled.set(true); - - if (process == null) { - return; - } - - LOGGER.debug("Closing normalization launcher process"); - WorkerUtils.gentleClose(workerConfigs, process, 1, TimeUnit.MINUTES); - if (process.isAlive() || process.exitValue() != 0) { - LOGGER.error("Normalization launcher process wasn't successful"); - } + super( + NORMALIZATION, + POD_NAME_PREFIX, + jobRunConfig, + Map.of( + INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)), + containerOrchestratorConfig, + airbyteVersion, + workerConfigs.getResourceRequirements(), + Void.class); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java index ce7b456faf71a..85de5c0b6f795 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/OrchestratorConstants.java @@ -5,10 +5,17 @@ package io.airbyte.workers.temporal.sync; import io.airbyte.config.EnvConfigs; +import io.airbyte.config.helpers.LogClientSingleton; import java.util.Set; public class OrchestratorConstants { + // we want to propagate log level, even if it isn't consumed by EnvConfigs + private static final String LOG_LEVEL = "LOG_LEVEL"; + + // necessary for s3/minio logging. used in the log4j2 configuration. + private static final String S3_PATH_STYLE_ACCESS = "S3_PATH_STYLE_ACCESS"; + // set of env vars necessary for the container orchestrator app to run public static final Set ENV_VARS_TO_TRANSFER = Set.of( EnvConfigs.WORKER_ENVIRONMENT, @@ -29,7 +36,26 @@ public class OrchestratorConstants { EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_REQUEST, EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_LIMIT, EnvConfigs.JOB_DEFAULT_ENV_MAP, - EnvConfigs.LOCAL_ROOT); + EnvConfigs.LOCAL_ROOT, + LOG_LEVEL, + LogClientSingleton.GCS_LOG_BUCKET, + LogClientSingleton.GOOGLE_APPLICATION_CREDENTIALS, + LogClientSingleton.S3_MINIO_ENDPOINT, + S3_PATH_STYLE_ACCESS, + LogClientSingleton.S3_LOG_BUCKET, + LogClientSingleton.AWS_ACCESS_KEY_ID, + LogClientSingleton.AWS_SECRET_ACCESS_KEY, + LogClientSingleton.S3_LOG_BUCKET_REGION, + EnvConfigs.STATE_STORAGE_GCS_BUCKET_NAME, + EnvConfigs.STATE_STORAGE_GCS_APPLICATION_CREDENTIALS, + EnvConfigs.STATE_STORAGE_MINIO_ENDPOINT, + EnvConfigs.STATE_STORAGE_MINIO_BUCKET_NAME, + EnvConfigs.STATE_STORAGE_MINIO_ACCESS_KEY, + EnvConfigs.STATE_STORAGE_MINIO_SECRET_ACCESS_KEY, + EnvConfigs.STATE_STORAGE_S3_BUCKET_NAME, + EnvConfigs.STATE_STORAGE_S3_ACCESS_KEY, + EnvConfigs.STATE_STORAGE_S3_SECRET_ACCESS_KEY, + EnvConfigs.STATE_STORAGE_S3_REGION); public static final String INIT_FILE_ENV_MAP = "envMap.json"; public static final String INIT_FILE_INPUT = "input.json"; diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java index 8eb3533b4e1d8..763f6e056e50e 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java @@ -20,6 +20,7 @@ import io.airbyte.scheduler.models.JobRunConfig; import io.airbyte.workers.DefaultReplicationWorker; import io.airbyte.workers.Worker; +import io.airbyte.workers.WorkerApp; import io.airbyte.workers.WorkerConfigs; import io.airbyte.workers.WorkerConstants; import 
io.airbyte.workers.process.AirbyteIntegrationLauncher; @@ -34,6 +35,7 @@ import io.airbyte.workers.temporal.CancellationHandler; import io.airbyte.workers.temporal.TemporalAttemptExecution; import java.nio.file.Path; +import java.util.Optional; import java.util.function.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +44,7 @@ public class ReplicationActivityImpl implements ReplicationActivity { private static final Logger LOGGER = LoggerFactory.getLogger(ReplicationActivityImpl.class); - private final boolean containerOrchestratorEnabled; + private final Optional containerOrchestratorConfig; private final WorkerConfigs workerConfigs; private final ProcessFactory processFactory; private final SecretsHydrator secretsHydrator; @@ -56,7 +58,7 @@ public class ReplicationActivityImpl implements ReplicationActivity { private final String databaseUrl; private final String airbyteVersion; - public ReplicationActivityImpl(final boolean containerOrchestratorEnabled, + public ReplicationActivityImpl(final Optional containerOrchestratorConfig, final WorkerConfigs workerConfigs, final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, @@ -67,13 +69,13 @@ public ReplicationActivityImpl(final boolean containerOrchestratorEnabled, final String databasePassword, final String databaseUrl, final String airbyteVersion) { - this(containerOrchestratorEnabled, workerConfigs, processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, + this(containerOrchestratorConfig, workerConfigs, processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, new AirbyteConfigValidator(), databaseUser, databasePassword, databaseUrl, airbyteVersion); } @VisibleForTesting - ReplicationActivityImpl(final boolean containerOrchestratorEnabled, + ReplicationActivityImpl(final Optional containerOrchestratorConfig, final WorkerConfigs workerConfigs, final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, @@ -85,7 +87,7 @@ public ReplicationActivityImpl(final boolean containerOrchestratorEnabled, final String databasePassword, final String databaseUrl, final String airbyteVersion) { - this.containerOrchestratorEnabled = containerOrchestratorEnabled; + this.containerOrchestratorConfig = containerOrchestratorConfig; this.workerConfigs = workerConfigs; this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; @@ -119,8 +121,9 @@ public StandardSyncOutput replicate(final JobRunConfig jobRunConfig, final CheckedSupplier, Exception> workerFactory; - if (containerOrchestratorEnabled) { - workerFactory = getContainerLauncherWorkerFactory(sourceLauncherConfig, destinationLauncherConfig, jobRunConfig, syncInput); + if (containerOrchestratorConfig.isPresent()) { + workerFactory = getContainerLauncherWorkerFactory(containerOrchestratorConfig.get(), sourceLauncherConfig, destinationLauncherConfig, + jobRunConfig, syncInput); } else { workerFactory = getLegacyWorkerFactory(sourceLauncherConfig, destinationLauncherConfig, jobRunConfig, syncInput); } @@ -202,17 +205,16 @@ private CheckedSupplier, Exception> } private CheckedSupplier, Exception> getContainerLauncherWorkerFactory( + final WorkerApp.ContainerOrchestratorConfig containerOrchestratorConfig, final IntegrationLauncherConfig sourceLauncherConfig, final IntegrationLauncherConfig destinationLauncherConfig, final JobRunConfig jobRunConfig, final StandardSyncInput syncInput) { return () -> new ReplicationLauncherWorker( + containerOrchestratorConfig, sourceLauncherConfig, 
destinationLauncherConfig, jobRunConfig, - syncInput, - workspaceRoot, - processFactory, airbyteVersion, workerConfigs); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationLauncherWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationLauncherWorker.java index 6fad439553e8a..d8dcadaa00528 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationLauncherWorker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationLauncherWorker.java @@ -4,167 +4,45 @@ package io.airbyte.workers.temporal.sync; -import io.airbyte.commons.io.LineGobbler; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.logging.LoggingHelper; -import io.airbyte.commons.logging.MdcScope; import io.airbyte.config.ReplicationOutput; import io.airbyte.config.StandardSyncInput; import io.airbyte.scheduler.models.IntegrationLauncherConfig; import io.airbyte.scheduler.models.JobRunConfig; -import io.airbyte.workers.Worker; import io.airbyte.workers.WorkerApp; import io.airbyte.workers.WorkerConfigs; -import io.airbyte.workers.WorkerException; -import io.airbyte.workers.WorkerUtils; -import io.airbyte.workers.process.KubeProcessFactory; -import io.airbyte.workers.process.ProcessFactory; -import java.nio.file.Path; import java.util.Map; -import java.util.Optional; -import java.util.UUID; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Launches a container-orchestrator container/pod to manage the message passing for the replication * step. This step configs onto the container-orchestrator and retrieves logs and the output from * the container-orchestrator. 
*/ -public class ReplicationLauncherWorker implements Worker { +public class ReplicationLauncherWorker extends LauncherWorker { - private static final Logger LOGGER = LoggerFactory.getLogger(ReplicationLauncherWorker.class); - - private static final MdcScope.Builder LOG_MDC_BUILDER = new MdcScope.Builder() - .setLogPrefix("replication-orchestrator") - .setPrefixColor(LoggingHelper.Color.CYAN_BACKGROUND); - - public static final String REPLICATION = "replication"; + public static final String REPLICATION = "replication-orchestrator"; + private static final String POD_NAME_PREFIX = "orchestrator-repl"; public static final String INIT_FILE_SOURCE_LAUNCHER_CONFIG = "sourceLauncherConfig.json"; public static final String INIT_FILE_DESTINATION_LAUNCHER_CONFIG = "destinationLauncherConfig.json"; - private final AtomicBoolean cancelled = new AtomicBoolean(false); - private final IntegrationLauncherConfig sourceLauncherConfig; - private final IntegrationLauncherConfig destinationLauncherConfig; - private final JobRunConfig jobRunConfig; - private final StandardSyncInput syncInput; - private final Path workspaceRoot; - private final ProcessFactory processFactory; - private final String airbyteVersion; - private final WorkerConfigs workerConfigs; - - private Process process; - public ReplicationLauncherWorker( + final WorkerApp.ContainerOrchestratorConfig containerOrchestratorConfig, final IntegrationLauncherConfig sourceLauncherConfig, final IntegrationLauncherConfig destinationLauncherConfig, final JobRunConfig jobRunConfig, - final StandardSyncInput syncInput, - final Path workspaceRoot, - final ProcessFactory processFactory, final String airbyteVersion, final WorkerConfigs workerConfigs) { - - this.sourceLauncherConfig = sourceLauncherConfig; - this.destinationLauncherConfig = destinationLauncherConfig; - this.jobRunConfig = jobRunConfig; - this.syncInput = syncInput; - this.workspaceRoot = workspaceRoot; - this.processFactory = processFactory; - this.airbyteVersion = airbyteVersion; - this.workerConfigs = workerConfigs; - } - - @Override - public ReplicationOutput run(StandardSyncInput standardSyncInput, Path jobRoot) throws WorkerException { - try { - final Path jobPath = WorkerUtils.getJobRoot(workspaceRoot, jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); - - // we want to filter down to remove secrets, so we aren't writing over a bunch of unnecessary - // secrets - final Map envMap = System.getenv().entrySet().stream() - .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - - final Map fileMap = Map.of( - OrchestratorConstants.INIT_FILE_APPLICATION, REPLICATION, - OrchestratorConstants.INIT_FILE_JOB_RUN_CONFIG, Jsons.serialize(jobRunConfig), - OrchestratorConstants.INIT_FILE_INPUT, Jsons.serialize(syncInput), - OrchestratorConstants.INIT_FILE_ENV_MAP, Jsons.serialize(envMap), - INIT_FILE_SOURCE_LAUNCHER_CONFIG, Jsons.serialize(sourceLauncherConfig), - INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)); - - process = processFactory.create( - "runner-" + UUID.randomUUID().toString().substring(0, 10), - 0, - jobPath, - "airbyte/container-orchestrator:" + airbyteVersion, - false, - fileMap, - null, - workerConfigs.getResourceRequirements(), - Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_RUNNER), - Map.of( - WorkerApp.KUBE_HEARTBEAT_PORT, WorkerApp.KUBE_HEARTBEAT_PORT, - OrchestratorConstants.PORT1, OrchestratorConstants.PORT1, - 
OrchestratorConstants.PORT2, OrchestratorConstants.PORT2, - OrchestratorConstants.PORT3, OrchestratorConstants.PORT3, - OrchestratorConstants.PORT4, OrchestratorConstants.PORT4)); - - final AtomicReference output = new AtomicReference<>(); - - LineGobbler.gobble(process.getInputStream(), line -> { - final Optional maybeOutput = Jsons.tryDeserialize(line, ReplicationOutput.class); - - if (maybeOutput.isPresent()) { - LOGGER.info("Found output!"); - output.set(maybeOutput.get()); - } else { - try (final var mdcScope = LOG_MDC_BUILDER.build()) { - LOGGER.info(line); - } - } - }); - - LineGobbler.gobble(process.getErrorStream(), LOGGER::error, LOG_MDC_BUILDER); - - WorkerUtils.wait(process); - - if (process.exitValue() != 0) { - throw new WorkerException("Non-zero exit code!"); - } - - if (output.get() != null) { - return output.get(); - } else { - throw new WorkerException("Running the sync attempt resulted in no readable output!"); - } - } catch (Exception e) { - if (cancelled.get()) { - throw new WorkerException("Sync was cancelled.", e); - } else { - throw new WorkerException("Running the sync attempt failed", e); - } - } - } - - @Override - public void cancel() { - cancelled.set(true); - - if (process == null) { - return; - } - - LOGGER.debug("Closing replication launcher process"); - WorkerUtils.gentleClose(workerConfigs, process, 1, TimeUnit.MINUTES); - if (process.isAlive() || process.exitValue() != 0) { - LOGGER.error("Replication launcher process wasn't successful"); - } + super( + REPLICATION, + POD_NAME_PREFIX, + jobRunConfig, + Map.of( + INIT_FILE_SOURCE_LAUNCHER_CONFIG, Jsons.serialize(sourceLauncherConfig), + INIT_FILE_DESTINATION_LAUNCHER_CONFIG, Jsons.serialize(destinationLauncherConfig)), + containerOrchestratorConfig, + airbyteVersion, + workerConfigs.getResourceRequirements(), + ReplicationOutput.class); } } diff --git a/airbyte-workers/src/main/resources/entrypoints/check.sh b/airbyte-workers/src/main/resources/entrypoints/sync/check.sh similarity index 100% rename from airbyte-workers/src/main/resources/entrypoints/check.sh rename to airbyte-workers/src/main/resources/entrypoints/sync/check.sh diff --git a/airbyte-workers/src/main/resources/entrypoints/main.sh b/airbyte-workers/src/main/resources/entrypoints/sync/main.sh similarity index 100% rename from airbyte-workers/src/main/resources/entrypoints/main.sh rename to airbyte-workers/src/main/resources/entrypoints/sync/main.sh diff --git a/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/AsyncOrchestratorPodProcessIntegrationTest.java b/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/AsyncOrchestratorPodProcessIntegrationTest.java new file mode 100644 index 0000000000000..49b648b302994 --- /dev/null +++ b/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/AsyncOrchestratorPodProcessIntegrationTest.java @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.workers.process; + +import static org.junit.jupiter.api.Assertions.*; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.EnvConfigs; +import io.airbyte.config.storage.CloudStorageConfigs; +import io.airbyte.config.storage.MinioS3ClientFactory; +import io.airbyte.workers.WorkerApp; +import io.airbyte.workers.WorkerConfigs; +import io.airbyte.workers.storage.DocumentStoreClient; +import io.airbyte.workers.storage.S3DocumentStoreClient; +import io.airbyte.workers.temporal.sync.OrchestratorConstants; +import io.fabric8.kubernetes.api.model.ContainerBuilder; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.api.model.PodBuilder; +import io.fabric8.kubernetes.client.DefaultKubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClient; +import java.nio.file.Path; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.s3.model.CreateBucketRequest; + +public class AsyncOrchestratorPodProcessIntegrationTest { + + private static KubernetesClient kubernetesClient; + private static DocumentStoreClient documentStoreClient; + private static Process portForwardProcess; + + @BeforeAll + public static void init() throws Exception { + kubernetesClient = new DefaultKubernetesClient(); + + final var podName = "test-minio-" + RandomStringUtils.randomAlphabetic(10).toLowerCase(); + + final var minioContainer = new ContainerBuilder() + .withName("minio") + .withImage("minio/minio:latest") + .withArgs("server", "/home/shared") + .withEnv( + new EnvVar("MINIO_ACCESS_KEY", "minio", null), + new EnvVar("MINIO_SECRET_KEY", "minio123", null)) + .withPorts(new ContainerPort(9000, null, null, null, null)) + .build(); + + final Pod minioPod = new PodBuilder() + .withApiVersion("v1") + .withNewMetadata() + .withName(podName) + .withNamespace("default") + .endMetadata() + .withNewSpec() + .withRestartPolicy("Never") + .withContainers(minioContainer) + .endSpec() + .build(); + + kubernetesClient.pods().inNamespace("default").create(minioPod); + kubernetesClient.resource(minioPod).waitUntilReady(1, TimeUnit.MINUTES); + + portForwardProcess = new ProcessBuilder("kubectl", "port-forward", "pod/" + podName, "9432:9000").start(); + + final var localMinioEndpoint = "http://localhost:9432"; + + final var minioConfig = new CloudStorageConfigs.MinioConfig( + "anything", + "minio", + "minio123", + localMinioEndpoint); + + final var s3Client = new MinioS3ClientFactory(minioConfig).get(); + + final var createBucketRequest = CreateBucketRequest.builder() + .bucket("anything") + .build(); + + s3Client.createBucket(createBucketRequest); + + documentStoreClient = S3DocumentStoreClient.minio( + minioConfig, + Path.of("/")); + } + + @Test + public void test() throws InterruptedException { + final var podName = "test-async-" + RandomStringUtils.randomAlphabetic(10).toLowerCase(); + + // make kubepodinfo + final var kubePodInfo = new KubePodInfo("default", podName); + + // another activity issues the request to create the pod process -> here we'll just create it + final var asyncProcess = new AsyncOrchestratorPodProcess( + kubePodInfo, + documentStoreClient, + kubernetesClient); + + final Map portMap = Map.of( + 
WorkerApp.KUBE_HEARTBEAT_PORT, WorkerApp.KUBE_HEARTBEAT_PORT, + OrchestratorConstants.PORT1, OrchestratorConstants.PORT1, + OrchestratorConstants.PORT2, OrchestratorConstants.PORT2, + OrchestratorConstants.PORT3, OrchestratorConstants.PORT3, + OrchestratorConstants.PORT4, OrchestratorConstants.PORT4); + + final Map envMap = System.getenv().entrySet().stream() + .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + asyncProcess.create("dev", Map.of(), new WorkerConfigs(new EnvConfigs()).getResourceRequirements(), Map.of( + OrchestratorConstants.INIT_FILE_APPLICATION, AsyncOrchestratorPodProcess.NO_OP, + OrchestratorConstants.INIT_FILE_ENV_MAP, Jsons.serialize(envMap)), portMap); + + // a final activity waits until there is output from the kube pod process + asyncProcess.waitFor(10, TimeUnit.SECONDS); + + final var exitValue = asyncProcess.exitValue(); + final var output = asyncProcess.getOutput(); + + assertEquals(0, exitValue); + assertTrue(output.isPresent()); + assertEquals("expected output", output.get()); + } + + @AfterAll + public static void teardown() { + try { + portForwardProcess.destroyForcibly(); + } catch (Exception e) { + e.printStackTrace(); + } + + try { + kubernetesClient.pods().delete(); + } catch (Exception e) { + e.printStackTrace(); + } + } + +} diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/storage/WorkerStoreTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/storage/WorkerStoreTest.java deleted file mode 100644 index 181299de2c9cc..0000000000000 --- a/airbyte-workers/src/test/java/io/airbyte/workers/storage/WorkerStoreTest.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.workers.storage; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.commons.json.Jsons; -import java.util.Optional; -import java.util.UUID; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; - -class WorkerStoreTest { - - private static final UUID ID = UUID.randomUUID(); - private static final JsonNode DOCUMENT = Jsons.jsonNode(ImmutableMap.of("a", 1)); - - private DocumentStoreClient documentStore; - private WorkerStore store; - - @BeforeEach - void setup() { - documentStore = mock(DocumentStoreClient.class); - store = new WorkerStore(documentStore); - } - - @Test - void testWrite() { - store.set(ID, DOCUMENT); - // overwrites are allowed, so test calling it twice. 
- store.set(ID, DOCUMENT); - verify(documentStore, times(2)).write(ID.toString(), Jsons.serialize(DOCUMENT)); - } - - @Test - void testReadExists() { - when(documentStore.read(ID.toString())).thenReturn(Optional.of(Jsons.serialize(DOCUMENT))); - assertEquals(Optional.of(DOCUMENT), store.get(ID)); - } - - @Test - void testReadNotExists() { - when(documentStore.read(ID.toString())).thenReturn(Optional.empty()); - assertEquals(Optional.empty(), store.get(ID)); - } - - @Test - void testDeleteExists() { - when(documentStore.delete(ID.toString())).thenReturn(true); - assertTrue(store.delete(ID)); - } - - @Test - void testDeleteNotExists() { - when(documentStore.delete(ID.toString())).thenReturn(false); - assertFalse(store.delete(ID)); - } - -} diff --git a/charts/airbyte/templates/env-configmap.yaml b/charts/airbyte/templates/env-configmap.yaml index 073b20249ac05..0b4bc12141753 100644 --- a/charts/airbyte/templates/env-configmap.yaml +++ b/charts/airbyte/templates/env-configmap.yaml @@ -38,6 +38,8 @@ data: S3_PATH_STYLE_ACCESS: {{ include "airbyte.s3PathStyleAccess" . | quote }} STATE_STORAGE_MINIO_BUCKET_NAME: airbyte-state-storage STATE_STORAGE_MINIO_ENDPOINT: {{ include "airbyte.minio.endpoint" . | quote }} + STATE_STORAGE_MINIO_ACCESS_KEY: {{ include "airbyte.minio.accessKey.password" . | quote }} + STATE_STORAGE_MINIO_SECRET_ACCESS_KEY: {{ include "airbyte.minio.secretKey.password" . | quote }} SUBMITTER_NUM_THREADS: "10" TEMPORAL_HOST: {{ include "common.names.fullname" . }}-temporal:{{ .Values.temporal.service.port }} TEMPORAL_WORKER_PORTS: 9001,9002,9003,9004,9005,9006,9007,9008,9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,9022,9023,9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039,9040 diff --git a/charts/airbyte/templates/worker/deployment.yaml b/charts/airbyte/templates/worker/deployment.yaml index d62546dd918a6..fd6c110f74fd6 100644 --- a/charts/airbyte/templates/worker/deployment.yaml +++ b/charts/airbyte/templates/worker/deployment.yaml @@ -187,6 +187,16 @@ spec: configMapKeyRef: name: airbyte-env key: STATE_STORAGE_MINIO_BUCKET_NAME + - name: STATE_STORAGE_MINIO_ACCESS_KEY + valueFrom: + configMapKeyRef: + name: airbyte-env + key: STATE_STORAGE_MINIO_ACCESS_KEY + - name: STATE_STORAGE_MINIO_SECRET_ACCESS_KEY + valueFrom: + configMapKeyRef: + name: airbyte-env + key: STATE_STORAGE_MINIO_SECRET_ACCESS_KEY - name: STATE_STORAGE_MINIO_ENDPOINT valueFrom: configMapKeyRef: diff --git a/kube/overlays/dev-integration-test/.env b/kube/overlays/dev-integration-test/.env index 6cadc72d4a7fb..e85cf73e04d34 100644 --- a/kube/overlays/dev-integration-test/.env +++ b/kube/overlays/dev-integration-test/.env @@ -48,7 +48,7 @@ S3_PATH_STYLE_ACCESS=true GCS_LOG_BUCKET= # State Storage Configuration -STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-state-storage +STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-dev-logs STATE_STORAGE_MINIO_ENDPOINT=http://airbyte-minio-svc:9000 # Docker Resource Limits @@ -63,3 +63,7 @@ JOB_KUBE_NODE_SELECTORS= # Job image pull policy JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY= + +# Launch a separate pod to orchestrate sync steps +CONTAINER_ORCHESTRATOR_ENABLED=false + diff --git a/kube/overlays/dev-integration-test/.secrets b/kube/overlays/dev-integration-test/.secrets index bf69a08191f34..67e8f4aae9770 100644 --- a/kube/overlays/dev-integration-test/.secrets +++ b/kube/overlays/dev-integration-test/.secrets @@ -3,3 +3,5 @@ DATABASE_PASSWORD=docker AWS_ACCESS_KEY_ID=minio AWS_SECRET_ACCESS_KEY=minio123 GOOGLE_APPLICATION_CREDENTIALS= 
+STATE_STORAGE_MINIO_ACCESS_KEY=minio +STATE_STORAGE_MINIO_SECRET_ACCESS_KEY=minio123 diff --git a/kube/overlays/dev/.env b/kube/overlays/dev/.env index 61acc4aad4981..5e4680e590cfd 100644 --- a/kube/overlays/dev/.env +++ b/kube/overlays/dev/.env @@ -50,7 +50,7 @@ S3_PATH_STYLE_ACCESS=true GCS_LOG_BUCKET= # State Storage Configuration -STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-state-storage +STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-dev-logs STATE_STORAGE_MINIO_ENDPOINT=http://airbyte-minio-svc:9000 # Docker Resource Limits @@ -65,3 +65,6 @@ JOB_KUBE_NODE_SELECTORS= # Job image pull policy JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY= + +# Launch a separate pod to orchestrate sync steps +CONTAINER_ORCHESTRATOR_ENABLED=false diff --git a/kube/overlays/dev/.secrets b/kube/overlays/dev/.secrets index bf69a08191f34..67e8f4aae9770 100644 --- a/kube/overlays/dev/.secrets +++ b/kube/overlays/dev/.secrets @@ -3,3 +3,5 @@ DATABASE_PASSWORD=docker AWS_ACCESS_KEY_ID=minio AWS_SECRET_ACCESS_KEY=minio123 GOOGLE_APPLICATION_CREDENTIALS= +STATE_STORAGE_MINIO_ACCESS_KEY=minio +STATE_STORAGE_MINIO_SECRET_ACCESS_KEY=minio123 diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 041ebcb9c50bf..4d6d391b63159 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -50,7 +50,7 @@ S3_PATH_STYLE_ACCESS=true GCS_LOG_BUCKET= # State Storage Configuration -STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-state-storage +STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-dev-logs STATE_STORAGE_MINIO_ENDPOINT=http://airbyte-minio-svc:9000 # Docker Resource Limits @@ -65,3 +65,7 @@ JOB_KUBE_NODE_SELECTORS= # Job image pull policy JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY= + +# Launch a separate pod to orchestrate sync steps +CONTAINER_ORCHESTRATOR_ENABLED=false + diff --git a/kube/overlays/stable-with-resource-limits/.secrets b/kube/overlays/stable-with-resource-limits/.secrets index bf69a08191f34..c9f0964dda5bc 100644 --- a/kube/overlays/stable-with-resource-limits/.secrets +++ b/kube/overlays/stable-with-resource-limits/.secrets @@ -2,4 +2,5 @@ DATABASE_USER=docker DATABASE_PASSWORD=docker AWS_ACCESS_KEY_ID=minio AWS_SECRET_ACCESS_KEY=minio123 -GOOGLE_APPLICATION_CREDENTIALS= +STATE_STORAGE_MINIO_ACCESS_KEY=minio +STATE_STORAGE_MINIO_SECRET_ACCESS_KEY=minio123 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 041ebcb9c50bf..4d6d391b63159 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -50,7 +50,7 @@ S3_PATH_STYLE_ACCESS=true GCS_LOG_BUCKET= # State Storage Configuration -STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-state-storage +STATE_STORAGE_MINIO_BUCKET_NAME=airbyte-dev-logs STATE_STORAGE_MINIO_ENDPOINT=http://airbyte-minio-svc:9000 # Docker Resource Limits @@ -65,3 +65,7 @@ JOB_KUBE_NODE_SELECTORS= # Job image pull policy JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY= + +# Launch a separate pod to orchestrate sync steps +CONTAINER_ORCHESTRATOR_ENABLED=false + diff --git a/kube/overlays/stable/.secrets b/kube/overlays/stable/.secrets index bf69a08191f34..67e8f4aae9770 100644 --- a/kube/overlays/stable/.secrets +++ b/kube/overlays/stable/.secrets @@ -3,3 +3,5 @@ DATABASE_PASSWORD=docker AWS_ACCESS_KEY_ID=minio AWS_SECRET_ACCESS_KEY=minio123 GOOGLE_APPLICATION_CREDENTIALS= +STATE_STORAGE_MINIO_ACCESS_KEY=minio +STATE_STORAGE_MINIO_SECRET_ACCESS_KEY=minio123 diff --git a/kube/resources/worker.yaml b/kube/resources/worker.yaml index 0e051e1bfaf86..2c8c5eb92e903 100644 --- 
a/kube/resources/worker.yaml +++ b/kube/resources/worker.yaml @@ -184,6 +184,32 @@ spec: configMapKeyRef: name: airbyte-env key: JOB_KUBE_MAIN_CONTAINER_IMAGE_PULL_POLICY + # todo: add other state storage keys + - name: STATE_STORAGE_MINIO_BUCKET_NAME + valueFrom: + configMapKeyRef: + name: airbyte-env + key: STATE_STORAGE_MINIO_BUCKET_NAME + - name: STATE_STORAGE_MINIO_ENDPOINT + valueFrom: + configMapKeyRef: + name: airbyte-env + key: STATE_STORAGE_MINIO_ENDPOINT + - name: STATE_STORAGE_MINIO_ACCESS_KEY + valueFrom: + secretKeyRef: + name: airbyte-secrets + key: STATE_STORAGE_MINIO_ACCESS_KEY + - name: STATE_STORAGE_MINIO_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: airbyte-secrets + key: STATE_STORAGE_MINIO_SECRET_ACCESS_KEY + - name: CONTAINER_ORCHESTRATOR_ENABLED + valueFrom: + configMapKeyRef: + name: airbyte-env + key: CONTAINER_ORCHESTRATOR_ENABLED ports: - containerPort: 9000 # for heartbeat server - containerPort: 9001 # start temporal worker port pool From 1108361d1f06f6591024e065f3c6a7513b152832 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 20 Jan 2022 08:54:19 -0800 Subject: [PATCH 170/215] remove flexport until it is published (#9647) * remove flexport until it is published * remove file * our script doesn't respect comments --- .../a4444d7b-c9ee-4d99-8d50-78e71abe7174.json | 8 -------- .../init/src/main/resources/seed/source_definitions.yaml | 7 ------- 2 files changed, 15 deletions(-) delete mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/a4444d7b-c9ee-4d99-8d50-78e71abe7174.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/a4444d7b-c9ee-4d99-8d50-78e71abe7174.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/a4444d7b-c9ee-4d99-8d50-78e71abe7174.json deleted file mode 100644 index 7e548380395f1..0000000000000 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/a4444d7b-c9ee-4d99-8d50-78e71abe7174.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "sourceDefinitionId": "a4444d7b-c9ee-4d99-8d50-78e71abe7174", - "name": "Flexport", - "dockerRepository": "airbyte/source-flexport", - "dockerImageTag": "0.1.0", - "documentationUrl": "https://docs.airbyte.io/integrations/sources/flexport", - "icon": "flexport.svg" -} diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 2b10e51ed7c23..724e3cf8f2f19 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -200,13 +200,6 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/file icon: file.svg sourceType: file -- name: Flexport - sourceDefinitionId: a4444d7b-c9ee-4d99-8d50-78e71abe7174 - dockerRepository: airbyte/source-flexport - dockerImageTag: 0.1.0 - documentationUrl: https://docs.airbyte.io/integrations/sources/flexport - icon: flexport.svg - sourceType: api - name: Freshdesk sourceDefinitionId: ec4b9503-13cb-48ab-a4ab-6ade4be46567 dockerRepository: airbyte/source-freshdesk From 0e5f1a8c21ef1eb1947bd669b23dcb6bb962abb5 Mon Sep 17 00:00:00 2001 From: Harshith Mullapudi Date: Thu, 20 Jan 2022 23:30:54 +0530 Subject: [PATCH 171/215] Publish source shopify: added shop_url to all stream records (#9635) * add shop url in shopify orders stream * Add shop url in all streams * Fix: add shop url in customers schema * chore: bump version * chore: added spec Co-authored-by: Manoj --- 
.../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- airbyte-integrations/connectors/source-shopify/Dockerfile | 2 +- .../source_shopify/schemas/abandoned_checkouts.json | 3 +++ .../source-shopify/source_shopify/schemas/collects.json | 3 +++ .../source_shopify/schemas/custom_collections.json | 3 +++ .../source-shopify/source_shopify/schemas/customers.json | 3 +++ .../source-shopify/source_shopify/schemas/discount_codes.json | 3 +++ .../source-shopify/source_shopify/schemas/draft_orders.json | 3 +++ .../source_shopify/schemas/fulfillment_orders.json | 3 +++ .../source-shopify/source_shopify/schemas/fulfillments.json | 3 +++ .../source_shopify/schemas/inventory_items.json | 3 +++ .../source_shopify/schemas/inventory_levels.json | 3 +++ .../source-shopify/source_shopify/schemas/locations.json | 3 +++ .../source-shopify/source_shopify/schemas/metafields.json | 3 +++ .../source-shopify/source_shopify/schemas/order_refunds.json | 3 +++ .../source-shopify/source_shopify/schemas/order_risks.json | 3 +++ .../source-shopify/source_shopify/schemas/orders.json | 3 +++ .../source-shopify/source_shopify/schemas/pages.json | 3 +++ .../source-shopify/source_shopify/schemas/price_rules.json | 3 +++ .../source-shopify/source_shopify/schemas/products.json | 3 +++ .../source-shopify/source_shopify/schemas/shop.json | 3 +++ .../source-shopify/source_shopify/schemas/transactions.json | 3 +++ .../connectors/source-shopify/source_shopify/source.py | 4 ++++ docs/integrations/sources/shopify.md | 1 + 25 files changed, 68 insertions(+), 3 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 724e3cf8f2f19..aed21d4979a6b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -641,7 +641,7 @@ - name: Shopify sourceDefinitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 dockerRepository: airbyte/source-shopify - dockerImageTag: 0.1.28 + dockerImageTag: 0.1.29 documentationUrl: https://docs.airbyte.io/integrations/sources/shopify icon: shopify.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 07ec4b040181c..fbc4d170850a6 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6755,7 +6755,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-shopify:0.1.28" +- dockerImage: "airbyte/source-shopify:0.1.29" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-shopify/Dockerfile b/airbyte-integrations/connectors/source-shopify/Dockerfile index 27a2909ad9283..88b6c8ef31047 100644 --- a/airbyte-integrations/connectors/source-shopify/Dockerfile +++ b/airbyte-integrations/connectors/source-shopify/Dockerfile @@ -28,5 +28,5 @@ COPY source_shopify ./source_shopify ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.28 +LABEL io.airbyte.version=0.1.29 LABEL io.airbyte.name=airbyte/source-shopify diff --git 
a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/abandoned_checkouts.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/abandoned_checkouts.json index 6e2c0e6cfc522..e08c7a9068b12 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/abandoned_checkouts.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/abandoned_checkouts.json @@ -252,6 +252,9 @@ "source_url": { "type": ["null", "string"] }, + "shop_url": { + "type": ["null", "string"] + }, "total_discounts": { "type": ["null", "number"] }, diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/collects.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/collects.json index aa2bd0a282e33..05495af8ce4da 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/collects.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/collects.json @@ -20,6 +20,9 @@ "sort_value": { "type": ["null", "string"] }, + "shop_url": { + "type": ["null", "string"] + }, "updated_at": { "type": ["null", "string"], "format": "date-time" diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/custom_collections.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/custom_collections.json index 1c5e34282b322..a52a84d3ed67a 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/custom_collections.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/custom_collections.json @@ -52,6 +52,9 @@ }, "template_suffix": { "type": ["null", "string"] + }, + "shop_url": { + "type": ["null", "string"] } }, "type": ["null", "object"] diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/customers.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/customers.json index fd298b7b12517..f5ade7534ef79 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/customers.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/customers.json @@ -13,6 +13,9 @@ "multipass_identifier": { "type": ["null", "string"] }, + "shop_url":{ + "type": ["null", "string"] + }, "default_address": { "type": ["null", "object"], "properties": { diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/discount_codes.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/discount_codes.json index 4532de8601117..7f5f0d425bca7 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/discount_codes.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/discount_codes.json @@ -20,6 +20,9 @@ "updated_at": { "type": ["null", "string"], "format": "date-time" + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/draft_orders.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/draft_orders.json index 4a6c86820999f..3c30979cfb686 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/draft_orders.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/draft_orders.json @@ -528,6 +528,9 @@ } } } + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git 
a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json index 16987cbedd703..b4ae5dc07d098 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json @@ -175,6 +175,9 @@ "type": ["null", "string"] } } + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json index 05cdc8b4be903..c106c0b50f762 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json @@ -264,6 +264,9 @@ } } } + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/inventory_items.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/inventory_items.json index 2d4e564e6f7ad..908bf2a21b502 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/inventory_items.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/inventory_items.json @@ -20,6 +20,9 @@ }, "sku": { "type": ["null", "string"] + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/inventory_levels.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/inventory_levels.json index 89baedb117281..afad23e9d2e11 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/inventory_levels.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/inventory_levels.json @@ -13,6 +13,9 @@ "updated_at": { "type": ["null", "string"], "format": "date-time" + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/locations.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/locations.json index 5f15a0aa956b8..a25575e2a3bdd 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/locations.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/locations.json @@ -50,6 +50,9 @@ }, "localized_province_name": { "type": ["null", "string"] + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/metafields.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/metafields.json index 4277aeb4351a1..3a4a7bc1fcbc9 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/metafields.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/metafields.json @@ -34,6 +34,9 @@ "updated_at": { "type": ["null", "string"], "format": "date-time" + }, + "shop_url": { + "type": ["null", "string"] } }, "type": ["null", "object"] diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/order_refunds.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/order_refunds.json index 608a420a46917..dbb0b7fbf1eab 100644 --- 
a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/order_refunds.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/order_refunds.json @@ -461,6 +461,9 @@ } } } + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/order_risks.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/order_risks.json index fae5bb5acf8f0..04d364c93f807 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/order_risks.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/order_risks.json @@ -30,6 +30,9 @@ }, "merchant_message": { "type": ["null", "string"] + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/orders.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/orders.json index 6a95e33402666..18da58fcc2c27 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/orders.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/orders.json @@ -304,6 +304,9 @@ "source_url": { "type": ["null", "string"] }, + "shop_url": { + "type": ["null", "string"] + }, "subtotal_price": { "type": ["null", "number"] }, diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/pages.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/pages.json index f7af9faddbabf..95de888f20807 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/pages.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/pages.json @@ -33,6 +33,9 @@ "updated_at": { "type": ["null", "string"], "format": "date-time" + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/price_rules.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/price_rules.json index 984968486b1c3..b04a19349cd07 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/price_rules.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/price_rules.json @@ -146,6 +146,9 @@ }, "allocation_limit": { "type": ["null", "integer"] + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/products.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/products.json index ac2f37095ef31..ca403f5622019 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/products.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/products.json @@ -264,6 +264,9 @@ }, "id": { "type": ["null", "integer"] + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/shop.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/shop.json index b4eb4fb815b25..d29981f47486d 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/shop.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/shop.json @@ -167,6 +167,9 @@ }, "zip": { "type": ["null", "string"] + }, + "shop_url": { + "type": ["null", "string"] } } } diff --git 
a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/transactions.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/transactions.json index 75e06fbef586f..a3410fcf8a708 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/transactions.json +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/transactions.json @@ -91,6 +91,9 @@ }, "location_id": { "type": ["null", "integer"] + }, + "shop_url": { + "type": ["null", "string"] } }, "type": ["null", "object"] diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py index 25275e71191c7..2ce5fb0ce9d62 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py @@ -65,10 +65,14 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp # this solution designed to convert string into number, but in future can be modified for general purpose if isinstance(records, dict): # for cases when we have a single record as dict + # add shop_url to the record to make querying easy + records['shop_url'] = self.config["shop"] yield self._transformer.transform(records) else: # for other cases for record in records: + # add shop_url to the record to make querying easy + record['shop_url'] = self.config["shop"] yield self._transformer.transform(record) @property diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md index 79afd2388b4bc..b9dced7ffb2e1 100644 --- a/docs/integrations/sources/shopify.md +++ b/docs/integrations/sources/shopify.md @@ -101,6 +101,7 @@ This connector support both: `OAuth 2.0` and `API PASSWORD` (for private applica | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.29 | 2022-01-20 | [9049](https://github.com/airbytehq/airbyte/pull/9248) | Added `shop_url` to the record for all streams | | 0.1.28 | 2022-01-19 | [9591](https://github.com/airbytehq/airbyte/pull/9591) | Implemented `OAuth2.0` authentication method for Airbyte Cloud | | 0.1.27 | 2021-12-22 | [9049](https://github.com/airbytehq/airbyte/pull/9049) | Update connector fields title/description | | 0.1.26 | 2021-12-14 | [8597](https://github.com/airbytehq/airbyte/pull/8597) | Fix `mismatched number of tables` for base-normalization, increased performance of `order_refunds` stream | From d3d4d203b39fc24a90a08632f15a5ae14299d4ee Mon Sep 17 00:00:00 2001 From: Artem Astapenko <3767150+Jamakase@users.noreply.github.com> Date: Thu, 20 Jan 2022 22:41:45 +0300 Subject: [PATCH 172/215] Fixes editing of already selected item for array of objects editor (#9544) --- .../Connector/ServiceForm/components/Sections/ArraySection.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/components/Sections/ArraySection.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/components/Sections/ArraySection.tsx index a64add38b8943..941b3963f9ef8 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/components/Sections/ArraySection.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/components/Sections/ArraySection.tsx @@ -45,7 +45,7 @@ export const ArraySection: React.FC<{ onStartEdit={(index) => addUnfinishedFlow(path, { id: index, - startValue: index < items.length ? items[index] : null, + startValue: index < items.length ? 
items : null, }) } onDone={() => removeUnfinishedFlow(path)} From 1aee597303aa5ff65aeeb07d02d37642566229aa Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 20 Jan 2022 11:47:22 -0800 Subject: [PATCH 173/215] temporarily disable async orchestrator integration test (#9662) --- .../process/AsyncOrchestratorPodProcessIntegrationTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/AsyncOrchestratorPodProcessIntegrationTest.java b/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/AsyncOrchestratorPodProcessIntegrationTest.java index 49b648b302994..9a3f7c6fad57e 100644 --- a/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/AsyncOrchestratorPodProcessIntegrationTest.java +++ b/airbyte-workers/src/test-integration/java/io/airbyte/workers/process/AsyncOrchestratorPodProcessIntegrationTest.java @@ -29,9 +29,11 @@ import org.apache.commons.lang3.RandomStringUtils; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import software.amazon.awssdk.services.s3.model.CreateBucketRequest; +@Disabled public class AsyncOrchestratorPodProcessIntegrationTest { private static KubernetesClient kubernetesClient; From 9fe804aac3e2998bfa38b4322bbf7213ec1f8508 Mon Sep 17 00:00:00 2001 From: Eugene Date: Thu, 20 Jan 2022 22:21:41 +0200 Subject: [PATCH 174/215] =?UTF-8?q?=F0=9F=8E=89=20Destination-snowflake:?= =?UTF-8?q?=20start=20using=20new=20S3StreamCopier,=20and=20expose=20the?= =?UTF-8?q?=20purgeStagingData=20option=20(#9531)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [8820] Destination-snowflake: start using new S3StreamCopier and expose the purgeStagingData option --- .../424892c4-daac-4491-b35d-c6688ba547ba.json | 2 +- .../seed/destination_definitions.yaml | 2 +- .../resources/seed/destination_specs.yaml | 10 ++- .../jdbc/copy/s3/S3CopyConfig.java | 5 ++ .../jdbc/copy/s3/S3StreamCopier.java | 11 +++ .../redshift/RedshiftCopyS3Destination.java | 2 +- .../redshift/RedshiftStreamCopier.java | 5 -- .../destination-snowflake/Dockerfile | 2 +- .../destination-snowflake/README.md | 2 +- .../snowflake/SnowflakeCopyS3Destination.java | 3 +- .../snowflake/SnowflakeS3StreamCopier.java | 55 ++++++++++-- .../SnowflakeS3StreamCopierFactory.java | 28 +++--- .../src/main/resources/spec.json | 7 ++ .../SnowflakeS3StreamCopierTest.java | 90 +++++++++++++++++++ docs/integrations/destinations/snowflake.md | 20 +++++ 15 files changed, 210 insertions(+), 34 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json index b90ab00a62c1c..acf0a7d031470 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "424892c4-daac-4491-b35d-c6688ba547ba", "name": "Snowflake", "dockerRepository": "airbyte/destination-snowflake", - "dockerImageTag": "0.4.2", + 
"dockerImageTag": "0.4.3", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/snowflake", "icon": "snowflake.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 6f120e18eca22..712996128cd71 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -179,7 +179,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: airbyte/destination-snowflake - dockerImageTag: 0.4.2 + dockerImageTag: 0.4.3 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg - name: MariaDB ColumnStore diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 6f9cb6856d36c..dd34e8ef899b4 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3786,7 +3786,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.4.2" +- dockerImage: "airbyte/destination-snowflake:0.4.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: @@ -3974,6 +3974,14 @@ \ memory requirement. Modify this with care." title: "Stream Part Size" order: 5 + purge_staging_data: + title: "Purge Staging Files and Tables" + type: "boolean" + description: "Whether to delete the staging files from S3 after completing\ + \ the sync. See the docs for details. Only relevant for COPY. Defaults\ + \ to true." 
+ default: true + order: 6 - title: "GCS Staging" additionalProperties: false description: "Writes large batches of records to a file, uploads the file\ diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3CopyConfig.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3CopyConfig.java index bfce8529dfceb..6b13de6f73c5f 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3CopyConfig.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3CopyConfig.java @@ -22,4 +22,9 @@ public static boolean shouldPurgeStagingData(final JsonNode config) { } } + public static S3CopyConfig getS3CopyConfig(final JsonNode config) { + return new S3CopyConfig(S3CopyConfig.shouldPurgeStagingData(config), + S3DestinationConfig.getS3DestinationConfig(config)); + } + } diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java index 5f7aef024cbd3..e6a2988b7b66a 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java @@ -5,6 +5,7 @@ package io.airbyte.integrations.destination.jdbc.copy.s3; import com.amazonaws.services.s3.AmazonS3; +import com.google.common.annotations.VisibleForTesting; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.destination.jdbc.SqlOperations; @@ -201,6 +202,16 @@ protected static String getFullS3Path(final String s3BucketName, final String s3 return String.join("/", "s3:/", s3BucketName, s3StagingFile); } + @VisibleForTesting + public String getTmpTableName() { + return tmpTableName; + } + + @VisibleForTesting + public Map getStagingWritersByFile() { + return stagingWritersByFile; + } + public abstract void copyS3CsvFileIntoTable(JdbcDatabase database, String s3FileLocation, String schema, diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftCopyS3Destination.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftCopyS3Destination.java index 1b5249d0ed7fa..8b8212bb8cf7e 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftCopyS3Destination.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftCopyS3Destination.java @@ -43,7 +43,7 @@ public AirbyteMessageConsumer getConsumer(final JsonNode config, getDatabase(config), getSqlOperations(), getNameTransformer(), - new S3CopyConfig(S3CopyConfig.shouldPurgeStagingData(config), getS3DestinationConfig(config)), + S3CopyConfig.getS3CopyConfig(config), catalog, new RedshiftStreamCopierFactory(), getConfiguredSchema(config)); diff --git 
a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStreamCopier.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStreamCopier.java index c296ddf1d1265..bd6e878d36835 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStreamCopier.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStreamCopier.java @@ -110,11 +110,6 @@ public void removeFileAndDropTmpTable() throws Exception { } } - @VisibleForTesting - String getTmpTableName() { - return tmpTableName; - } - /** * Creates the contents of a manifest file given the `s3StagingFiles`. There must be at least one * entry in a manifest file otherwise it is not considered valid for the COPY command. diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index f854615fb1c72..b0e293930d9cc 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -18,5 +18,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.4.2 +LABEL io.airbyte.version=0.4.3 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/README.md b/airbyte-integrations/connectors/destination-snowflake/README.md index b21bda7163d7c..e48eaa79fc85f 100644 --- a/airbyte-integrations/connectors/destination-snowflake/README.md +++ b/airbyte-integrations/connectors/destination-snowflake/README.md @@ -22,7 +22,7 @@ Put the contents of the `Snowflake Integration Test Config` secret on Rippling under the `Engineering` folder into `secrets/config.json` to be able to run integration tests locally. 1. Put the contents of the `destination snowflake - insert test creds` LastPass secret into `secrets/insert_config.json`. -1. Put the contents of the `destination snowflake - insert staging test creds` secret into `insert_staging_config.json`. +1. Put the contents of the `destination snowflake - insert staging test creds` secret into `internal_staging_config.json`. 1. Put the contents of the `destination snowflake - gcs copy test creds` secret into `secrets/copy_gcs_config.json` 1. 
Put the contents of the `destination snowflake - s3 copy test creds` secret into `secrets/copy_s3_config.json` diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeCopyS3Destination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeCopyS3Destination.java index 0e58e705c6690..53fd764e1227a 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeCopyS3Destination.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeCopyS3Destination.java @@ -11,6 +11,7 @@ import io.airbyte.integrations.destination.jdbc.SqlOperations; import io.airbyte.integrations.destination.jdbc.copy.CopyConsumerFactory; import io.airbyte.integrations.destination.jdbc.copy.CopyDestination; +import io.airbyte.integrations.destination.jdbc.copy.s3.S3CopyConfig; import io.airbyte.integrations.destination.s3.S3Destination; import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.protocol.models.AirbyteMessage; @@ -28,7 +29,7 @@ public AirbyteMessageConsumer getConsumer(final JsonNode config, getDatabase(config), getSqlOperations(), getNameTransformer(), - getS3DestinationConfig(config), + S3CopyConfig.getS3CopyConfig(config.get("loading_method")), catalog, new SnowflakeS3StreamCopierFactory(), getConfiguredSchema(config)); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java index e2f886c111527..d25e00a7675c7 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java @@ -5,26 +5,65 @@ package io.airbyte.integrations.destination.snowflake; import com.amazonaws.services.s3.AmazonS3; +import com.google.common.annotations.VisibleForTesting; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.destination.jdbc.SqlOperations; -import io.airbyte.integrations.destination.jdbc.copy.s3.LegacyS3StreamCopier; +import io.airbyte.integrations.destination.jdbc.copy.s3.S3CopyConfig; +import io.airbyte.integrations.destination.jdbc.copy.s3.S3StreamCopier; import io.airbyte.integrations.destination.s3.S3DestinationConfig; -import io.airbyte.protocol.models.DestinationSyncMode; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.Instant; -public class SnowflakeS3StreamCopier extends LegacyS3StreamCopier { +public class SnowflakeS3StreamCopier extends S3StreamCopier { + + // From https://docs.aws.amazon.com/redshift/latest/dg/t_loading-tables-from-s3.html + // "Split your load data files so that the files are about equal size, between 1 MB and 1 GB after + // compression" + public static final int MAX_PARTS_PER_FILE = 4; public SnowflakeS3StreamCopier(final String stagingFolder, - final DestinationSyncMode destSyncMode, final String schema, - final 
String streamName, final AmazonS3 client, final JdbcDatabase db, - final S3DestinationConfig s3Config, + final S3CopyConfig config, final ExtendedNameTransformer nameTransformer, - final SqlOperations sqlOperations) { - super(stagingFolder, destSyncMode, schema, streamName, client, db, s3Config, nameTransformer, sqlOperations); + final SqlOperations sqlOperations, + final ConfiguredAirbyteStream configuredAirbyteStream) { + this( + stagingFolder, + schema, + client, + db, + config, + nameTransformer, + sqlOperations, + Timestamp.from(Instant.now()), + configuredAirbyteStream); + } + + @VisibleForTesting + SnowflakeS3StreamCopier(final String stagingFolder, + final String schema, + final AmazonS3 client, + final JdbcDatabase db, + final S3CopyConfig config, + final ExtendedNameTransformer nameTransformer, + final SqlOperations sqlOperations, + final Timestamp uploadTime, + final ConfiguredAirbyteStream configuredAirbyteStream) { + super(stagingFolder, + schema, + client, + db, + config, + nameTransformer, + sqlOperations, + configuredAirbyteStream, + uploadTime, + MAX_PARTS_PER_FILE); } @Override diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierFactory.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierFactory.java index d2d9139af5182..52d203e660a34 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierFactory.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierFactory.java @@ -9,24 +9,24 @@ import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.destination.jdbc.SqlOperations; import io.airbyte.integrations.destination.jdbc.copy.StreamCopier; -import io.airbyte.integrations.destination.jdbc.copy.s3.LegacyS3StreamCopierFactory; -import io.airbyte.integrations.destination.s3.S3DestinationConfig; -import io.airbyte.protocol.models.DestinationSyncMode; +import io.airbyte.integrations.destination.jdbc.copy.s3.S3CopyConfig; +import io.airbyte.integrations.destination.jdbc.copy.s3.S3StreamCopierFactory; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; -public class SnowflakeS3StreamCopierFactory extends LegacyS3StreamCopierFactory { +public class SnowflakeS3StreamCopierFactory extends S3StreamCopierFactory { @Override - public StreamCopier create(final String stagingFolder, - final DestinationSyncMode syncMode, - final String schema, - final String streamName, - final AmazonS3 s3Client, - final JdbcDatabase db, - final S3DestinationConfig s3Config, - final ExtendedNameTransformer nameTransformer, - final SqlOperations sqlOperations) + protected StreamCopier create(final String stagingFolder, + final String schema, + final AmazonS3 s3Client, + final JdbcDatabase db, + final S3CopyConfig config, + final ExtendedNameTransformer nameTransformer, + final SqlOperations sqlOperations, + final ConfiguredAirbyteStream configuredStream) throws Exception { - return new SnowflakeS3StreamCopier(stagingFolder, syncMode, schema, streamName, s3Client, db, s3Config, nameTransformer, sqlOperations); + return new SnowflakeS3StreamCopier(stagingFolder, schema, s3Client, db, config, nameTransformer, + sqlOperations, configuredStream); } } diff --git 
a/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json index de555da19b948..914aee0d1aac1 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json @@ -178,6 +178,13 @@ "description": "Optional. Increase this if syncing tables larger than 100GB. Only relevant for COPY. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default limit of 100GB tables. Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care.", "title": "Stream Part Size", "order": 5 + }, + "purge_staging_data": { + "title": "Purge Staging Files and Tables", + "type": "boolean", + "description": "Whether to delete the staging files from S3 after completing the sync. See the docs for details. Only relevant for COPY. Defaults to true.", + "default": true, + "order": 6 } } }, diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java new file mode 100644 index 0000000000000..77913d82e6fa0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.snowflake; + +import static io.airbyte.integrations.destination.snowflake.SnowflakeS3StreamCopier.MAX_PARTS_PER_FILE; +import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +import com.amazonaws.services.s3.AmazonS3Client; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.destination.jdbc.SqlOperations; +import io.airbyte.integrations.destination.jdbc.copy.s3.S3CopyConfig; +import io.airbyte.integrations.destination.s3.S3DestinationConfig; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.sql.Timestamp; +import java.time.Instant; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class SnowflakeS3StreamCopierTest { + + private static final int PART_SIZE = 5; + + // equivalent to Thu, 09 Dec 2021 19:17:54 GMT + private static final Timestamp UPLOAD_TIME = Timestamp.from(Instant.ofEpochMilli(1639077474000L)); + + private AmazonS3Client s3Client; + private JdbcDatabase db; + private SqlOperations sqlOperations; + private SnowflakeS3StreamCopier copier; + + @BeforeEach + public void setup() { + s3Client = mock(AmazonS3Client.class, RETURNS_DEEP_STUBS); + db = mock(JdbcDatabase.class); + sqlOperations = mock(SqlOperations.class); + + copier = new SnowflakeS3StreamCopier( + // In reality, this is normally a UUID - see CopyConsumerFactory#createWriteConfigs + "fake-staging-folder", + "fake-schema", + s3Client, + db, + new S3CopyConfig( + true, + new S3DestinationConfig( + "fake-endpoint", + "fake-bucket", + "fake-bucketPath", + "fake-region", + "fake-access-key-id", + "fake-secret-access-key", + PART_SIZE, + null)), + new ExtendedNameTransformer(), + sqlOperations, + UPLOAD_TIME, + new ConfiguredAirbyteStream() + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(new AirbyteStream() + .withName("fake-stream") + .withNamespace("fake-namespace"))); + } + + @Test + public void copiesCorrectFilesToTable() throws Exception { + // Generate two files + for (int i = 0; i < MAX_PARTS_PER_FILE + 1; i++) { + copier.prepareStagingFile(); + } + + copier.copyStagingFileToTemporaryTable(); + + for (String fileName : copier.getStagingWritersByFile().keySet()) { + verify(db).execute(String.format("COPY INTO fake-schema.%s FROM " + + "'s3://fake-bucket/%s'" + + " CREDENTIALS=(aws_key_id='fake-access-key-id' aws_secret_key='fake-secret-access-key') " + + "file_format = (type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"');", + copier.getTmpTableName(), fileName)); + } + + } + +} diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index 8c9885b71a862..fff0a73cf611b 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -160,6 +160,25 @@ Internal named stages are storage location objects within a Snowflake database/s For AWS S3, you will need to create a bucket and provide credentials to access the bucket. We recommend creating a bucket that is only used for Airbyte to stage data to Snowflake. Airbyte needs read/write access to interact with this bucket. +Provide the required S3 info. 
+ +* **S3 Bucket Name** + * See [this](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html) to create an S3 bucket. +* **S3 Bucket Region** + * Place the S3 bucket and the Redshift cluster in the same region to save on networking costs. +* **Access Key Id** + * See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key. + * We recommend creating an Airbyte-specific user. This user will require [read and write permissions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_s3_rw-bucket.html) to objects in the staging bucket. +* **Secret Access Key** + * Corresponding key to the above key id. +* **Part Size** + * Affects the size limit of an individual Redshift table. Optional. Increase this if syncing tables larger than 100GB. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default table limit of 100GB. Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care. + +Optional parameters: +* **Purge Staging Data** + * Whether to delete the staging files from S3 after completing the sync. Specifically, the connector will create CSV files named `bucketPath/namespace/streamName/syncDate_epochMillis_randomUuid.csv` containing three columns (`ab_id`, `data`, `emitted_at`). Normally these files are deleted after the `COPY` command completes; if you want to keep them for other purposes, set `purge_staging_data` to `false`. + + ### Google Cloud Storage \(GCS\) First you will need to create a GCS bucket. @@ -198,6 +217,7 @@ Finally, you need to add read/write permissions to your bucket with that email. 
| Version | Date | Pull Request | Subject | |:--------|:-----------| :----- | :------ | +| 0.4.3 | 2022-01-20 | [#9531](https://github.com/airbytehq/airbyte/pull/9531) | Start using new S3StreamCopier and expose the purgeStagingData option | | 0.4.2 | 2022-01-10 | [#9141](https://github.com/airbytehq/airbyte/pull/9141) | Fixed duplicate rows on retries | | 0.4.1 | 2021-01-06 | [#9311](https://github.com/airbytehq/airbyte/pull/9311) | Update сreating schema during check | | 0.4.0 | 2021-12-27 | [#9063](https://github.com/airbytehq/airbyte/pull/9063) | Updated normalization to produce permanent tables | From d2d0335dc06f4818bede48ec4b0c696bc377f020 Mon Sep 17 00:00:00 2001 From: Noah Kawasaki <68556134+noahkawasakigoogle@users.noreply.github.com> Date: Thu, 20 Jan 2022 13:09:57 -0800 Subject: [PATCH 175/215] Make sure generate.sh has docker running before trying to generate, and make the script always execute in the correct directory (#9649) --- .../connector-templates/generator/generate.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connector-templates/generator/generate.sh b/airbyte-integrations/connector-templates/generator/generate.sh index b7714d5ceb278..c3b7d263838eb 100755 --- a/airbyte-integrations/connector-templates/generator/generate.sh +++ b/airbyte-integrations/connector-templates/generator/generate.sh @@ -6,6 +6,17 @@ error_handler() { trap 'error_handler $LINENO' ERR set -e + +# Ensure script always runs from this directory because thats how docker build contexts work +cd "$(dirname "${0}")" || exit 1 + +# Make sure docker is running before trying +if ! docker ps; then + echo "docker is not running, this script requires docker to be up" + echo "please start up the docker daemon!" 
+ exit +fi + _UID=$(id -u) _GID=$(id -g) # Remove container if already exist @@ -28,5 +39,3 @@ else fi echo "Finished running generator" - -exit 0 From a0079534fdb59fd41207a7bb1c960848ee327220 Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Thu, 20 Jan 2022 14:03:30 -0800 Subject: [PATCH 176/215] Add totalStats and streamStats in Attempts API response (#9583) * expose new stat fields in Attempts api response * remove extranneous import --- airbyte-api/src/main/openapi/config.yaml | 31 ++ .../server/converters/JobConverter.java | 48 ++- .../server/converters/JobConverterTest.java | 48 +++ .../api/generated-api-html/index.html | 298 ++++++++++++++++++ 4 files changed, 423 insertions(+), 2 deletions(-) diff --git a/airbyte-api/src/main/openapi/config.yaml b/airbyte-api/src/main/openapi/config.yaml index 602b45f2e897a..6086830250f4d 100644 --- a/airbyte-api/src/main/openapi/config.yaml +++ b/airbyte-api/src/main/openapi/config.yaml @@ -3114,6 +3114,37 @@ components: recordsSynced: type: integer format: int64 + totalStats: + $ref: "#/components/schemas/AttemptStats" + streamStats: + type: array + items: + $ref: "#/components/schemas/AttemptStreamStats" + AttemptStats: + type: object + properties: + recordsEmitted: + type: integer + format: int64 + bytesEmitted: + type: integer + format: int64 + stateMessagesEmitted: + type: integer + format: int64 + recordsCommitted: + type: integer + format: int64 + AttemptStreamStats: + type: object + required: + - streamName + - stats + properties: + streamName: + type: string + stats: + $ref: "#/components/schemas/AttemptStats" AttemptStatus: type: string enum: diff --git a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java index 9841a41d16eb8..5c4f74b52e79c 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java @@ -6,7 +6,9 @@ import io.airbyte.api.model.AttemptInfoRead; import io.airbyte.api.model.AttemptRead; +import io.airbyte.api.model.AttemptStats; import io.airbyte.api.model.AttemptStatus; +import io.airbyte.api.model.AttemptStreamStats; import io.airbyte.api.model.JobConfigType; import io.airbyte.api.model.JobInfoRead; import io.airbyte.api.model.JobRead; @@ -19,6 +21,8 @@ import io.airbyte.config.JobOutput; import io.airbyte.config.StandardSyncOutput; import io.airbyte.config.StandardSyncSummary; +import io.airbyte.config.StreamSyncStats; +import io.airbyte.config.SyncStats; import io.airbyte.config.helpers.LogClientSingleton; import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.client.SynchronousJobMetadata; @@ -27,6 +31,8 @@ import io.airbyte.scheduler.models.Job; import java.io.IOException; import java.nio.file.Path; +import java.util.Collections; +import java.util.List; import java.util.stream.Collectors; public class JobConverter { @@ -72,21 +78,59 @@ public static AttemptRead getAttemptRead(final Attempt attempt) { return new AttemptRead() .id(attempt.getId()) .status(Enums.convertTo(attempt.getStatus(), AttemptStatus.class)) - .bytesSynced(attempt.getOutput() + .bytesSynced(attempt.getOutput() // TODO (parker) remove after frontend switches to totalStats .map(JobOutput::getSync) .map(StandardSyncOutput::getStandardSyncSummary) .map(StandardSyncSummary::getBytesSynced) .orElse(null)) - .recordsSynced(attempt.getOutput() + .recordsSynced(attempt.getOutput() // TODO (parker) remove after frontend switches 
to totalStats .map(JobOutput::getSync) .map(StandardSyncOutput::getStandardSyncSummary) .map(StandardSyncSummary::getRecordsSynced) .orElse(null)) + .totalStats(getTotalAttemptStats(attempt)) + .streamStats(getAttemptStreamStats(attempt)) .createdAt(attempt.getCreatedAtInSecond()) .updatedAt(attempt.getUpdatedAtInSecond()) .endedAt(attempt.getEndedAtInSecond().orElse(null)); } + public static AttemptStats getTotalAttemptStats(final Attempt attempt) { + final SyncStats totalStats = attempt.getOutput() + .map(JobOutput::getSync) + .map(StandardSyncOutput::getStandardSyncSummary) + .map(StandardSyncSummary::getTotalStats) + .orElse(null); + + if (totalStats == null) { + return null; + } + + return new AttemptStats() + .bytesEmitted(totalStats.getBytesEmitted()) + .recordsEmitted(totalStats.getRecordsEmitted()) + .stateMessagesEmitted(totalStats.getStateMessagesEmitted()) + .recordsCommitted(totalStats.getRecordsCommitted()); + } + + public static List getAttemptStreamStats(final Attempt attempt) { + final List streamStats = attempt.getOutput() + .map(JobOutput::getSync) + .map(StandardSyncOutput::getStandardSyncSummary) + .map(StandardSyncSummary::getStreamStats) + .orElse(Collections.emptyList()); + + return streamStats.stream() + .map(streamStat -> new AttemptStreamStats() + .streamName(streamStat.getStreamName()) + .stats(new AttemptStats() + .bytesEmitted(streamStat.getStats().getBytesEmitted()) + .recordsEmitted(streamStat.getStats().getRecordsEmitted()) + .stateMessagesEmitted(streamStat.getStats().getStateMessagesEmitted()) + .recordsCommitted(streamStat.getStats().getRecordsCommitted()))) + .collect(Collectors.toList()); + } + public LogRead getLogRead(final Path logPath) { try { return new LogRead().logLines(LogClientSingleton.getInstance().getJobLogFile(workerEnvironment, logConfigs, logPath)); diff --git a/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java b/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java index 7ed276081c53a..37a9d8cab50bc 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java @@ -12,6 +12,8 @@ import com.google.common.collect.Lists; import io.airbyte.api.model.AttemptInfoRead; import io.airbyte.api.model.AttemptRead; +import io.airbyte.api.model.AttemptStats; +import io.airbyte.api.model.AttemptStreamStats; import io.airbyte.api.model.JobConfigType; import io.airbyte.api.model.JobInfoRead; import io.airbyte.api.model.JobRead; @@ -21,6 +23,12 @@ import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobCheckConnectionConfig; import io.airbyte.config.JobConfig; +import io.airbyte.config.JobOutput; +import io.airbyte.config.JobOutput.OutputType; +import io.airbyte.config.StandardSyncOutput; +import io.airbyte.config.StandardSyncSummary; +import io.airbyte.config.StreamSyncStats; +import io.airbyte.config.SyncStats; import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.models.Attempt; import io.airbyte.scheduler.models.AttemptStatus; @@ -46,6 +54,30 @@ class JobConverterTest { .withCheckConnection(new JobCheckConnectionConfig()); private static final Path LOG_PATH = Path.of("log_path"); private static final long CREATED_AT = System.currentTimeMillis() / 1000; + private static final long RECORDS_EMITTED = 15L; + private static final long BYTES_EMITTED = 100L; + private static final long RECORDS_COMMITTED = 10L; + private static final long 
STATE_MESSAGES_EMITTED = 2L; + private static final String STREAM_NAME = "stream1"; + + private static final JobOutput JOB_OUTPUT = new JobOutput() + .withOutputType(OutputType.SYNC) + .withSync(new StandardSyncOutput() + .withStandardSyncSummary(new StandardSyncSummary() + .withRecordsSynced(RECORDS_EMITTED) + .withBytesSynced(BYTES_EMITTED) + .withTotalStats(new SyncStats() + .withRecordsEmitted(RECORDS_EMITTED) + .withBytesEmitted(BYTES_EMITTED) + .withStateMessagesEmitted(STATE_MESSAGES_EMITTED) + .withRecordsCommitted(RECORDS_COMMITTED)) + .withStreamStats(Lists.newArrayList(new StreamSyncStats() + .withStreamName(STREAM_NAME) + .withStats(new SyncStats() + .withRecordsEmitted(RECORDS_EMITTED) + .withBytesEmitted(BYTES_EMITTED) + .withStateMessagesEmitted(STATE_MESSAGES_EMITTED) + .withRecordsCommitted(RECORDS_COMMITTED)))))); private JobConverter jobConverter; private Job job; @@ -63,6 +95,20 @@ class JobConverterTest { .attempt(new AttemptRead() .id(ATTEMPT_ID) .status(io.airbyte.api.model.AttemptStatus.RUNNING) + .recordsSynced(RECORDS_EMITTED) + .bytesSynced(BYTES_EMITTED) + .totalStats(new AttemptStats() + .recordsEmitted(RECORDS_EMITTED) + .bytesEmitted(BYTES_EMITTED) + .stateMessagesEmitted(STATE_MESSAGES_EMITTED) + .recordsCommitted(RECORDS_COMMITTED)) + .streamStats(Lists.newArrayList(new AttemptStreamStats() + .streamName(STREAM_NAME) + .stats(new AttemptStats() + .recordsEmitted(RECORDS_EMITTED) + .bytesEmitted(BYTES_EMITTED) + .stateMessagesEmitted(STATE_MESSAGES_EMITTED) + .recordsCommitted(RECORDS_COMMITTED)))) .updatedAt(CREATED_AT) .createdAt(CREATED_AT) .endedAt(CREATED_AT)) @@ -87,10 +133,12 @@ public void setUp() { when(job.getAttempts()).thenReturn(Lists.newArrayList(attempt)); when(attempt.getId()).thenReturn(ATTEMPT_ID); when(attempt.getStatus()).thenReturn(ATTEMPT_STATUS); + when(attempt.getOutput()).thenReturn(Optional.of(JOB_OUTPUT)); when(attempt.getLogPath()).thenReturn(LOG_PATH); when(attempt.getCreatedAtInSecond()).thenReturn(CREATED_AT); when(attempt.getUpdatedAtInSecond()).thenReturn(CREATED_AT); when(attempt.getEndedAtInSecond()).thenReturn(Optional.of(CREATED_AT)); + } @Test diff --git a/docs/reference/api/generated-api-html/index.html b/docs/reference/api/generated-api-html/index.html index bff6feaf45603..4e9ff250a3027 100644 --- a/docs/reference/api/generated-api-html/index.html +++ b/docs/reference/api/generated-api-html/index.html @@ -1023,9 +1023,32 @@

Example data

}, "attempts" : [ { "attempt" : { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -1035,9 +1058,32 @@

Example data

} }, { "attempt" : { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -1262,9 +1308,32 @@

Example data

}, "attempts" : [ { "attempt" : { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -1274,9 +1343,32 @@

Example data

} }, { "attempt" : { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -2845,9 +2937,32 @@

Example data

}, "attempts" : [ { "attempt" : { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -2857,9 +2972,32 @@

Example data

} }, { "attempt" : { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -2933,9 +3071,32 @@

Example data

}, "attempts" : [ { "attempt" : { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -2945,9 +3106,32 @@

Example data

} }, { "attempt" : { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -3021,16 +3205,62 @@

Example data

"updatedAt" : 1 }, "attempts" : [ { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 }, { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -3043,16 +3273,62 @@

Example data

"updatedAt" : 1 }, "attempts" : [ { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 }, { + "totalStats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, "createdAt" : 5, "bytesSynced" : 9, "endedAt" : 7, + "streamStats" : [ { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + }, { + "stats" : { + "stateMessagesEmitted" : 7, + "recordsCommitted" : 1, + "bytesEmitted" : 4, + "recordsEmitted" : 2 + }, + "streamName" : "streamName" + } ], "id" : 5, "recordsSynced" : 3, "updatedAt" : 2 @@ -7033,7 +7309,9 @@

Table of Contents

  • AirbyteStreamConfiguration -
  • AttemptInfoRead -
  • AttemptRead -
  + • AttemptStats -
  • AttemptStatus -
  + • AttemptStreamStats -
  • AuthSpecification -
  • CheckConnectionRead -
  • CheckOperationRead -
  • @@ -7213,6 +7491,18 @@

    AttemptRead - endedAt (optional)

    Long format: int64
    bytesSynced (optional)
    Long format: int64
    recordsSynced (optional)
    Long format: int64
    + totalStats (optional)
    + streamStats (optional)

    + AttemptStats - Up

    + recordsEmitted (optional)
    + Long format: int64
    + bytesEmitted (optional)
    + Long format: int64
    + stateMessagesEmitted (optional)
    + Long format: int64
    + recordsCommitted (optional)
    + Long format: int64
    @@ -7221,6 +7511,14 @@

    AttemptStatus -

    + AttemptStreamStats - Up

    + streamName
    + stats

    AuthSpecification - Up

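The test and generated docs above surface the new per-attempt stats (totalStats and streamStats) on AttemptRead. The JobConverter change itself is not shown in this hunk, so the following is only a minimal sketch of how the internal SyncStats/StreamSyncStats models might be mapped onto the new API AttemptStats/AttemptStreamStats models. The fluent setters match those exercised in JobConverterTest; the package names and the getter methods on the generated config models are assumptions for illustration only.

import io.airbyte.api.model.AttemptStats;        // assumed package, by analogy with io.airbyte.api.model.AttemptStatus used in the test
import io.airbyte.api.model.AttemptStreamStats;  // assumed package
import io.airbyte.config.StreamSyncStats;        // assumed package for the generated config models
import io.airbyte.config.SyncStats;              // assumed package
import java.util.List;
import java.util.stream.Collectors;

// Minimal sketch only: converts internal sync stats into the API models documented above.
class AttemptStatsMapper {

  static AttemptStats toAttemptStats(final SyncStats syncStats) {
    // Assumes the generated SyncStats POJO exposes getters matching its with* setters.
    return new AttemptStats()
        .recordsEmitted(syncStats.getRecordsEmitted())
        .bytesEmitted(syncStats.getBytesEmitted())
        .stateMessagesEmitted(syncStats.getStateMessagesEmitted())
        .recordsCommitted(syncStats.getRecordsCommitted());
  }

  static List<AttemptStreamStats> toAttemptStreamStats(final List<StreamSyncStats> streamStats) {
    // One AttemptStreamStats entry per stream, mirroring the streamStats array in the examples above.
    return streamStats.stream()
        .map(streamStat -> new AttemptStreamStats()
            .streamName(streamStat.getStreamName())
            .stats(toAttemptStats(streamStat.getStats())))
        .collect(Collectors.toList());
  }

}

Mapped this way, the AttemptRead expected in JobConverterTest (a totalStats object plus one streamStats entry per stream) follows directly from the StandardSyncSummary fixture at the top of the test.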
    From 4c83ac1f16d4eb839eda0a2fdacddb867a70fcc0 Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Thu, 20 Jan 2022 14:21:52 -0800 Subject: [PATCH 177/215] Migration: add `failureSummary` column to `Attempts` table in jobs database (#9579) * Add migration to add failures column to Attempts table * rename column from failures to failureSummary * add missing copyright * update latest migration version in test --- .../airbyte/bootloader/BootloaderAppTest.java | 2 +- ...1__Add_failureSummary_col_to_Attempts.java | 33 ++++++++++++++++++ .../resources/jobs_database/schema_dump.txt | 1 + ...dd_failureSummary_col_to_AttemptsTest.java | 34 +++++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 airbyte-db/lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_35_5_001__Add_failureSummary_col_to_Attempts.java create mode 100644 airbyte-db/lib/src/test/java/io/airbyte/db/instance/jobs/migrations/V0_35_5_001__Add_failureSummary_col_to_AttemptsTest.java diff --git a/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java b/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java index 9f3da91773721..18ccdf2fc1ff4 100644 --- a/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java +++ b/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java @@ -68,7 +68,7 @@ void testBootloaderAppBlankDb() throws Exception { container.getPassword(), container.getJdbcUrl()).getInitialized(); val jobsMigrator = new JobsDatabaseMigrator(jobDatabase, this.getClass().getName()); - assertEquals("0.29.15.001", jobsMigrator.getLatestMigration().getVersion().getVersion()); + assertEquals("0.35.5.001", jobsMigrator.getLatestMigration().getVersion().getVersion()); val configDatabase = new ConfigsDatabaseInstance( mockedConfigs.getConfigDatabaseUser(), diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_35_5_001__Add_failureSummary_col_to_Attempts.java b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_35_5_001__Add_failureSummary_col_to_Attempts.java new file mode 100644 index 0000000000000..6bb63405b6320 --- /dev/null +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/jobs/migrations/V0_35_5_001__Add_failureSummary_col_to_Attempts.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.db.instance.jobs.migrations; + +import org.flywaydb.core.api.migration.BaseJavaMigration; +import org.flywaydb.core.api.migration.Context; +import org.jooq.DSLContext; +import org.jooq.impl.DSL; +import org.jooq.impl.SQLDataType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class V0_35_5_001__Add_failureSummary_col_to_Attempts extends BaseJavaMigration { + + private static final Logger LOGGER = LoggerFactory.getLogger(V0_35_5_001__Add_failureSummary_col_to_Attempts.class); + + @Override + public void migrate(final Context context) throws Exception { + LOGGER.info("Running migration: {}", this.getClass().getSimpleName()); + + final DSLContext ctx = DSL.using(context.getConnection()); + addFailureSummaryColumn(ctx); + } + + public static void addFailureSummaryColumn(final DSLContext ctx) { + ctx.alterTable("attempts") + .addColumnIfNotExists(DSL.field("failure_summary", SQLDataType.JSONB.nullable(true))) + .execute(); + } + +} diff --git a/airbyte-db/lib/src/main/resources/jobs_database/schema_dump.txt b/airbyte-db/lib/src/main/resources/jobs_database/schema_dump.txt index 32657bdf7af1e..f15611ab47cb0 100644 --- a/airbyte-db/lib/src/main/resources/jobs_database/schema_dump.txt +++ b/airbyte-db/lib/src/main/resources/jobs_database/schema_dump.txt @@ -33,6 +33,7 @@ create table "public"."attempts"( "updated_at" timestamptz(35) null, "ended_at" timestamptz(35) null, "temporal_workflow_id" varchar(256) null, + "failure_summary" jsonb null, constraint "attempts_pkey" primary key ("id") ); diff --git a/airbyte-db/lib/src/test/java/io/airbyte/db/instance/jobs/migrations/V0_35_5_001__Add_failureSummary_col_to_AttemptsTest.java b/airbyte-db/lib/src/test/java/io/airbyte/db/instance/jobs/migrations/V0_35_5_001__Add_failureSummary_col_to_AttemptsTest.java new file mode 100644 index 0000000000000..94b79480346f8 --- /dev/null +++ b/airbyte-db/lib/src/test/java/io/airbyte/db/instance/jobs/migrations/V0_35_5_001__Add_failureSummary_col_to_AttemptsTest.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.db.instance.jobs.migrations; + +import io.airbyte.db.Database; +import io.airbyte.db.instance.jobs.AbstractJobsDatabaseTest; +import java.io.IOException; +import java.sql.SQLException; +import org.jooq.DSLContext; +import org.jooq.impl.DSL; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class V0_35_5_001__Add_failureSummary_col_to_AttemptsTest extends AbstractJobsDatabaseTest { + + @Test + public void test() throws SQLException, IOException { + final Database database = getDatabase(); + final DSLContext context = DSL.using(database.getDataSource().getConnection()); + Assertions.assertFalse(failureSummaryColumnExists(context)); + V0_35_5_001__Add_failureSummary_col_to_Attempts.addFailureSummaryColumn(context); + Assertions.assertTrue(failureSummaryColumnExists(context)); + } + + protected static boolean failureSummaryColumnExists(final DSLContext ctx) { + return ctx.fetchExists(DSL.select() + .from("information_schema.columns") + .where(DSL.field("table_name").eq("attempts") + .and(DSL.field("column_name").eq("failure_summary")))); + } + +} From 111131a193359027d0081de1290eb4bb846662ef Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Thu, 20 Jan 2022 14:42:33 -0800 Subject: [PATCH 178/215] change severity of state delta tracker exception and clarify messaging (#9657) --- .../workers/protocols/airbyte/AirbyteMessageTracker.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTracker.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTracker.java index 4d4f93ed6f0af..f020778e3e2bc 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTracker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/AirbyteMessageTracker.java @@ -117,7 +117,9 @@ private void handleSourceEmittedState(final AirbyteStateMessage stateMessage) { stateDeltaTracker.addState(stateHash, streamToRunningCount); } } catch (final StateDeltaTrackerException e) { - log.error(e.getMessage(), e); + log.warn("The message tracker encountered an issue that prevents committed record counts from being reliably computed."); + log.warn("This only impacts metadata and does not indicate a problem with actual sync data."); + log.warn(e.getMessage(), e); unreliableCommittedCounts = true; } streamToRunningCount.clear(); @@ -134,7 +136,9 @@ private void handleDestinationEmittedState(final AirbyteStateMessage stateMessag stateDeltaTracker.commitStateHash(getStateHashCode(stateMessage)); } } catch (final StateDeltaTrackerException e) { - log.error(e.getMessage(), e); + log.warn("The message tracker encountered an issue that prevents committed record counts from being reliably computed."); + log.warn("This only impacts metadata and does not indicate a problem with actual sync data."); + log.warn(e.getMessage(), e); unreliableCommittedCounts = true; } } From 0bad09965024329cf47bda43b53989edcb3d20d2 Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Thu, 20 Jan 2022 18:31:51 -0800 Subject: [PATCH 179/215] use attemptNumber instead of attemptId where appropriate (#9671) --- .../scheduler/persistence/JobPersistence.java | 4 ++-- .../ConnectionManagerWorkflowImpl.java | 4 ++-- .../JobCreationAndStatusUpdateActivity.java | 4 ++-- ...obCreationAndStatusUpdateActivityImpl.java | 9 ++++----- ...obCreationAndStatusUpdateActivityTest.java | 19 ++++++++++--------- 5 files changed, 20 
insertions(+), 20 deletions(-) diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java index 6abb06991082b..c2d8b46d9e05f 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java @@ -89,7 +89,7 @@ public interface JobPersistence { * will not be changed if it is already in a terminal state. * * @param jobId job id - * @param attemptNumber attempt id + * @param attemptNumber attempt number * @throws IOException exception due to interaction with persistence */ void failAttempt(long jobId, int attemptNumber) throws IOException; @@ -99,7 +99,7 @@ public interface JobPersistence { * is changed regardless of what state it is in. * * @param jobId job id - * @param attemptNumber attempt id + * @param attemptNumber attempt number * @throws IOException exception due to interaction with persistence */ void succeedAttempt(long jobId, int attemptNumber) throws IOException; diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java index 058bb927188d1..d6c53efdf9c7d 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java @@ -185,7 +185,7 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr private void reportSuccess(final ConnectionUpdaterInput connectionUpdaterInput) { jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput( maybeJobId.get(), - maybeAttemptId.get(), + connectionUpdaterInput.getAttemptNumber(), standardSyncOutput.orElse(null))); connectionUpdaterInput.setJobId(null); @@ -196,7 +196,7 @@ private void reportSuccess(final ConnectionUpdaterInput connectionUpdaterInput) private void reportFailure(final ConnectionUpdaterInput connectionUpdaterInput) { jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput( connectionUpdaterInput.getJobId(), - connectionUpdaterInput.getAttemptId())); + connectionUpdaterInput.getAttemptNumber())); final int maxAttempt = configFetchActivity.getMaxAttempt().getMaxAttempt(); final int attemptNumber = connectionUpdaterInput.getAttemptNumber(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java index 385f952eb56a8..285f349d2cdcb 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java @@ -77,7 +77,7 @@ class AttemptCreationOutput { class JobSuccessInput { private long jobId; - private int attemptId; + private int attemptNumber; private StandardSyncOutput standardSyncOutput; } @@ -110,7 +110,7 @@ class JobFailureInput { class AttemptFailureInput { private long jobId; - private int attemptId; + private int attemptNumber; } diff --git 
a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java index bfd122a8d8089..4e9150b8411c2 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java @@ -110,11 +110,11 @@ public void jobSuccess(final JobSuccessInput input) { try { if (input.getStandardSyncOutput() != null) { final JobOutput jobOutput = new JobOutput().withSync(input.getStandardSyncOutput()); - jobPersistence.writeOutput(input.getJobId(), input.getAttemptId(), jobOutput); + jobPersistence.writeOutput(input.getJobId(), input.getAttemptNumber(), jobOutput); } else { - log.warn("The job {} doesn't have an input for the attempt {}", input.getJobId(), input.getAttemptId()); + log.warn("The job {} doesn't have an input for attempt number {}", input.getJobId(), input.getAttemptNumber()); } - jobPersistence.succeedAttempt(input.getJobId(), input.getAttemptId()); + jobPersistence.succeedAttempt(input.getJobId(), input.getAttemptNumber()); final Job job = jobPersistence.getJob(input.getJobId()); jobNotifier.successJob(job); trackCompletion(job, JobStatus.SUCCEEDED); @@ -138,8 +138,7 @@ public void jobFailure(final JobFailureInput input) { @Override public void attemptFailure(final AttemptFailureInput input) { try { - jobPersistence.failAttempt(input.getJobId(), input.getAttemptId()); - final Job job = jobPersistence.getJob(input.getJobId()); + jobPersistence.failAttempt(input.getJobId(), input.getAttemptNumber()); } catch (final IOException e) { throw new RetryableException(e); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java index 6cb059dee1187..1eb94fcef1676 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java @@ -72,6 +72,7 @@ public class JobCreationAndStatusUpdateActivityTest { private static final UUID CONNECTION_ID = UUID.randomUUID(); private static final long JOB_ID = 123L; private static final int ATTEMPT_ID = 321; + private static final int ATTEMPT_NUMBER = 2; private static final StandardSyncOutput standardSyncOutput = new StandardSyncOutput() .withStandardSyncSummary( new StandardSyncSummary() @@ -146,11 +147,11 @@ class Update { @Test public void setJobSuccess() throws IOException { - jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID, standardSyncOutput)); + jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_NUMBER, standardSyncOutput)); final JobOutput jobOutput = new JobOutput().withSync(standardSyncOutput); - Mockito.verify(mJobPersistence).writeOutput(JOB_ID, ATTEMPT_ID, jobOutput); - Mockito.verify(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_ID); + Mockito.verify(mJobPersistence).writeOutput(JOB_ID, ATTEMPT_NUMBER, jobOutput); + Mockito.verify(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_NUMBER); 
Mockito.verify(mJobNotifier).successJob(Mockito.any()); Mockito.verify(mJobtracker).trackSync(Mockito.any(), Mockito.eq(JobState.SUCCEEDED)); } @@ -158,9 +159,9 @@ public void setJobSuccess() throws IOException { @Test public void setJobSuccessWrapException() throws IOException { Mockito.doThrow(new IOException()) - .when(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_ID); + .when(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_NUMBER); - Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID, null))) + Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_NUMBER, null))) .isInstanceOf(RetryableException.class) .hasCauseInstanceOf(IOException.class); } @@ -185,17 +186,17 @@ public void setJobFailureWrapException() throws IOException { @Test public void setAttemptFailure() throws IOException { - jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_ID)); + jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_NUMBER)); - Mockito.verify(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_ID); + Mockito.verify(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); } @Test public void setAttemptFailureWrapException() throws IOException { Mockito.doThrow(new IOException()) - .when(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_ID); + .when(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); - Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_ID))) + Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_NUMBER))) .isInstanceOf(RetryableException.class) .hasCauseInstanceOf(IOException.class); } From f6f0c33cd1fdcbc4ccfa745b239150d6bc4e1f2a Mon Sep 17 00:00:00 2001 From: Augustin Date: Fri, 21 Jan 2022 09:14:08 +0100 Subject: [PATCH 180/215] =?UTF-8?q?=F0=9F=90=99=20octavia-cli:=20list=20co?= =?UTF-8?q?nnectors=20(#9546)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- octavia-cli/README.md | 9 +- octavia-cli/octavia_cli/entrypoint.py | 21 ++- octavia-cli/octavia_cli/list/__init__.py | 3 + octavia-cli/octavia_cli/list/commands.py | 48 ++++++ .../list/connectors_definitions.py | 121 ++++++++++++++ octavia-cli/unit_tests/test_entrypoint.py | 12 +- .../unit_tests/test_list/test_commands.py | 34 ++++ .../test_list/test_connectors_definitions.py | 151 ++++++++++++++++++ 8 files changed, 388 insertions(+), 11 deletions(-) create mode 100644 octavia-cli/octavia_cli/list/__init__.py create mode 100644 octavia-cli/octavia_cli/list/commands.py create mode 100644 octavia-cli/octavia_cli/list/connectors_definitions.py create mode 100644 octavia-cli/unit_tests/test_list/test_commands.py create mode 100644 octavia-cli/unit_tests/test_list/test_connectors_definitions.py diff --git a/octavia-cli/README.md b/octavia-cli/README.md index 3589ea1512f85..701d9a73f07ae 100644 --- a/octavia-cli/README.md +++ b/octavia-cli/README.md @@ -20,11 +20,11 @@ SUB_BUILD=OCTAVIA_CLI ./gradlew build #from the root of the repo ``` 2. Run the CLI from docker: ```bash -docker run octavia-cli:dev +docker run airbyte/octavia-cli:dev ```` 3. 
Create an `octavia` alias in your `.bashrc` or `.zshrc`: ````bash -echo 'alias octavia="docker run octavia-cli:dev"' >> ~/.zshrc +echo 'alias octavia="docker run airbyte/octavia-cli:dev"' >> ~/.zshrc source ~/.zshrc octavia ```` @@ -38,7 +38,8 @@ We welcome community contributions! | Date | Milestone | |------------|-------------------------------------| -| 2022-01-06 | Generate an API Python client from our Open API spec | +| 2022-01-17 | Implement `octavia list connectors source` and `octavia list connectors destinations`| +| 2022-01-17 | Generate an API Python client from our Open API spec | | 2021-12-22 | Bootstrapping the project's code base | # Developing locally @@ -48,7 +49,7 @@ We welcome community contributions! 4. Install dev dependencies: `pip install -e .\[dev\]` 5. Install `pre-commit` hooks: `pre-commit install` 6. Run the test suite: `pytest --cov=octavia_cli unit_tests` -7. Iterate; please check the [Contributing](#contributing) for instructions on contributing. +7. Iterate: please check the [Contributing](#contributing) for instructions on contributing. # Contributing 1. Please sign up to [Airbyte's Slack workspace](https://slack.airbyte.io/) and join the `#octavia-cli`. We'll sync up community efforts in this channel. diff --git a/octavia-cli/octavia_cli/entrypoint.py b/octavia-cli/octavia_cli/entrypoint.py index b0de9afb55171..819f75bef0cd1 100644 --- a/octavia-cli/octavia_cli/entrypoint.py +++ b/octavia-cli/octavia_cli/entrypoint.py @@ -2,10 +2,16 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. # +from typing import List + import airbyte_api_client import click from airbyte_api_client.api import workspace_api +from .list import commands as list_commands + +AVAILABLE_COMMANDS: List[click.Command] = [list_commands._list] + @click.group() @click.option("--airbyte-url", envvar="AIRBYTE_URL", default="http://localhost:8000", help="The URL of your Airbyte instance.") @@ -27,14 +33,14 @@ def octavia(ctx: click.Context, airbyte_url: str) -> None: ctx.obj["WORKSPACE_ID"] = workspace_id -@octavia.command(help="Scaffolds a local project directories.") -def init() -> None: - raise click.ClickException("The init command is not yet implemented.") +def add_commands_to_octavia(): + for command in AVAILABLE_COMMANDS: + octavia.add_command(command) -@octavia.command(name="list", help="List existing resources on the Airbyte instance.") -def _list() -> None: - raise click.ClickException("The list command is not yet implemented.") +@octavia.command(help="Scaffolds a local project directories.") +def init(): + raise click.ClickException("The init command is not yet implemented.") @octavia.command(name="import", help="Import an existing resources from the Airbyte instance.") @@ -55,3 +61,6 @@ def apply() -> None: @octavia.command(help="Delete resources") def delete() -> None: raise click.ClickException("The delete command is not yet implemented.") + + +add_commands_to_octavia() diff --git a/octavia-cli/octavia_cli/list/__init__.py b/octavia-cli/octavia_cli/list/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/octavia-cli/octavia_cli/list/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/octavia-cli/octavia_cli/list/commands.py b/octavia-cli/octavia_cli/list/commands.py new file mode 100644 index 0000000000000..1eac6a5303086 --- /dev/null +++ b/octavia-cli/octavia_cli/list/commands.py @@ -0,0 +1,48 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +from typing import List + +import click + +from .connectors_definitions import DestinationConnectorsDefinitions, SourceConnectorsDefinitions + + +@click.group("list", help="List existing Airbyte resources.") +@click.pass_context +def _list(ctx: click.Context): # pragma: no cover + pass + + +@click.group("connectors", help="Latest information on supported sources and destinations connectors.") +@click.pass_context +def connectors(ctx: click.Context): # pragma: no cover + pass + + +@connectors.command(help="Latest information on supported sources.") +@click.pass_context +def sources(ctx: click.Context): + api_client = ctx.obj["API_CLIENT"] + definitions = SourceConnectorsDefinitions(api_client) + click.echo(definitions) + + +@connectors.command(help="Latest information on supported destinations.") +@click.pass_context +def destinations(ctx: click.Context): + api_client = ctx.obj["API_CLIENT"] + definitions = DestinationConnectorsDefinitions(api_client) + click.echo(definitions) + + +AVAILABLE_COMMANDS: List[click.Command] = [connectors] + + +def add_commands_to_list(): + for command in AVAILABLE_COMMANDS: + _list.add_command(command) + + +add_commands_to_list() diff --git a/octavia-cli/octavia_cli/list/connectors_definitions.py b/octavia-cli/octavia_cli/list/connectors_definitions.py new file mode 100644 index 0000000000000..4820adf905e3a --- /dev/null +++ b/octavia-cli/octavia_cli/list/connectors_definitions.py @@ -0,0 +1,121 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import abc +from enum import Enum +from typing import Callable, List, Union + +import airbyte_api_client +from airbyte_api_client.api import destination_definition_api, source_definition_api + + +class DefinitionType(Enum): + SOURCE = "source" + DESTINATION = "destination" + + +class ConnectorsDefinitions(abc.ABC): + LIST_LATEST_DEFINITIONS_KWARGS = {"_check_return_type": False} + + @property + @abc.abstractmethod + def api( + self, + ) -> Union[source_definition_api.SourceDefinitionApi, destination_definition_api.DestinationDefinitionApi]: # pragma: no cover + pass + + def __init__(self, definition_type: DefinitionType, api_client: airbyte_api_client.ApiClient, list_latest_definitions: Callable): + self.definition_type = definition_type + self.api_instance = self.api(api_client) + self.list_latest_definitions = list_latest_definitions + + @property + def fields_to_display(self) -> List[str]: + return ["name", "dockerRepository", "dockerImageTag", f"{self.definition_type.value}DefinitionId"] + + @property + def response_definition_list_field(self) -> str: + return f"{self.definition_type.value}_definitions" + + def _parse_response(self, api_response) -> List[List[str]]: + definitions = [ + [definition[field] for field in self.fields_to_display] for definition in api_response[self.response_definition_list_field] + ] + return definitions + + @property + def latest_definitions(self) -> List[List[str]]: + api_response = self.list_latest_definitions(self.api_instance, **self.LIST_LATEST_DEFINITIONS_KWARGS) + return self._parse_response(api_response) + + # TODO alafanechere: declare in a specific formatting module because it will probably be reused + @staticmethod + def _compute_col_width(data: List[List[str]], padding: int = 2) -> int: + """Compute column width for display purposes: + Find largest column size, add a padding of two characters. + Returns: + data (List[List[str]]): Tabular data containing rows and columns. + padding (int): Number of character to adds to create space between columns. 
+ Returns: + col_width (int): The computed column width according to input data. + """ + col_width = max(len(col) for row in data for col in row) + padding + return col_width + + # TODO alafanechere: declare in a specific formatting module because it will probably be reused + @staticmethod + def _camelcased_to_uppercased_spaced(camelcased: str) -> str: + """Util function to transform a camelCase string to a UPPERCASED SPACED string + e.g: dockerImageName -> DOCKER IMAGE NAME + Args: + camelcased (str): The camel cased string to convert. + + Returns: + (str): The converted UPPERCASED SPACED string + """ + return "".join(map(lambda x: x if x.islower() else " " + x, camelcased)).upper() + + # TODO alafanechere: declare in a specific formatting module because it will probably be reused + @staticmethod + def _display_as_table(data: List[List[str]]) -> str: + """Formats tabular input data into a displayable table with columns. + Args: + data (List[List[str]]): Tabular data containing rows and columns. + Returns: + table (str): String representation of input tabular data. + """ + col_width = ConnectorsDefinitions._compute_col_width(data) + table = "\n".join(["".join(col.ljust(col_width) for col in row) for row in data]) + return table + + # TODO alafanechere: declare in a specific formatting module because it will probably be reused + @staticmethod + def _format_column_names(camelcased_column_names: List[str]) -> List[str]: + """Format camel cased column names to uppercased spaced column names + + Args: + camelcased_column_names (List[str]): Column names in camel case. + + Returns: + (List[str]): Column names in uppercase with spaces. + """ + return [ConnectorsDefinitions._camelcased_to_uppercased_spaced(column_name) for column_name in camelcased_column_names] + + def __repr__(self): + definitions = [self._format_column_names(self.fields_to_display)] + self.latest_definitions + return self._display_as_table(definitions) + + +class SourceConnectorsDefinitions(ConnectorsDefinitions): + api = source_definition_api.SourceDefinitionApi + + def __init__(self, api_client: airbyte_api_client.ApiClient): + super().__init__(DefinitionType.SOURCE, api_client, self.api.list_latest_source_definitions) + + +class DestinationConnectorsDefinitions(ConnectorsDefinitions): + api = destination_definition_api.DestinationDefinitionApi + + def __init__(self, api_client: airbyte_api_client.ApiClient): + super().__init__(DefinitionType.DESTINATION, api_client, self.api.list_latest_destination_definitions) diff --git a/octavia-cli/unit_tests/test_entrypoint.py b/octavia-cli/unit_tests/test_entrypoint.py index 50d5ad68af118..00531bfaf16ef 100644 --- a/octavia-cli/unit_tests/test_entrypoint.py +++ b/octavia-cli/unit_tests/test_entrypoint.py @@ -36,12 +36,22 @@ def test_octavia(mocker): assert result.exit_code == 0 +def test_commands_in_octavia_group(): + octavia_commands = entrypoint.octavia.commands.values() + for command in entrypoint.AVAILABLE_COMMANDS: + assert command in octavia_commands + + @pytest.mark.parametrize( "command", - [entrypoint.init, entrypoint.apply, entrypoint.create, entrypoint.delete, entrypoint._list, entrypoint._import], + [entrypoint.init, entrypoint.apply, entrypoint.create, entrypoint.delete, entrypoint._import], ) def test_not_implemented_commands(command): runner = CliRunner() result = runner.invoke(command) assert result.exit_code == 1 assert result.output.endswith("not yet implemented.\n") + + +def test_available_commands(): + assert entrypoint.AVAILABLE_COMMANDS == 
[entrypoint.list_commands._list] diff --git a/octavia-cli/unit_tests/test_list/test_commands.py b/octavia-cli/unit_tests/test_list/test_commands.py new file mode 100644 index 0000000000000..9547ae2914a03 --- /dev/null +++ b/octavia-cli/unit_tests/test_list/test_commands.py @@ -0,0 +1,34 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from click.testing import CliRunner +from octavia_cli.list import commands + + +def test_available_commands(): + assert commands.AVAILABLE_COMMANDS == [commands.connectors] + + +def test_commands_in_list_group(): + list_commands = commands._list.commands.values() + for command in commands.AVAILABLE_COMMANDS: + assert command in list_commands + + +def test_connectors_sources(mocker): + mocker.patch.object(commands, "SourceConnectorsDefinitions", mocker.Mock(return_value="SourceConnectorsDefinitionsRepr")) + context_object = {"API_CLIENT": mocker.Mock()} + runner = CliRunner() + result = runner.invoke((commands.sources), obj=context_object) + commands.SourceConnectorsDefinitions.assert_called_with(context_object["API_CLIENT"]) + assert result.output == "SourceConnectorsDefinitionsRepr\n" + + +def test_connectors_destinations(mocker): + mocker.patch.object(commands, "DestinationConnectorsDefinitions", mocker.Mock(return_value="DestinationConnectorsDefinitionsRepr")) + context_object = {"API_CLIENT": mocker.Mock()} + runner = CliRunner() + result = runner.invoke((commands.destinations), obj=context_object) + commands.DestinationConnectorsDefinitions.assert_called_with(context_object["API_CLIENT"]) + assert result.output == "DestinationConnectorsDefinitionsRepr\n" diff --git a/octavia-cli/unit_tests/test_list/test_connectors_definitions.py b/octavia-cli/unit_tests/test_list/test_connectors_definitions.py new file mode 100644 index 0000000000000..0a7687b2b72e0 --- /dev/null +++ b/octavia-cli/unit_tests/test_list/test_connectors_definitions.py @@ -0,0 +1,151 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import pytest +from airbyte_api_client.api import destination_definition_api, source_definition_api +from octavia_cli.list.connectors_definitions import ( + ConnectorsDefinitions, + DefinitionType, + DestinationConnectorsDefinitions, + SourceConnectorsDefinitions, +) + + +def test_definition_type(): + assert [definition_type.value for definition_type in DefinitionType] == ["source", "destination"] + + +class TestConnectorsDefinitions: + @pytest.fixture + def mock_api(self, mocker): + return mocker.Mock() + + @pytest.fixture + def patch_base_class(self, mocker, mock_api): + # Mock abstract methods to enable instantiating abstract class + mocker.patch.object(ConnectorsDefinitions, "api", mock_api) + mocker.patch.object(ConnectorsDefinitions, "__abstractmethods__", set()) + + @pytest.fixture + def connectors_definitions_mock_args(self, mocker): + return (mocker.Mock(value="my_definition_type"), mocker.Mock(), mocker.Mock()) + + def test_init(self, patch_base_class, mock_api, connectors_definitions_mock_args): + mock_definition_type, mock_api_client, mock_list_latest_definitions = connectors_definitions_mock_args + definitions = ConnectorsDefinitions(*connectors_definitions_mock_args) + assert definitions.definition_type == mock_definition_type + mock_api.assert_called_with(mock_api_client) + assert definitions.api_instance == mock_api.return_value + assert definitions.list_latest_definitions == mock_list_latest_definitions + + def test_abstract_methods(self, connectors_definitions_mock_args): + assert ConnectorsDefinitions.__abstractmethods__ == {"api"} + with pytest.raises(TypeError): + ConnectorsDefinitions(*connectors_definitions_mock_args) + + def test_fields_to_display(self, patch_base_class, connectors_definitions_mock_args): + definitions = ConnectorsDefinitions(*connectors_definitions_mock_args) + expected_field_to_display = ["name", "dockerRepository", "dockerImageTag", "my_definition_typeDefinitionId"] + assert definitions.fields_to_display == expected_field_to_display + + def test_response_definition_list_field(self, patch_base_class, connectors_definitions_mock_args): + definitions = ConnectorsDefinitions(*connectors_definitions_mock_args) + expected_response_definition_list_field = "my_definition_type_definitions" + assert definitions.response_definition_list_field == expected_response_definition_list_field + + def test_parse_response(self, patch_base_class, connectors_definitions_mock_args): + definitions = ConnectorsDefinitions(*connectors_definitions_mock_args) + api_response = {definitions.response_definition_list_field: []} + for i in range(5): + definition = {field: f"{field}_value_{i}" for field in definitions.fields_to_display} + definition["discarded_field"] = "discarded_value" + api_response[definitions.response_definition_list_field].append(definition) + parsed_definitions = definitions._parse_response(api_response) + assert len(parsed_definitions) == 5 + for i in range(5): + assert parsed_definitions[i] == [f"{field}_value_{i}" for field in definitions.fields_to_display] + assert "discarded_value" not in parsed_definitions[i] + + def test_latest_definitions(self, patch_base_class, mocker, connectors_definitions_mock_args): + mock_list_latest_definitions = connectors_definitions_mock_args[-1] + mocker.patch.object(ConnectorsDefinitions, "_parse_response") + definitions = ConnectorsDefinitions(*connectors_definitions_mock_args) + assert definitions.latest_definitions == definitions._parse_response.return_value + 
mock_list_latest_definitions.assert_called_with(definitions.api_instance, **definitions.LIST_LATEST_DEFINITIONS_KWARGS) + definitions._parse_response.assert_called_with(mock_list_latest_definitions.return_value) + + def test_repr(self, patch_base_class, mocker, connectors_definitions_mock_args): + headers = ["fieldA", "fieldB", "fieldC"] + latest_definitions = [["a", "b", "c"]] + mocker.patch.object(ConnectorsDefinitions, "fields_to_display", headers) + mocker.patch.object(ConnectorsDefinitions, "latest_definitions", latest_definitions) + mocker.patch.object(ConnectorsDefinitions, "_display_as_table") + mocker.patch.object(ConnectorsDefinitions, "_format_column_names") + + definitions = ConnectorsDefinitions(*connectors_definitions_mock_args) + representation = definitions.__repr__() + definitions._display_as_table.assert_called_with([definitions._format_column_names.return_value] + latest_definitions) + assert representation == definitions._display_as_table.return_value + + @pytest.mark.parametrize( + "test_data,padding,expected_col_width", + [([["a", "___10chars"], ["e", "f"]], 2, 2 + 10), ([["a", "___10chars"], ["e", "____11chars"]], 2, 2 + 11), ([[""]], 2, 2)], + ) + def test_compute_col_width(self, test_data, padding, expected_col_width): + col_width = ConnectorsDefinitions._compute_col_width(test_data, padding) + assert col_width == expected_col_width + + @pytest.mark.parametrize( + "test_data,col_width,expected_output", + [ + ([["a", "___10chars"], ["e", "____11chars"]], 13, "a ___10chars \ne ____11chars "), + ], + ) + def test_display_as_table(self, mocker, test_data, col_width, expected_output): + mocker.patch.object(ConnectorsDefinitions, "_compute_col_width", mocker.Mock(return_value=col_width)) + assert ConnectorsDefinitions._display_as_table(test_data) == expected_output + + @pytest.mark.parametrize("input_camelcased,expected_output", [("camelCased", "CAMEL CASED"), ("notcamelcased", "NOTCAMELCASED")]) + def test_camelcased_to_uppercased_spaced(self, input_camelcased, expected_output): + assert ConnectorsDefinitions._camelcased_to_uppercased_spaced(input_camelcased) == expected_output + + def test_format_column_names(self, mocker): + columns_to_format = ["camelCased"] + formatted_columns = ConnectorsDefinitions._format_column_names(columns_to_format) + assert len(formatted_columns) == 1 + for i, c in enumerate(formatted_columns): + assert c == ConnectorsDefinitions._camelcased_to_uppercased_spaced(columns_to_format[i]) + + +class TestSubConnectorsDefinitions: + @pytest.fixture + def mock_api_client(self, mocker): + return mocker.Mock() + + @pytest.mark.parametrize( + "definition_type,SubDefinitionClass,list_latest_definitions", + [ + (DefinitionType.SOURCE, SourceConnectorsDefinitions, source_definition_api.SourceDefinitionApi.list_latest_source_definitions), + ( + DefinitionType.DESTINATION, + DestinationConnectorsDefinitions, + destination_definition_api.DestinationDefinitionApi.list_latest_destination_definitions, + ), + ], + ) + def test_init(self, mocker, mock_api_client, definition_type, SubDefinitionClass, list_latest_definitions): + definitions_init = mocker.Mock() + mocker.patch.object(ConnectorsDefinitions, "__init__", definitions_init) + SubDefinitionClass(mock_api_client) + definitions_init.assert_called_with(definition_type, mock_api_client, list_latest_definitions) + + @pytest.mark.parametrize( + "SubDefinitionClass,expected_api", + [ + (SourceConnectorsDefinitions, source_definition_api.SourceDefinitionApi), + (DestinationConnectorsDefinitions, 
destination_definition_api.DestinationDefinitionApi), + ], + ) + def test_class_attributes(self, SubDefinitionClass, expected_api): + assert SubDefinitionClass.api == expected_api From 3dac7aef426351be5de64f5eda73fd760600d12f Mon Sep 17 00:00:00 2001 From: Artem Astapenko <3767150+Jamakase@users.noreply.github.com> Date: Fri, 21 Jan 2022 12:05:50 +0300 Subject: [PATCH 181/215] Fix demo link in onboarding (#9558) * Add demo link to config * Fix demo link in onboarding --- airbyte-webapp/src/config/uiConfig.ts | 2 + .../OnboardingPage/components/FinalStep.tsx | 4 +- .../components/VideoItem/VideoItem.tsx | 19 ++++++---- .../VideoItem/components/PlayButton.tsx | 37 +++++++++++++------ .../OnboardingPage/components/WelcomeStep.tsx | 5 ++- 5 files changed, 45 insertions(+), 22 deletions(-) diff --git a/airbyte-webapp/src/config/uiConfig.ts b/airbyte-webapp/src/config/uiConfig.ts index 85ef70569594d..6b9b3555bf095 100644 --- a/airbyte-webapp/src/config/uiConfig.ts +++ b/airbyte-webapp/src/config/uiConfig.ts @@ -15,6 +15,7 @@ type UiConfig = { technicalSupport: string; statusLink: string; recipesLink: string; + demoLink: string; }; const uiConfig: UiConfig = { @@ -32,6 +33,7 @@ const uiConfig: UiConfig = { tutorialLink: "https://www.youtube.com/watch?v=Rcpt5SVsMpk&feature=emb_logo", statusLink: "https://status.airbyte.io/", recipesLink: "https://airbyte.io/recipes", + demoLink: "https://demo.airbyte.io", }; export type { UiConfig }; diff --git a/airbyte-webapp/src/pages/OnboardingPage/components/FinalStep.tsx b/airbyte-webapp/src/pages/OnboardingPage/components/FinalStep.tsx index b78904a27b883..bc756da49090c 100644 --- a/airbyte-webapp/src/pages/OnboardingPage/components/FinalStep.tsx +++ b/airbyte-webapp/src/pages/OnboardingPage/components/FinalStep.tsx @@ -13,6 +13,7 @@ import SyncCompletedModal from "views/Feedback/SyncCompletedModal"; import { useOnboardingService } from "hooks/services/Onboarding/OnboardingService"; import Status from "core/statuses"; import useWorkspace from "hooks/services/useWorkspace"; +import { useConfig } from "config"; type FinalStepProps = { connectionId: string; @@ -35,6 +36,7 @@ const Videos = styled.div` `; const FinalStep: React.FC = ({ connectionId, onSync }) => { + const config = useConfig(); const { sendFeedback } = useWorkspace(); const { feedbackPassed, @@ -81,7 +83,7 @@ const FinalStep: React.FC = ({ connectionId, onSync }) => { } - videoId="sKDviQrOAbU" + link={config.ui.demoLink} img="/videoCover.png" /> diff --git a/airbyte-webapp/src/pages/OnboardingPage/components/VideoItem/VideoItem.tsx b/airbyte-webapp/src/pages/OnboardingPage/components/VideoItem/VideoItem.tsx index a112c1de191b8..030ea26c32bb1 100644 --- a/airbyte-webapp/src/pages/OnboardingPage/components/VideoItem/VideoItem.tsx +++ b/airbyte-webapp/src/pages/OnboardingPage/components/VideoItem/VideoItem.tsx @@ -7,12 +7,14 @@ import PlayButton from "./components/PlayButton"; type VideoItemProps = { small?: boolean; videoId?: string; + link?: string; img?: string; description?: React.ReactNode; }; const Content = styled.div<{ small?: boolean }>` width: ${({ small }) => (small ? 158 : 317)}px; + text-decoration: none; `; const VideoBlock = styled.div<{ small?: boolean }>` @@ -81,21 +83,22 @@ const VideoItem: React.FC = ({ small, videoId, img, + link, }) => { const [isVideoOpen, setIsVideoOpen] = useState(false); + const onOpenVideo = () => videoId && setIsVideoOpen(true); + const isLink = !!link && !videoId; + + const contentProps = isLink ? 
{ href: link, target: "_blanc" } : {}; return ( - + - setIsVideoOpen(true)} - > - setIsVideoOpen(true)} /> + + - setIsVideoOpen(true)}> + {description} {isVideoOpen ? ( diff --git a/airbyte-webapp/src/pages/OnboardingPage/components/VideoItem/components/PlayButton.tsx b/airbyte-webapp/src/pages/OnboardingPage/components/VideoItem/components/PlayButton.tsx index 4770d5464cb23..1c75771706c86 100644 --- a/airbyte-webapp/src/pages/OnboardingPage/components/VideoItem/components/PlayButton.tsx +++ b/airbyte-webapp/src/pages/OnboardingPage/components/VideoItem/components/PlayButton.tsx @@ -4,6 +4,7 @@ import styled, { keyframes } from "styled-components"; type PlayButtonProps = { small?: boolean; onClick: () => void; + isLink?: boolean; }; export const BigCircleAnimation = keyframes` @@ -45,23 +46,35 @@ const MainCircle = styled.div` width: ${({ small }) => (small ? 42 : 85)}px; border-radius: 50%; background: ${({ theme }) => theme.primaryColor}; - padding: ${({ small }) => (small ? "10px 0 0 16px" : "20px 0 0 32px")}; + padding: ${({ small, isLink }) => + isLink ? "0" : small ? "10px 0 0 16px" : "20px 0 0 32px"}; box-shadow: 0 2.4px 4.8px ${({ theme }) => theme.cardShadowColor}, 0 16.2px 7.2px -10.2px ${({ theme }) => theme.cardShadowColor}; + display: ${({ isLink }) => (isLink ? "flex" : "block")}; + justify-content: center; + align-items: center; - &:hover { - display: flex; + & div { + display: ${({ isLink }) => (isLink ? "flex" : "none")}; justify-content: center; align-items: center; + } + + &:hover { + display: flex; padding: 0; & > img { display: none; } & div { + animation-direction: alternate; + animation-duration: 0.5s; + animation-timing-function: linear; + animation-iteration-count: infinite; + animation-delay: 0s; + display: flex; - justify-content: center; - align-items: center; } } `; @@ -72,7 +85,7 @@ const BigCircle = styled.div<{ small?: boolean }>` border-radius: 50%; background: rgba(255, 255, 255, 0.5); display: none; - animation: ${BigCircleAnimation} alternate 0.5s linear 0s infinite; + animation-name: ${BigCircleAnimation}; `; const MiddleCircle = styled(BigCircle)` @@ -87,13 +100,13 @@ const SmallCircle = styled(BigCircle)` animation-name: ${SmallCircleAnimation}; `; -const PlayButton: React.FC = ({ small, onClick }) => { +const PlayButton: React.FC = ({ small, onClick, isLink }) => { return ( - - play - - - + + {!isLink && play} + + + diff --git a/airbyte-webapp/src/pages/OnboardingPage/components/WelcomeStep.tsx b/airbyte-webapp/src/pages/OnboardingPage/components/WelcomeStep.tsx index ec47b07535049..831bfc80ab0c9 100644 --- a/airbyte-webapp/src/pages/OnboardingPage/components/WelcomeStep.tsx +++ b/airbyte-webapp/src/pages/OnboardingPage/components/WelcomeStep.tsx @@ -6,6 +6,7 @@ import TitlesBlock from "./TitlesBlock"; import HighlightedText from "./HighlightedText"; import VideoItem from "./VideoItem"; import { BigButton } from "components/CenteredPageComponents"; +import { useConfig } from "config"; type WelcomeStepProps = { onSubmit: () => void; @@ -23,6 +24,8 @@ const Videos = styled.div` `; const WelcomeStep: React.FC = ({ userName, onSubmit }) => { + const config = useConfig(); + return ( <> = ({ userName, onSubmit }) => { } - videoId="sKDviQrOAbU" img="/videoCover.png" + link={config.ui.demoLink} /> From 16eb06439be72c3fa3b11d662ca877ebc425a6ab Mon Sep 17 00:00:00 2001 From: Artem Astapenko <3767150+Jamakase@users.noreply.github.com> Date: Fri, 21 Jan 2022 12:06:32 +0300 Subject: [PATCH 182/215] Hide for now settings/configuration (#9559) --- 
.../cloud/views/settings/CloudSettingsPage.tsx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte-webapp/src/packages/cloud/views/settings/CloudSettingsPage.tsx b/airbyte-webapp/src/packages/cloud/views/settings/CloudSettingsPage.tsx index 57ef1e568cfe7..9d04131f8458a 100644 --- a/airbyte-webapp/src/packages/cloud/views/settings/CloudSettingsPage.tsx +++ b/airbyte-webapp/src/packages/cloud/views/settings/CloudSettingsPage.tsx @@ -8,7 +8,7 @@ import { SourcesPage as SettingsSourcesPage, } from "pages/SettingsPage/pages/ConnectorsPage"; import SettingsPage from "pages/SettingsPage"; -import ConfigurationsPage from "pages/SettingsPage/pages/ConfigurationsPage"; +// import ConfigurationsPage from "pages/SettingsPage/pages/ConfigurationsPage"; import NotificationPage from "pages/SettingsPage/pages/NotificationPage"; import { AccountSettingsView } from "packages/cloud/views/users/AccountSettingsView"; import { WorkspaceSettingsView } from "packages/cloud/views/workspaces/WorkspaceSettingsView"; @@ -63,11 +63,11 @@ export const CloudSettingsPage: React.FC = () => { // indicatorCount: countNewDestinationVersion, component: SettingsDestinationPage, }, - { - path: CloudSettingsRoutes.Configuration, - name: , - component: ConfigurationsPage, - }, + // { + // path: CloudSettingsRoutes.Configuration, + // name: , + // component: ConfigurationsPage, + // }, { path: CloudSettingsRoutes.AccessManagement, name: , From 858748757177eaf8b4bb8d56f9418dc1f2a06d52 Mon Sep 17 00:00:00 2001 From: Baz Date: Fri, 21 Jan 2022 13:50:52 +0200 Subject: [PATCH 183/215] unblocked shopify from user selection (#9691) --- .../components/Controls/ConnectorServiceTypeControl.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/ConnectorServiceTypeControl.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/ConnectorServiceTypeControl.tsx index ea8704fbae4a2..62665582b3d9e 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/ConnectorServiceTypeControl.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/ConnectorServiceTypeControl.tsx @@ -90,7 +90,6 @@ const ConnectorServiceTypeControl: React.FC<{ ? 
[ "200330b2-ea62-4d11-ac6d-cfe3e3f8ab2b", // Snapchat "2470e835-feaf-4db6-96f3-70fd645acc77", // Salesforce Singer - "9da77001-af33-4bcd-be46-6252bf9342b9", // Shopify ] : []; const sortedDropDownData = useMemo( From 2ddf0bcf6515686a638a62d62f91cc438a2b4e2f Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Fri, 21 Jan 2022 15:22:16 +0200 Subject: [PATCH 184/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20Github:=20add?= =?UTF-8?q?=20custom=20pagination=20size=20for=20large=20streams=20(#9664)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Source Github: add custom pagination size for large streams --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 11 +++++++++- .../connectors/source-github/Dockerfile | 2 +- .../source-github/source_github/source.py | 5 ++++- .../source-github/source_github/spec.json | 9 +++++++- .../source-github/source_github/streams.py | 22 +++++++++++++------ .../source-github/unit_tests/test_stream.py | 2 +- docs/integrations/sources/github.md | 1 + 8 files changed, 41 insertions(+), 13 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index aed21d4979a6b..e52b190ba9bf2 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -224,7 +224,7 @@ - name: GitHub sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.2.13 + dockerImageTag: 0.2.14 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index fbc4d170850a6..75a3af21dd63c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2012,7 +2012,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.2.13" +- dockerImage: "airbyte/source-github:0.2.14" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/github" connectionSpecification: @@ -2089,6 +2089,15 @@ description: "Space-delimited list of GitHub repository branches to pull\ \ commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified\ \ for a repository, the default branch will be pulled." + page_size_for_large_streams: + type: "integer" + title: "Page size for large streams" + minimum: 1 + maximum: 100 + default: 10 + description: "The Github connector contains several streams with a large\ + \ load. The page size of such streams depends on the size of your repository.\ + \ Recommended to specify values between 10 and 30." supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index 160005032c190..9823d188cb033 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.13 +LABEL io.airbyte.version=0.2.14 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/source.py b/airbyte-integrations/connectors/source-github/source_github/source.py index 56970d252c1ed..62af8d981ecc5 100644 --- a/airbyte-integrations/connectors/source-github/source_github/source.py +++ b/airbyte-integrations/connectors/source-github/source_github/source.py @@ -44,6 +44,7 @@ ) TOKEN_SEPARATOR = "," +DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM = 10 # To scan all the repos within orgnaization, organization name could be # specified by using asteriks i.e. "airbytehq/*" ORGANIZATION_PATTERN = re.compile("^.*/\\*$") @@ -140,6 +141,7 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> repository_stats_stream = RepositoryStats( authenticator=authenticator, repositories=repositories, + page_size_for_large_streams=config.get("page_size_for_large_streams", DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM), ) for stream_slice in repository_stats_stream.stream_slices(sync_mode=SyncMode.full_refresh): next(repository_stats_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice), None) @@ -153,9 +155,10 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: repositories = repos + organization_repos organizations = list({org.split("/")[0] for org in repositories}) + page_size = config.get("page_size_for_large_streams", DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM) organization_args = {"authenticator": authenticator, "organizations": organizations} - repository_args = {"authenticator": authenticator, "repositories": repositories} + repository_args = {"authenticator": authenticator, "repositories": repositories, "page_size_for_large_streams": page_size} repository_args_with_start_date = {**repository_args, "start_date": config["start_date"]} default_branches, branches_to_pull = self._get_branches_data(config.get("branch", ""), repository_args) diff --git a/airbyte-integrations/connectors/source-github/source_github/spec.json b/airbyte-integrations/connectors/source-github/source_github/spec.json index 1b5728f29700a..58d02c8655326 100644 --- a/airbyte-integrations/connectors/source-github/source_github/spec.json +++ b/airbyte-integrations/connectors/source-github/source_github/spec.json @@ -52,7 +52,6 @@ } ] }, - "repository": { "type": "string", "examples": ["airbytehq/airbyte", "airbytehq/*"], @@ -71,6 +70,14 @@ "title": "Branch", "examples": ["airbytehq/airbyte/master"], "description": "Space-delimited list of GitHub repository branches to pull commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled." + }, + "page_size_for_large_streams": { + "type": "integer", + "title": "Page size for large streams", + "minimum": 1, + "maximum": 100, + "default": 10, + "description": "The Github connector contains several streams with a large load. The page size of such streams depends on the size of your repository. Recommended to specify values between 10 and 30." 
} } }, diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 1186609456eaa..24a064166977e 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -13,6 +13,8 @@ from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream from requests.exceptions import HTTPError +DEFAULT_PAGE_SIZE = 100 + class GithubStream(HttpStream, ABC): url_base = "https://api.github.com/" @@ -20,15 +22,18 @@ class GithubStream(HttpStream, ABC): primary_key = "id" use_cache = True - # GitHub pagination could be from 1 to 100. - page_size = 100 + # Detect streams with high API load + large_stream = False stream_base_params = {} - def __init__(self, repositories: List[str], **kwargs): + def __init__(self, repositories: List[str], page_size_for_large_streams: int, **kwargs): super().__init__(**kwargs) self.repositories = repositories + # GitHub pagination could be from 1 to 100. + self.page_size = page_size_for_large_streams if self.large_stream else DEFAULT_PAGE_SIZE + MAX_RETRIES = 3 adapter = requests.adapters.HTTPAdapter(max_retries=MAX_RETRIES) self._session.mount("https://", adapter) @@ -295,6 +300,9 @@ class Organizations(GithubStream): API docs: https://docs.github.com/en/rest/reference/orgs#get-an-organization """ + # GitHub pagination could be from 1 to 100. + page_size = 100 + def __init__(self, organizations: List[str], **kwargs): super(GithubStream, self).__init__(**kwargs) self.organizations = organizations @@ -394,7 +402,7 @@ class PullRequests(SemiIncrementalGithubStream): API docs: https://docs.github.com/en/rest/reference/pulls#list-pull-requests """ - page_size = 50 + large_stream = True first_read_override_key = "first_read_override" def __init__(self, **kwargs): @@ -524,7 +532,7 @@ class Comments(IncrementalGithubStream): API docs: https://docs.github.com/en/rest/reference/issues#list-issue-comments-for-a-repository """ - page_size = 30 # `comments` is a large stream so it's better to set smaller page size. + large_stream = True def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return f"repos/{stream_slice['repository']}/issues/comments" @@ -637,7 +645,7 @@ class Issues(IncrementalGithubStream): API docs: https://docs.github.com/en/rest/reference/issues#list-repository-issues """ - page_size = 50 # `issues` is a large stream so it's better to set smaller page size. + large_stream = True stream_base_params = { "state": "all", @@ -651,7 +659,7 @@ class ReviewComments(IncrementalGithubStream): API docs: https://docs.github.com/en/rest/reference/pulls#list-review-comments-in-a-repository """ - page_size = 30 # `review-comments` is a large stream so it's better to set smaller page size. 
+ large_stream = True def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return f"repos/{stream_slice['repository']}/pulls/comments" diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 9bc04e8db2729..5854d8e186214 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -16,7 +16,7 @@ @responses.activate @patch("time.sleep") def test_bad_gateway_retry(time_mock): - args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date"} + args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date", "page_size_for_large_streams": 30} stream = PullRequestCommentReactions(**args) stream_slice = {"repository": "test_repo", "id": "id"} diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 5f68b1dec9ed9..2cbb098c11f41 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -92,6 +92,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams | | 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` | | 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | | 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | From e9dba45b41b1ac920e3294ebfec22bbea0956235 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Fri, 21 Jan 2022 08:22:45 -0800 Subject: [PATCH 185/215] fix docker process factory test on ci (#9695) * fix docker process factory test on ci * fmt * fix compilation --- .../ContainerOrchestratorApp.java | 3 +- .../java/io/airbyte/workers/WorkerApp.java | 3 +- .../workers/process/DockerProcessFactory.java | 70 +++++++------------ .../process/DockerProcessFactoryTest.java | 25 +++---- 4 files changed, 37 insertions(+), 64 deletions(-) diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java index 476f049d608f6..62fb1711fd166 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java @@ -196,8 +196,7 @@ private static ProcessFactory getProcessBuilderFactory(final Configs configs, fi configs.getWorkspaceRoot(), configs.getWorkspaceDockerMount(), configs.getLocalDockerMount(), - configs.getDockerNetwork(), - false); + configs.getDockerNetwork()); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java index bc9e89b01e69b..e27dd65e3130e 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java @@ 
-269,8 +269,7 @@ private static ProcessFactory getJobProcessFactory(final Configs configs) throws configs.getWorkspaceRoot(), configs.getWorkspaceDockerMount(), configs.getLocalDockerMount(), - configs.getDockerNetwork(), - false); + configs.getDockerNetwork()); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java index 62ff643d00b88..6184690b5ad14 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java @@ -38,7 +38,6 @@ public class DockerProcessFactory implements ProcessFactory { private final Path workspaceRoot; private final String localMountSource; private final String networkName; - private final boolean isOrchestrator; private final Path imageExistsScriptPath; /** @@ -48,20 +47,17 @@ public class DockerProcessFactory implements ProcessFactory { * @param workspaceMountSource workspace volume * @param localMountSource local volume * @param networkName docker network - * @param isOrchestrator if the process needs to be able to launch containers */ public DockerProcessFactory(final WorkerConfigs workerConfigs, final Path workspaceRoot, final String workspaceMountSource, final String localMountSource, - final String networkName, - final boolean isOrchestrator) { + final String networkName) { this.workerConfigs = workerConfigs; this.workspaceRoot = workspaceRoot; this.workspaceMountSource = workspaceMountSource; this.localMountSource = localMountSource; this.networkName = networkName; - this.isOrchestrator = isOrchestrator; this.imageExistsScriptPath = prepareImageExistsScript(); } @@ -105,46 +101,30 @@ public Process create(final String jobId, IOs.writeFile(jobRoot, file.getKey(), file.getValue()); } - List cmd; - - // todo: add --expose 80 to each - - if (isOrchestrator) { - cmd = Lists.newArrayList( - "docker", - "run", - "--rm", - "--init", - "-i", - "-v", - String.format("%s:%s", workspaceMountSource, workspaceRoot), // real workspace root, not a rebased version - "-v", - String.format("%s:%s", localMountSource, LOCAL_MOUNT_DESTINATION), - "-v", - "/var/run/docker.sock:/var/run/docker.sock", // needs to be able to run docker in docker - "-w", - jobRoot.toString(), // real jobroot, not rebased version - "--network", - networkName, - "--log-driver", - "none"); - } else { - cmd = Lists.newArrayList( - "docker", - "run", - "--rm", - "--init", - "-i", - "-v", - String.format("%s:%s", workspaceMountSource, DATA_MOUNT_DESTINATION), // uses job data mount - "-v", - String.format("%s:%s", localMountSource, LOCAL_MOUNT_DESTINATION), - "-w", - rebasePath(jobRoot).toString(), // rebases the job root on the job data mount - "--network", - networkName, - "--log-driver", - "none"); + final List cmd = Lists.newArrayList( + "docker", + "run", + "--rm", + "--init", + "-i", + "-w", + rebasePath(jobRoot).toString(), // rebases the job root on the job data mount + "--log-driver", + "none"); + + if (networkName != null) { + cmd.add("--network"); + cmd.add(networkName); + } + + if (workspaceMountSource != null) { + cmd.add("-v"); + cmd.add(String.format("%s:%s", workspaceMountSource, DATA_MOUNT_DESTINATION)); + } + + if (localMountSource != null) { + cmd.add("-v"); + cmd.add(String.format("%s:%s", localMountSource, LOCAL_MOUNT_DESTINATION)); } for (final var envEntry : workerConfigs.getEnvMap().entrySet()) { diff --git 
a/airbyte-workers/src/test/java/io/airbyte/workers/process/DockerProcessFactoryTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/process/DockerProcessFactoryTest.java index 53d33a21955b5..4b9d9d51e2065 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/process/DockerProcessFactoryTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/process/DockerProcessFactoryTest.java @@ -24,8 +24,6 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; // todo (cgardens) - these are not truly "unit" tests as they are check resources on the internet. // we should move them to "integration" tests, when we have facility to do so. @@ -61,7 +59,7 @@ public void testJqExists() throws IOException { public void testImageExists() throws IOException, WorkerException { final Path workspaceRoot = Files.createTempDirectory(Files.createDirectories(TEST_ROOT), "process_factory"); - final DockerProcessFactory processFactory = new DockerProcessFactory(new WorkerConfigs(new EnvConfigs()), workspaceRoot, "", "", "", false); + final DockerProcessFactory processFactory = new DockerProcessFactory(new WorkerConfigs(new EnvConfigs()), workspaceRoot, null, null, null); assertTrue(processFactory.checkImageExists("busybox")); } @@ -69,18 +67,17 @@ public void testImageExists() throws IOException, WorkerException { public void testImageDoesNotExist() throws IOException, WorkerException { final Path workspaceRoot = Files.createTempDirectory(Files.createDirectories(TEST_ROOT), "process_factory"); - final DockerProcessFactory processFactory = new DockerProcessFactory(new WorkerConfigs(new EnvConfigs()), workspaceRoot, "", "", "", false); + final DockerProcessFactory processFactory = new DockerProcessFactory(new WorkerConfigs(new EnvConfigs()), workspaceRoot, null, null, null); assertFalse(processFactory.checkImageExists("airbyte/fake:0.1.2")); } - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testFileWriting(boolean isOrchestrator) throws IOException, WorkerException { + @Test + public void testFileWriting() throws IOException, WorkerException { final Path workspaceRoot = Files.createTempDirectory(Files.createDirectories(TEST_ROOT), "process_factory"); final Path jobRoot = workspaceRoot.resolve("job"); final DockerProcessFactory processFactory = - new DockerProcessFactory(new WorkerConfigs(new EnvConfigs()), workspaceRoot, "", "", "", isOrchestrator); + new DockerProcessFactory(new WorkerConfigs(new EnvConfigs()), workspaceRoot, null, null, null); processFactory.create("job_id", 0, jobRoot, "busybox", false, ImmutableMap.of("config.json", "{\"data\": 2}"), "echo hi", new WorkerConfigs(new EnvConfigs()).getResourceRequirements(), Map.of(), Map.of()); @@ -92,9 +89,8 @@ public void testFileWriting(boolean isOrchestrator) throws IOException, WorkerEx /** * Tests that the env var map passed in is accessible within the process. 
*/ - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testEnvMapSet(boolean isOrchestrator) throws IOException, WorkerException { + @Test + public void testEnvMapSet() throws IOException, WorkerException { final Path workspaceRoot = Files.createTempDirectory(Files.createDirectories(TEST_ROOT), "process_factory"); final Path jobRoot = workspaceRoot.resolve("job"); @@ -105,10 +101,9 @@ public void testEnvMapSet(boolean isOrchestrator) throws IOException, WorkerExce new DockerProcessFactory( workerConfigs, workspaceRoot, - "", - "", - "host", - isOrchestrator); + null, + null, + "host"); final Process process = processFactory.create( "job_id", From c524e0af64789dc47bc1bbcc810dfe94c22d3c9f Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Fri, 21 Jan 2022 19:33:17 +0200 Subject: [PATCH 186/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20HubSpot:=20Add?= =?UTF-8?q?=20more=20fields=20for=20`email=5Fevents`=20stream=20(#9641)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * email_events.json updated * bugfix: "date-time" -> "date" Signed-off-by: Sergey Chvalyuk --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-hubspot/Dockerfile | 2 +- .../source-hubspot/source_hubspot/api.py | 2 +- .../source_hubspot/schemas/email_events.json | 79 +++++++++++++++++++ .../unit_tests/test_field_type_converting.py | 2 +- docs/integrations/sources/hubspot.md | 4 +- 7 files changed, 86 insertions(+), 7 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index e52b190ba9bf2..b285ea9ec0923 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -300,7 +300,7 @@ - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.33 + dockerImageTag: 0.1.34 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 75a3af21dd63c..c2657bf9c3894 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2909,7 +2909,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-hubspot:0.1.33" +- dockerImage: "airbyte/source-hubspot:0.1.34" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index 2f786fa761296..572784f1391ba 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.33 +LABEL io.airbyte.version=0.1.34 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py 
index 4f12dbc2a9882..480a9daea261e 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -37,7 +37,7 @@ KNOWN_CONVERTIBLE_SCHEMA_TYPES = { "bool": ("boolean", None), "enumeration": ("string", None), - "date": ("string", "date-time"), + "date": ("string", "date"), "date-time": ("string", "date-time"), "datetime": ("string", "date-time"), "json": ("string", None), diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/email_events.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/email_events.json index 53e62e3acb547..9c67db956f2a8 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/email_events.json +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/email_events.json @@ -8,6 +8,12 @@ "appName": { "type": ["null", "string"] }, + "attempt": { + "type": ["null", "integer"] + }, + "bounced": { + "type": ["null", "boolean"] + }, "browser": { "type": ["null", "object"], "properties": { @@ -28,6 +34,26 @@ }, "url": { "type": ["null", "string"] + }, + "version": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + } + } + }, + "category": { + "type": ["null", "string"] + }, + "causedBy": { + "type": ["null", "object"], + "properties": { + "created": { + "type": ["null", "integer"] + }, + "id": { + "type": ["null", "string"] } } }, @@ -37,6 +63,12 @@ "deviceType": { "type": ["null", "string"] }, + "dropMessage": { + "type": ["null", "string"] + }, + "dropReason": { + "type": ["null", "string"] + }, "duration": { "type": ["null", "integer"] }, @@ -73,17 +105,55 @@ "country": { "type": ["null", "string"] }, + "latitude": { + "type": ["null", "number"] + }, + "longitude": { + "type": ["null", "number"] + }, "state": { "type": ["null", "string"] + }, + "zipcode": { + "type": ["null", "string"] + } + } + }, + "obsoletedBy": { + "type": ["null", "object"], + "properties": { + "created": { + "type": ["null", "integer"] + }, + "id": { + "type": ["null", "string"] } } }, "portalId": { "type": ["null", "integer"] }, + "portalSubscriptionStatus": { + "type": ["null", "string"] + }, "recipient": { "type": ["null", "string"] }, + "referer": { + "type": ["null", "string"] + }, + "replyTo": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "requestedBy": { + "type": ["null", "string"] + }, + "requestedByUserId": { + "type": ["null", "integer"] + }, "response": { "type": ["null", "string"] }, @@ -101,6 +171,15 @@ "smtpId": { "type": ["null", "string"] }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, "subject": { "type": ["null", "string"] }, diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py index f55391f49fec0..d08a0ad6b7385 100644 --- a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py @@ -17,7 +17,7 @@ ("enumeration", {"type": ["null", "string"]}), ("object", {"type": ["null", "object"]}), ("array", {"type": ["null", "array"]}), - ("date", {"type": ["null", "string"], "format": "date-time"}), + ("date", {"type": ["null", "string"], "format": "date"}), ("date-time", {"type": 
["null", "string"], "format": "date-time"}), ("datetime", {"type": ["null", "string"], "format": "date-time"}), ("json", {"type": ["null", "string"]}), diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index ffa18c68000ce..8b12c7e32e2da 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -110,10 +110,10 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------| -| 0.1.33 | 2021-01-14 | [8887](https://github.com/airbytehq/airbyte/pull/8887) | More efficient support for incremental updates on Companies, Contact, Deals and Engagement streams | +| 0.1.34 | 2022-01-20 | [9641](https://github.com/airbytehq/airbyte/pull/9641) | Add more fields for `email_events` stream | +| 0.1.33 | 2022-01-14 | [8887](https://github.com/airbytehq/airbyte/pull/8887) | More efficient support for incremental updates on Companies, Contact, Deals and Engagement streams | | 0.1.32 | 2022-01-13 | [8011](https://github.com/airbytehq/airbyte/pull/8011) | Add new stream form_submissions | | 0.1.31 | 2022-01-11 | [9385](https://github.com/airbytehq/airbyte/pull/9385) | Remove auto-generated `properties` from `Engagements` stream | - | 0.1.30 | 2021-01-10 | [9129](https://github.com/airbytehq/airbyte/pull/9129) | Created Contacts list memberships streams | | 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | | 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update fields and descriptions | From aa31f6abadb013ebffbde3f130fcb7e97570450d Mon Sep 17 00:00:00 2001 From: Maksym Pavlenok Date: Fri, 21 Jan 2022 19:39:49 +0200 Subject: [PATCH 187/215] =?UTF-8?q?=F0=9F=8E=89=20=20Collapse=20sonar=20re?= =?UTF-8?q?ports=20(#9699)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/sonar-scan.yml | 6 +++++- tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sonar-scan.yml b/.github/workflows/sonar-scan.yml index db57aef0b7f69..7c1f40ec216cf 100644 --- a/.github/workflows/sonar-scan.yml +++ b/.github/workflows/sonar-scan.yml @@ -1,7 +1,7 @@ name: Sonar Scan on: pull_request: - types: [opened, synchronize, reopened, closed, ready_for_review] + types: [opened, synchronize, closed] jobs: @@ -12,6 +12,10 @@ jobs: outputs: changed-modules: ${{ steps.detect-changed-modules.outputs.changed-modules }} steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.9.1 + with: + workflow_id: ${{ github.event.workflow.id }} - name: Checkout Airbyte uses: actions/checkout@v2 with: diff --git a/tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py b/tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py index 1b96f5ec6e785..bcbb42dd371eb 100644 --- a/tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py +++ b/tools/ci_code_validator/ci_sonar_qube/sonar_qube_api.py @@ -171,6 +171,9 @@ def generate_report(self, project_name: str, report_file: str) -> bool: project_data = self.prepare_project_settings(project_name) md_file = 
MdUtils(file_name=report_file) + md_file.new_line("<details><summary> SonarQube Report </summary>") + md_file.new_line("<br>
    ") + md_file.new_line("") md_file.new_line(f'### SonarQube report for {project_data["name"]}') project_name = project_data["project"] @@ -311,6 +314,9 @@ def generate_report(self, project_name: str, report_file: str) -> bool: coverage_files.append(("", "")) table_items = ["File", "Coverage"] * 2 + list(itertools.chain.from_iterable(coverage_files)) md_file.new_table(columns=4, rows=int(len(coverage_files) / 2 + 1), text=table_items, text_align='left') + md_file.new_line("") + md_file.new_line("

    ") + md_file.new_line("
    ") md_file.create_md_file() self.logger.info(f"The {report_file} was generated") return True From 1a74de6156a5bdaa1b160d4effd3e6e08b66a420 Mon Sep 17 00:00:00 2001 From: Zaimwa9 Date: Fri, 21 Jan 2022 19:40:17 +0100 Subject: [PATCH 188/215] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20PersistI?= =?UTF-8?q?q=20(#9515)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../3052c77e-8b91-47e2-97a0-a29a22794b4b.json | 8 + .../src/main/resources/icons/persistiq.svg | 8 + .../resources/seed/source_definitions.yaml | 7 + .../src/main/resources/seed/source_specs.yaml | 19 ++ airbyte-integrations/builds.md | 1 + .../connectors/source-persistiq/.dockerignore | 6 + .../connectors/source-persistiq/Dockerfile | 38 ++++ .../connectors/source-persistiq/README.md | 133 ++++++++++++ .../acceptance-test-config.yml | 23 +++ .../acceptance-test-docker.sh | 16 ++ .../connectors/source-persistiq/bootstrap.md | 18 ++ .../connectors/source-persistiq/build.gradle | 9 + .../integration_tests/__init__.py | 3 + .../integration_tests/acceptance.py | 14 ++ .../integration_tests/catalog.json | 195 ++++++++++++++++++ .../integration_tests/configured_catalog.json | 31 +++ .../expected_campaigns_stream.txt | 3 + .../integration_tests/invalid_config.json | 3 + .../integration_tests/sample_config.json | 3 + .../connectors/source-persistiq/main.py | 13 ++ .../source-persistiq/requirements.txt | 2 + .../connectors/source-persistiq/setup.py | 30 +++ .../source_persistiq/__init__.py | 8 + .../source_persistiq/schemas/campaigns.json | 52 +++++ .../source_persistiq/schemas/leads.json | 93 +++++++++ .../source_persistiq/schemas/users.json | 25 +++ .../source_persistiq/source.py | 96 +++++++++ .../source_persistiq/spec.json | 17 ++ .../source-persistiq/unit_tests/__init__.py | 3 + .../unit_tests/test_source.py | 38 ++++ .../unit_tests/test_streams.py | 64 ++++++ docs/SUMMARY.md | 1 + docs/integrations/README.md | 1 + docs/integrations/sources/persistiq.md | 38 ++++ 34 files changed, 1019 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/3052c77e-8b91-47e2-97a0-a29a22794b4b.json create mode 100644 airbyte-config/init/src/main/resources/icons/persistiq.svg create mode 100644 airbyte-integrations/connectors/source-persistiq/.dockerignore create mode 100644 airbyte-integrations/connectors/source-persistiq/Dockerfile create mode 100644 airbyte-integrations/connectors/source-persistiq/README.md create mode 100644 airbyte-integrations/connectors/source-persistiq/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-persistiq/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-persistiq/bootstrap.md create mode 100644 airbyte-integrations/connectors/source-persistiq/build.gradle create mode 100644 airbyte-integrations/connectors/source-persistiq/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-persistiq/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-persistiq/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-persistiq/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-persistiq/integration_tests/expected_campaigns_stream.txt create mode 100644 airbyte-integrations/connectors/source-persistiq/integration_tests/invalid_config.json create mode 100644 
airbyte-integrations/connectors/source-persistiq/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-persistiq/main.py create mode 100644 airbyte-integrations/connectors/source-persistiq/requirements.txt create mode 100644 airbyte-integrations/connectors/source-persistiq/setup.py create mode 100644 airbyte-integrations/connectors/source-persistiq/source_persistiq/__init__.py create mode 100644 airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/campaigns.json create mode 100644 airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/leads.json create mode 100644 airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/users.json create mode 100644 airbyte-integrations/connectors/source-persistiq/source_persistiq/source.py create mode 100644 airbyte-integrations/connectors/source-persistiq/source_persistiq/spec.json create mode 100644 airbyte-integrations/connectors/source-persistiq/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-persistiq/unit_tests/test_source.py create mode 100644 airbyte-integrations/connectors/source-persistiq/unit_tests/test_streams.py create mode 100644 docs/integrations/sources/persistiq.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/3052c77e-8b91-47e2-97a0-a29a22794b4b.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/3052c77e-8b91-47e2-97a0-a29a22794b4b.json new file mode 100644 index 0000000000000..4f8e9e308887d --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/3052c77e-8b91-47e2-97a0-a29a22794b4b.json @@ -0,0 +1,8 @@ +{ + "sourceDefinitionId": "3052c77e-8b91-47e2-97a0-a29a22794b4b", + "name": "PersistIq", + "dockerRepository": "airbyte/source-persistiq", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/persistiq", + "icon": "persistiq.svg" +} diff --git a/airbyte-config/init/src/main/resources/icons/persistiq.svg b/airbyte-config/init/src/main/resources/icons/persistiq.svg new file mode 100644 index 0000000000000..e10a9374a771b --- /dev/null +++ b/airbyte-config/init/src/main/resources/icons/persistiq.svg @@ -0,0 +1,8 @@ + + + + + + diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index b285ea9ec0923..89b8be04ceb7e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -512,6 +512,13 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/paystack icon: paystack.svg sourceType: api +- name: PersistIq + sourceDefinitionId: 3052c77e-8b91-47e2-97a0-a29a22794b4b + dockerRepository: airbyte/source-persistiq + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/persistiq + icon: persistiq.svg + sourceType: api - name: Pinterest sourceDefinitionId: 5cb7e5fe-38c2-11ec-8d3d-0242ac130003 dockerRepository: airbyte/source-pinterest diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index c2657bf9c3894..8d3f111959a9a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -5489,6 +5489,25 @@ supportsNormalization: false supportsDBT: false 
supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-persistiq:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/persistiq" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Persistiq Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "PersistIq API Key. See the docs for more information on where to find that key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-pinterest:0.1.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/pinterest" diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 93437d64b079e..5f73847934dfc 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -70,6 +70,7 @@ | Oracle DB | [![source-oracle](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-oracle%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-oracle) | | Paypal Transaction | [![paypal-transaction](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-paypal-transaction%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-paypal-transaction) | | Paystack | [![source-paystack](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-paystack%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-paystack) | +| PersistIq | [![source-persistiq](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-persistiq%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-persistiq/) | | Pipedrive | [![source-pipedrive](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-pipedrive%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-pipedrive) | | Plaid | [![source-plaid](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-plaid%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-plaid) | | Postgres | [![source-postgres](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-postgres%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-postgres) | diff --git a/airbyte-integrations/connectors/source-persistiq/.dockerignore b/airbyte-integrations/connectors/source-persistiq/.dockerignore new file mode 100644 index 0000000000000..7754a0a84ebcd --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_persistiq +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-persistiq/Dockerfile b/airbyte-integrations/connectors/source-persistiq/Dockerfile new file mode 100644 index 0000000000000..e533a190826d1 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# 
install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_persistiq ./source_persistiq + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-persistiq diff --git a/airbyte-integrations/connectors/source-persistiq/README.md b/airbyte-integrations/connectors/source-persistiq/README.md new file mode 100644 index 0000000000000..f36570bb419c2 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/README.md @@ -0,0 +1,133 @@ +# Persistiq Source + +This is the repository for the Persistiq source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/persistiq). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-persistiq:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/persistiq) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_persistiq/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. 
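+
+A minimal sketch for sanity-checking the key in `secrets/config.json` (it mirrors the connector's `check_connection`: a GET to `/v1/users` with the `x-api-key` header; the file path and placeholder key are illustrative only):
+```python
+import json
+
+import requests
+
+# Load the same config file the connector reads; it only needs {"api_key": "..."}.
+with open("secrets/config.json") as config_file:
+    config = json.load(config_file)
+
+# Same request the connector's check_connection performs.
+response = requests.get(
+    "https://api.persistiq.com/v1/users",
+    headers={"x-api-key": config["api_key"]},
+)
+response.raise_for_status()  # a 401/403 here usually means the key is invalid
+print(f"Credentials OK, {len(response.json().get('users', []))} user(s) visible")
+```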
+To obtain credentials, create an account on PersistIq and follow the [documentation](https://apidocs.persistiq.com/#authentication) + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source persistiq test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-persistiq:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-persistiq:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-persistiq:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-persistiq:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-persistiq:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-persistiq:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-persistiq:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-persistiq:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. 
+We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-persistiq/acceptance-test-config.yml b/airbyte-integrations/connectors/source-persistiq/acceptance-test-config.yml new file mode 100644 index 0000000000000..4446ede08e8ec --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/acceptance-test-config.yml @@ -0,0 +1,23 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-persistiq:dev +tests: + spec: + - spec_path: "source_persistiq/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + expect_records: + path: "integration_tests/expected_campaigns_stream.txt" + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + diff --git a/airbyte-integrations/connectors/source-persistiq/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-persistiq/acceptance-test-docker.sh new file mode 100644 index 0000000000000..c51577d10690c --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-persistiq/bootstrap.md b/airbyte-integrations/connectors/source-persistiq/bootstrap.md new file mode 100644 index 0000000000000..c4f0a41ca68a7 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/bootstrap.md @@ -0,0 +1,18 @@ +# PersistIq +PersistIq is an outbound automation tool designed for small teams to find, reach, and organize customers all in one simple platform. + +## Streams + +This Source is capable of syncing the following streams: +* [Users](https://apidocs.persistiq.com/#users) +* [Leads](https://apidocs.persistiq.com/#leads) +* [Campaigns](https://apidocs.persistiq.com/#campaigns) + +### Incremental streams +Incremental streams were not implemented in the initial version. 
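+
+All three streams above are read with simple page-based requests. A rough, illustrative sketch of that access pattern (the `fetch_all` helper and the placeholder API key are not part of the connector; the header, endpoints, and `has_more` flag are as used in `source.py`):
+```python
+import requests
+
+API_KEY = "<your PersistIq API key>"  # placeholder, not a real key
+
+
+def fetch_all(resource: str):
+    """Yield every record of a resource ("users", "leads" or "campaigns"), page by page."""
+    page = 1
+    while True:
+        response = requests.get(
+            f"https://api.persistiq.com/v1/{resource}",
+            headers={"x-api-key": API_KEY},
+            params={"page": page},
+        )
+        response.raise_for_status()
+        body = response.json()
+        yield from body[resource]  # e.g. body["leads"] for the leads stream
+        if not body.get("has_more", False):
+            break
+        page += 1  # the connector itself derives this from the response's "next_page" field
+
+
+leads = list(fetch_all("leads"))
+```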
+ +### Next steps +Implement incremental sync and additional streams (`Lead status`, `Lead fields`, `Events`). + +### Rate limits +The API rate limit is at 100 requests/minutes. Read [Rate Limits](https://apidocs.persistiq.com/#error-codes) for more informations. diff --git a/airbyte-integrations/connectors/source-persistiq/build.gradle b/airbyte-integrations/connectors/source-persistiq/build.gradle new file mode 100644 index 0000000000000..a7700be96ab8e --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_persistiq' +} diff --git a/airbyte-integrations/connectors/source-persistiq/integration_tests/__init__.py b/airbyte-integrations/connectors/source-persistiq/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-persistiq/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-persistiq/integration_tests/acceptance.py new file mode 100644 index 0000000000000..0347f2a0b143d --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-persistiq/integration_tests/catalog.json b/airbyte-integrations/connectors/source-persistiq/integration_tests/catalog.json new file mode 100644 index 0000000000000..3a8f681f82c78 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/integration_tests/catalog.json @@ -0,0 +1,195 @@ +{ + "streams": [ + { + "stream": { + "name": "leads", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "bounced": { + "type": ["null", "boolean"] + }, + "owner_id": { + "type": ["null", "string"] + }, + "optedout": { + "type": ["null", "boolean"] + }, + "sent_count": { + "type": ["null", "integer"] + }, + "replied_count": { + "type": ["null", "integer"] + }, + "last_sent_at": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "data": { + "company_name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"], + "format": "email" + }, + "first_name": { + "type": ["null", "string"] + }, + "last_name": { + "type": ["null", "string"] + }, + "address": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "phone": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "industry": { + "type": ["null", "string"] + }, + "snippet": { + "type": ["null", "string"] + }, + "snippet1": { + "type": ["null", "string"] + }, + "snippet2": { + "type": ["null", "string"] + }, + "snippet3": { + "type": ["null", "string"] + }, + "snippet4": { + "type": ["null", "string"] + }, + "twitch_name": { + "type": ["null", "string"] + }, + "linkedin": { 
+ "type": ["null", "string"] + }, + "twitter": { + "type": ["null", "string"] + }, + "facebook": { + "type": ["null", "string"] + }, + "salesforce_id": { + "type": ["null", "string"] + } + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "campaigns", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "creator": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + } + }, + "stats": { + "prospects_contacted": { + "type": ["null", "integer"] + }, + "prospects_reached": { + "type": ["null", "integer"] + }, + "prospects_opened": { + "type": ["null", "integer"] + }, + "prospects_replied": { + "type": ["null", "integer"] + }, + "prospects_bounced": { + "type": ["null", "integer"] + }, + "prospects_optedout": { + "type": ["null", "integer"] + }, + "total_contacted": { + "type": ["null", "integer"] + } + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "users", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"], + "format": "email" + }, + "activated": { + "type": ["null", "boolean"] + }, + "default_mailbox_id": { + "type": ["null", "string"] + }, + "salesforce_id": { + "type": ["null", "string"] + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "source_defined_primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/connectors/source-persistiq/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-persistiq/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..6719f174b0bbf --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/integration_tests/configured_catalog.json @@ -0,0 +1,31 @@ +{ + "streams": [ + { + "stream": { + "name": "campaigns", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "users", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "leads", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-persistiq/integration_tests/expected_campaigns_stream.txt b/airbyte-integrations/connectors/source-persistiq/integration_tests/expected_campaigns_stream.txt new file mode 100644 index 0000000000000..47c985a587c9b --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/integration_tests/expected_campaigns_stream.txt @@ -0,0 +1,3 @@ +{"stream": "campaigns", "data": {"id": "c_3kqMZZ","name": "Schedule Meetings with Reps (sample campaign)","creator": {"id": "u_pdKWk3", "name": "Sherif Nada", "email": 
"integration-test@airbyte.io"},"stats": {"prospects_contacted": 0, "prospects_reached": 0, "prospects_opened": 0, "prospects_replied": 0, "prospects_bounced": 0, "prospects_optedout": 0, "total_prospects": 3}}, "emitted_at": 1629119628000} +{"stream": "campaigns", "data": {"id": "c_ljDgZB","name": "Schedule Meetings with Managers (sample campaign)","creator": {"id": "u_pdKWk3", "name": "Sherif Nada", "email": "integration-test@airbyte.io"},"stats": {"prospects_contacted": 0, "prospects_reached": 0, "prospects_opened": 0, "prospects_replied": 0, "prospects_bounced": 0, "prospects_optedout": 0, "total_prospects": 3}}, "emitted_at": 1629119628000} +{"stream": "campaigns", "data": {"id": "c_3e01Kb","name": "Schedule Meetings with CEOs (sample campaign)","creator": {"id": "u_pdKWk3", "name": "Sherif Nada", "email": "integration-test@airbyte.io"},"stats": {"prospects_contacted": 0, "prospects_reached": 0, "prospects_opened": 0, "prospects_replied": 0, "prospects_bounced": 0, "prospects_optedout": 0, "total_prospects": 3}}, "emitted_at": 1629119628000} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-persistiq/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-persistiq/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..dc521ade7acf9 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/integration_tests/invalid_config.json @@ -0,0 +1,3 @@ +{ + "api_key": "" +} diff --git a/airbyte-integrations/connectors/source-persistiq/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-persistiq/integration_tests/sample_config.json new file mode 100644 index 0000000000000..c16cd6d40596a --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/integration_tests/sample_config.json @@ -0,0 +1,3 @@ +{ + "api_key": "" +} diff --git a/airbyte-integrations/connectors/source-persistiq/main.py b/airbyte-integrations/connectors/source-persistiq/main.py new file mode 100644 index 0000000000000..3f871b1110903 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_persistiq import SourcePersistiq + +if __name__ == "__main__": + source = SourcePersistiq() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-persistiq/requirements.txt b/airbyte-integrations/connectors/source-persistiq/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-persistiq/setup.py b/airbyte-integrations/connectors/source-persistiq/setup.py new file mode 100644 index 0000000000000..af1420f9687cc --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/setup.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "requests_mock==1.8.0", + "source-acceptance-test", +] + +setup( + name="source_persistiq", + description="Source implementation for Persistiq.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-persistiq/source_persistiq/__init__.py b/airbyte-integrations/connectors/source-persistiq/source_persistiq/__init__.py new file mode 100644 index 0000000000000..a3923d95bbe87 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/source_persistiq/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from .source import SourcePersistiq + +__all__ = ["SourcePersistiq"] diff --git a/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/campaigns.json b/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/campaigns.json new file mode 100644 index 0000000000000..d602148216150 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/campaigns.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["string"] + }, + "name": { + "type": ["null", "string"] + }, + "creator": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + } + } + }, + "stats": { + "type": ["null", "object"], + "properties": { + "prospects_contacted": { + "type": ["null", "integer"] + }, + "prospects_reached": { + "type": ["null", "integer"] + }, + "prospects_opened": { + "type": ["null", "integer"] + }, + "prospects_replied": { + "type": ["null", "integer"] + }, + "prospects_bounced": { + "type": ["null", "integer"] + }, + "prospects_optedout": { + "type": ["null", "integer"] + }, + "total_contacted": { + "type": ["null", "integer"] + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/leads.json b/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/leads.json new file mode 100644 index 0000000000000..cb6ba052a8828 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/leads.json @@ -0,0 +1,93 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["string"] + }, + "bounced": { + "type": ["null", "boolean"] + }, + "owner_id": { + "type": ["string"] + }, + "optedout": { + "type": ["null", "boolean"] + }, + "sent_count": { + "type": ["null", "integer"] + }, + "replied_count": { + "type": ["null", "integer"] + }, + "last_sent_at": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "data": { + "company_name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"], + "format": "email" + }, + "first_name": { + "type": ["null", "string"] + }, + "last_name": { + "type": ["null", "string"] + }, + "address": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "phone": { + 
"type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "industry": { + "type": ["null", "string"] + }, + "snippet": { + "type": ["null", "string"] + }, + "snippet1": { + "type": ["null", "string"] + }, + "snippet2": { + "type": ["null", "string"] + }, + "snippet3": { + "type": ["null", "string"] + }, + "snippet4": { + "type": ["null", "string"] + }, + "twitch_name": { + "type": ["null", "string"] + }, + "linkedin": { + "type": ["null", "string"] + }, + "twitter": { + "type": ["null", "string"] + }, + "facebook": { + "type": ["null", "string"] + }, + "salesforce_id": { + "type": ["null", "string"] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/users.json b/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/users.json new file mode 100644 index 0000000000000..8a4d6b307689e --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/source_persistiq/schemas/users.json @@ -0,0 +1,25 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["string"] + }, + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["string"], + "format": "email" + }, + "activated": { + "type": ["null", "boolean"] + }, + "default_mailbox_id": { + "type": ["null", "string"] + }, + "salesforce_id": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-persistiq/source_persistiq/source.py b/airbyte-integrations/connectors/source-persistiq/source_persistiq/source.py new file mode 100644 index 0000000000000..b4dc493d4e57f --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/source_persistiq/source.py @@ -0,0 +1,96 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +from abc import ABC +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth import NoAuth + +# Basic full refresh stream + + +class PersistiqStream(HttpStream, ABC): + def __init__(self, api_key: str, **kwargs): + super().__init__(**kwargs) + self.api_key = api_key + + url_base = "https://api.persistiq.com/v1/" + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + json_response = response.json() + if not json_response.get("has_more", False): + return None + + return {"page": json_response.get("next_page")[-1]} + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + return {"page": 1 if not next_page_token else next_page_token["page"]} + + def request_headers(self, **kwargs) -> MutableMapping[str, Any]: + return {"x-api-key": self.api_key} + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + yield response.json() + + +class Users(PersistiqStream): + primary_key = "id" + + def path(self, **kwargs) -> str: + return "users" + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json = response.json() + yield from json["users"] + + +class Leads(PersistiqStream): + primary_key = "id" + + def path(self, **kwargs) -> str: + return "leads" + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json = response.json() + yield from json["leads"] + + +class Campaigns(PersistiqStream): + primary_key = "id" + + def path(self, **kwargs) -> str: + return "campaigns" + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json = response.json() + yield from json["campaigns"] + + +# Source + + +class SourcePersistiq(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + headers = {"x-api-key": config["api_key"]} + url = "https://api.persistiq.com/v1/users" + try: + response = requests.get(url, headers=headers) + response.raise_for_status() + return True, None + except requests.exceptions.RequestException as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = NoAuth() + return [ + Users(authenticator=auth, api_key=config["api_key"]), + Leads(authenticator=auth, api_key=config["api_key"]), + Campaigns(authenticator=auth, api_key=config["api_key"]), + ] diff --git a/airbyte-integrations/connectors/source-persistiq/source_persistiq/spec.json b/airbyte-integrations/connectors/source-persistiq/source_persistiq/spec.json new file mode 100644 index 0000000000000..bf3260203357e --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/source_persistiq/spec.json @@ -0,0 +1,17 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/persistiq", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Persistiq Spec", + "type": "object", + "required": ["api_key"], + "additionalProperties": false, + "properties": { + "api_key": { + "type": "string", + "description": "PersistIq API Key. 
See the docs for more information on where to find that key.", + "airbyte_secret": true + } + } + } +} diff --git a/airbyte-integrations/connectors/source-persistiq/unit_tests/__init__.py b/airbyte-integrations/connectors/source-persistiq/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-persistiq/unit_tests/test_source.py b/airbyte-integrations/connectors/source-persistiq/unit_tests/test_source.py new file mode 100644 index 0000000000000..b230eb6c9fd32 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/unit_tests/test_source.py @@ -0,0 +1,38 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import requests +from source_persistiq.source import SourcePersistiq + + +def test_check_connection(mocker, requests_mock): + source = SourcePersistiq() + mock_logger = mocker.Mock() + test_config = {"api_key": "mybeautifulkey"} + # success + requests_mock.get( + "https://api.persistiq.com/v1/users", + json={ + "id": "u_3an2Jp", + "name": "Gabriel Rossmann", + "email": "gabriel@punctual.cc", + "activated": "true", + "default_mailbox_id": "mbox_38ymEp", + "salesforce_id": "", + }, + ) + assert source.check_connection(mock_logger, test_config) == (True, None) + + # failure + requests_mock.get("https://api.persistiq.com/v1/users", status_code=500) + connection_success, connection_failure = source.check_connection(mock_logger, test_config) + assert not connection_success + assert isinstance(connection_failure, requests.exceptions.HTTPError) + + +def test_streams(): + source = SourcePersistiq() + config = {"api_key": "my-api-key"} + streams = source.streams(config) + assert len(streams) == 3 diff --git a/airbyte-integrations/connectors/source-persistiq/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-persistiq/unit_tests/test_streams.py new file mode 100644 index 0000000000000..32f5fe3153909 --- /dev/null +++ b/airbyte-integrations/connectors/source-persistiq/unit_tests/test_streams.py @@ -0,0 +1,64 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
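+#
+# These tests instantiate the abstract PersistiqStream directly by patching its
+# abstract members, then exercise pagination, request parameters, response
+# parsing, request headers and the HTTP method against a minimal mocked
+# response object.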
+# + +import pytest +from source_persistiq.source import PersistiqStream + + +def mocked_requests_get(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + return MockResponse(json_data=kwargs["json_data"], status_code=kwargs["status_code"]) + + +@pytest.fixture +def patch_base_class(mocker): + # Mock abstract methods to enable instantiating abstract class + mocker.patch.object(PersistiqStream, "path", "v0/example_endpoint") + mocker.patch.object(PersistiqStream, "primary_key", "test_primary_key") + mocker.patch.object(PersistiqStream, "__abstractmethods__", set()) + + +def test_request_params(patch_base_class): + stream = PersistiqStream(api_key="mybeautifulkey") + inputs = {"next_page_token": {"page": 1}} + expected_params = {"page": 1} + assert stream.request_params(stream_state=None, **inputs) == expected_params + + +def test_next_page_token(patch_base_class): + stream = PersistiqStream(api_key="mybeautifulkey") + # With next page + response = mocked_requests_get(json_data={"has_more": True, "next_page": "https://api.persistiq.com/v1/users?page=2"}, status_code=200) + expected_token = "2" + assert stream.next_page_token(response=response) == {"page": expected_token} + # Without next page + response = mocked_requests_get(json_data={}, status_code=200) + expected_token = None + assert stream.next_page_token(response=response) == expected_token + + +def test_parse_response(patch_base_class): + stream = PersistiqStream(api_key="mybeautifulkey") + response = mocked_requests_get(json_data={"users": [{"id": 1, "name": "John Doe"}]}, status_code=200) + expected_parsed_object = {"users": [{"id": 1, "name": "John Doe"}]} + assert next(stream.parse_response(response=response)) == expected_parsed_object + + +def test_request_headers(patch_base_class): + stream = PersistiqStream(api_key="mybeautifulkey") + expected_headers = {"x-api-key": "mybeautifulkey"} + assert stream.request_headers() == expected_headers + + +def test_http_method(patch_base_class): + stream = PersistiqStream(api_key="mybeautifulkey") + expected_method = "GET" + assert stream.http_method == expected_method diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index ae380d54c6de6..1670c75bfcbc5 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -116,6 +116,7 @@ * [Outreach](integrations/sources/outreach.md) * [Paypal Transaction](integrations/sources/paypal-transaction.md) * [Paystack](integrations/sources/paystack.md) + * [Persistiq](integrations/sources/persistiq.md) * [Plaid](integrations/sources/plaid.md) * [Pinterest](integrations/sources/pinterest.md) * [Pipedrive](integrations/sources/pipedrive.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index ed4b9618e65a9..37ba4b21c894c 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -89,6 +89,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [Outreach](./sources/outreach.md)| Alpha | | [PayPal Transaction](sources/paypal-transaction.md) | Beta | | [Paystack](sources/paystack.md) | Alpha | +| [PersistIq](sources/persistiq.md) | Alpha | | [Pinterest](sources/pinterest.md) | Alpha | | [Pipedrive](sources/pipedrive.md) | Alpha | | [Plaid](sources/plaid.md) | Alpha | diff --git a/docs/integrations/sources/persistiq.md b/docs/integrations/sources/persistiq.md new file mode 100644 index 0000000000000..fa466116718ed --- /dev/null +++ 
b/docs/integrations/sources/persistiq.md @@ -0,0 +1,38 @@ +# PersistIq + +## Sync overview + +The PersistIq source supports Full Refresh syncs only. + +This source syncs data for the [PersistIq API](https://apidocs.persistiq.com/#introduction). + + +### Output schema + +This Source is capable of syncing the following streams: + +* [Users](https://apidocs.persistiq.com/#users) +* [Leads](https://apidocs.persistiq.com/#leads) +* [Campaigns](https://apidocs.persistiq.com/#campaigns) + +### Features + +| Feature | Supported?\(Yes/No\) +| :--- | :--- | +| Full Refresh Sync | Yes | +| Incremental - Append Sync | No | +| Namespaces | No | + +### Performance considerations + +The PersistIq connector should not run into PersistIq API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. + +## Getting started + +### Requirements + +* PersistIq API Key + +### Setup guide + +Please read [How to find your API key](https://apidocs.persistiq.com/#introduction). From 7e23ee2eeb0a02160cd322b8439f382733ab2562 Mon Sep 17 00:00:00 2001 From: ron-damon <48366185+ron-damon@users.noreply.github.com> Date: Fri, 21 Jan 2022 15:52:12 -0300 Subject: [PATCH 189/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20Amazon=20Seller?= =?UTF-8?q?=20Partner:=20Add=20GET=5FFLAT=5FFILE=5FALL=5FORDERS=5FDATA=5FB?= =?UTF-8?q?Y=5FLAST=5FUPDATE=5FGENERAL=20report=20(#9621)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../e55879a8-0ef8-4557-abcf-ab34c53ec460.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../source-amazon-seller-partner/Dockerfile | 2 +- ...ll_orders_data_by_last_update_general.json | 118 ++++++++++++++++++ .../integration_tests/sample_state.json | 3 + ...LL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL.json | 103 +++++++++++++++ .../source_amazon_seller_partner/source.py | 2 + .../source_amazon_seller_partner/streams.py | 9 ++ .../sources/amazon-seller-partner.md | 5 +- 10 files changed, 242 insertions(+), 6 deletions(-) create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_flat_file_all_orders_data_by_last_update_general.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json index 93080de9d5a53..1c597b0010e99 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e55879a8-0ef8-4557-abcf-ab34c53ec460.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e55879a8-0ef8-4557-abcf-ab34c53ec460", "name": "Amazon Seller Partner", "dockerRepository": "airbyte/source-amazon-seller-partner", - "dockerImageTag": "0.2.13", + "dockerImageTag": "0.2.14", "documentationUrl": "https://docs.airbyte.io/integrations/sources/amazon-seller-partner", "icon": "amazonsellerpartner.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 
89b8be04ceb7e..7d535ac41666a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -22,7 +22,7 @@ - name: Amazon Seller Partner sourceDefinitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 dockerRepository: airbyte/source-amazon-seller-partner - dockerImageTag: 0.2.13 + dockerImageTag: 0.2.14 sourceType: api documentationUrl: https://docs.airbyte.io/integrations/sources/amazon-seller-partner icon: amazonsellerpartner.svg diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 8d3f111959a9a..64a2fd2273fe6 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -157,7 +157,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amazon-seller-partner:0.2.13" +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.14" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile index c4b24cd2a828d..a4d3bbabe01b2 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.13 +LABEL io.airbyte.version=0.2.14 LABEL io.airbyte.name=airbyte/source-amazon-seller-partner diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_flat_file_all_orders_data_by_last_update_general.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_flat_file_all_orders_data_by_last_update_general.json new file mode 100644 index 0000000000000..10a33012e2ead --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_flat_file_all_orders_data_by_last_update_general.json @@ -0,0 +1,118 @@ +{ + "streams": [ + { + "stream": { + "name": "GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL", + "json_schema": { + "title": "Flat File All Orders Data Reports (by last update)", + "description": "Flat File All Orders Data by Last Update Date General Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "amazon-order-id": { + "type": "string" + }, + "asin": { + "type": ["null", "string"] + }, + "currency": { + "type": ["null", "string"] + }, + "fulfillment-channel": { + "type": ["null", "string"] + }, + "gift-wrap-price": { + "type": ["null", "string"] + }, + "gift-wrap-tax": { + "type": ["null", "string"] + }, + "is-business-order": { + "type": ["null", "string"] + }, + "item-price": { + "type": ["null", "string"] + }, + "item-promotion-discount": { + "type": ["null", "string"] + }, + "item-status": { + "type": ["null", "string"] + }, + "item-tax": { + "type": ["null", "string"] + }, + "last-updated-date": { + "type": "string", + "format": "date-time" + }, + "merchant-order-id": { + "type": ["null", "string"] + 
}, + "order-channel": { + "type": ["null", "string"] + }, + "order-status": { + "type": ["null", "string"] + }, + "price-designation": { + "type": ["null", "string"] + }, + "product-name": { + "type": ["null", "string"] + }, + "promotion-ids": { + "type": ["null", "string"] + }, + "purchase-date": { + "type": ["null", "string"], + "format": "date-time" + }, + "purchase-order-number": { + "type": ["null", "string"] + }, + "quantity": { + "type": ["null", "string"] + }, + "sales-channel": { + "type": ["null", "string"] + }, + "ship-city": { + "type": ["null", "string"] + }, + "ship-country": { + "type": ["null", "string"] + }, + "ship-postal-code": { + "type": ["null", "string"] + }, + "ship-promotion-discount": { + "type": ["null", "string"] + }, + "ship-service-level": { + "type": ["null", "string"] + }, + "ship-state": { + "type": ["null", "string"] + }, + "shipping-price": { + "type": ["null", "string"] + }, + "shipping-tax": { + "type": ["null", "string"] + }, + "sku": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["last-updated-date"] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["last-updated-date"] + } + ] +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json index bedd80958e13b..01cd8190917fc 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json @@ -5,6 +5,9 @@ "GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL": { "createdTime": "2021-07-01T00:00:00Z" }, + "GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL": { + "last-updated-date": "2021-07-01T00:00:00+00:00" + }, "GET_MERCHANT_LISTINGS_ALL_DATA": { "createdTime": "2021-07-01T00:00:00Z" }, diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL.json new file mode 100644 index 0000000000000..f9fd0947e7ff2 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL.json @@ -0,0 +1,103 @@ +{ + "title": "Flat File All Orders Data Reports (by last update)", + "description": "Flat File All Orders Data by Last Update Date General Reports", + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "amazon-order-id": { + "type": "string" + }, + "asin": { + "type": ["null", "string"] + }, + "currency": { + "type": ["null", "string"] + }, + "fulfillment-channel": { + "type": ["null", "string"] + }, + "gift-wrap-price": { + "type": ["null", "string"] + }, + "gift-wrap-tax": { + "type": ["null", "string"] + }, + "is-business-order": { + "type": ["null", "string"] + }, + "item-price": { + "type": ["null", "string"] + }, + "item-promotion-discount": { + "type": ["null", "string"] + }, + "item-status": { + "type": ["null", "string"] + }, + "item-tax": { + "type": ["null", "string"] + }, + "last-updated-date": { + "type": "string", + "format": "date-time" + }, + 
"merchant-order-id": { + "type": ["null", "string"] + }, + "order-channel": { + "type": ["null", "string"] + }, + "order-status": { + "type": ["null", "string"] + }, + "price-designation": { + "type": ["null", "string"] + }, + "product-name": { + "type": ["null", "string"] + }, + "promotion-ids": { + "type": ["null", "string"] + }, + "purchase-date": { + "type": ["null", "string"], + "format": "date-time" + }, + "purchase-order-number": { + "type": ["null", "string"] + }, + "quantity": { + "type": ["null", "string"] + }, + "sales-channel": { + "type": ["null", "string"] + }, + "ship-city": { + "type": ["null", "string"] + }, + "ship-country": { + "type": ["null", "string"] + }, + "ship-postal-code": { + "type": ["null", "string"] + }, + "ship-promotion-discount": { + "type": ["null", "string"] + }, + "ship-service-level": { + "type": ["null", "string"] + }, + "ship-state": { + "type": ["null", "string"] + }, + "shipping-price": { + "type": ["null", "string"] + }, + "shipping-tax": { + "type": ["null", "string"] + }, + "sku": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py index e46d83e575f3f..caa8d1b4b96e5 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py @@ -25,6 +25,7 @@ FbaShipmentsReports, FlatFileOpenListingsReports, FlatFileOrdersReports, + FlatFileOrdersReportsByLastUpdate, FulfilledShipmentsReports, MerchantListingsReports, Orders, @@ -148,6 +149,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: FbaShipmentsReports(**stream_kwargs), FlatFileOpenListingsReports(**stream_kwargs), FlatFileOrdersReports(**stream_kwargs), + FlatFileOrdersReportsByLastUpdate(**stream_kwargs), FulfilledShipmentsReports(**stream_kwargs), MerchantListingsReports(**stream_kwargs), VendorDirectFulfillmentShipping(**stream_kwargs), diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index c91c815e45843..4a6dd4b66e9b3 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -604,6 +604,15 @@ def parse_document(document): return reader +class FlatFileOrdersReportsByLastUpdate(IncrementalReportsAmazonSPStream): + """ + Field definitions: https://sellercentral.amazon.com/gp/help/help.html?itemID=201648780 + """ + + name = "GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL" + cursor_field = "last-updated-date" + + class Orders(IncrementalAmazonSPStream): """ API docs: https://github.com/amzn/selling-partner-api-docs/blob/main/references/orders-api/ordersV0.md diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index f2aeb90f4dc02..0bcbbe6fca0df 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -13,7 +13,7 @@ This source syncs data from the [Amazon Seller Partner API](https://github.com/a This source is capable of syncing the following streams: -- [Order 
Reports](https://sellercentral.amazon.com/gp/help/help.html?itemID=201648780) +- [Order Report (by order date and by last update)](https://sellercentral.amazon.com/gp/help/help.html?itemID=201648780) - [All Listings](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#inventory-reports) - [FBA Inventory Reports](https://sellercentral.amazon.com/gp/help/200740930) - [Amazon-Fulfilled Shipments Report](https://sellercentral.amazon.com/gp/help/help.html?itemID=200453120) @@ -67,7 +67,8 @@ Information about rate limits you may find [here](https://github.com/amzn/sellin | Version | Date | Pull Request | Subject | | :------- | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------- | -| `0.2.13` | 2022-01-18 | [\#9581](https://github.com/airbytehq/airbyte/pull/9581) | Change createdSince parameter to dataStartTime | +| `0.2.14` | 2022-01-19 | [\#9621](https://github.com/airbytehq/airbyte/pull/9621) | Add GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL report | +| `0.2.13` | 2022-01-18 | [\#9581](https://github.com/airbytehq/airbyte/pull/9581) | Change createdSince parameter to dataStartTime | | `0.2.12` | 2022-01-05 | [\#9312](https://github.com/airbytehq/airbyte/pull/9312) | Add all remaining brand analytics report streams | | `0.2.11` | 2022-01-05 | [\#9115](https://github.com/airbytehq/airbyte/pull/9115) | Fix reading only 100 orders | | `0.2.10` | 2021-12-31 | [\#9236](https://github.com/airbytehq/airbyte/pull/9236) | Fix NoAuth deprecation warning | From 6490bf7b7fb29954182d4e640185bfb1803d7aee Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Fri, 21 Jan 2022 12:18:38 -0800 Subject: [PATCH 190/215] fix constructor for connector builds (#9704) --- .../standardtest/destination/DestinationAcceptanceTest.java | 2 +- .../standardtest/source/AbstractSourceConnectorTest.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java index 52f4633168f91..c5c4d9107a02b 100644 --- a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java @@ -310,7 +310,7 @@ void setUpInternal() throws Exception { setup(testEnv); - processFactory = new DockerProcessFactory(workerConfigs, workspaceRoot, workspaceRoot.toString(), localRoot.toString(), "host", false); + processFactory = new DockerProcessFactory(workerConfigs, workspaceRoot, workspaceRoot.toString(), localRoot.toString(), "host"); } @AfterEach diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/AbstractSourceConnectorTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/AbstractSourceConnectorTest.java index fa0c0e888c062..02aeaacb3fde8 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/AbstractSourceConnectorTest.java +++ 
b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/AbstractSourceConnectorTest.java @@ -118,8 +118,7 @@ public void setUpInternal() throws Exception { workspaceRoot, workspaceRoot.toString(), localRoot.toString(), - "host", - false); + "host"); } @AfterEach From 85299282c74c82b0e944c2569e8d5511b5d09e2b Mon Sep 17 00:00:00 2001 From: Anna Lvova <37615075+annalvova05@users.noreply.github.com> Date: Fri, 21 Jan 2022 22:01:25 +0100 Subject: [PATCH 191/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20TikTok=20Market?= =?UTF-8?q?ing:=20support=20oauth=20flow=20(#7636)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add oauth * bump version * upd schema * upd oauth in spec * add java part * change oauth params * change spec * change spec * upd java * upd spec * upd extractOAuthOutput * upd spec.py * upd spec.py * bump version * remove spec.json * upd _prepare_stream_args after review * format * format * isort * bump version --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 179 +++++++++++++----- .../source-tiktok-marketing/Dockerfile | 2 +- .../acceptance-test-config.yml | 16 ++ .../acceptance-test-docker.sh | 1 - .../source-tiktok-marketing/bootstrap.md | 2 - .../invalid_config_access_token.json | 8 + .../invalid_config_oauth.json | 8 + .../integration_tests/spec.json | 178 ++++++++++++----- .../source-tiktok-marketing/setup.py | 8 +- .../source_tiktok_marketing/source.py | 56 ++++-- .../source_tiktok_marketing/spec.py | 84 +++++--- .../source_tiktok_marketing/streams.py | 20 +- .../unit_tests/unit_test.py | 11 +- .../oauth/OAuthImplementationFactory.java | 1 + .../oauth/flows/TikTokMarketingOAuthFlow.java | 99 ++++++++++ docs/integrations/sources/tiktok-marketing.md | 11 +- 17 files changed, 522 insertions(+), 164 deletions(-) create mode 100644 airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/invalid_config_access_token.json create mode 100644 airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/invalid_config_oauth.json create mode 100644 airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TikTokMarketingOAuthFlow.java diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7d535ac41666a..8cb515efb9b54 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -724,7 +724,7 @@ - name: TikTok Marketing sourceDefinitionId: 4bfac00d-ce15-44ff-95b9-9e3c3e8fbd35 dockerRepository: airbyte/source-tiktok-marketing - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/tiktok-marketing icon: tiktok.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 64a2fd2273fe6..c8a2ae1581c24 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -7536,7 +7536,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-tiktok-marketing:0.1.3" +- dockerImage: "airbyte/source-tiktok-marketing:0.1.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" changelogUrl: 
"https://docs.airbyte.io/integrations/sources/tiktok-marketing" @@ -7544,21 +7544,74 @@ title: "TikTok Marketing Source Spec" type: "object" properties: - environment: - title: "Environment" - default: "Production" - order: 2 + start_date: + title: "Start Date" + description: "The Start Date in format: YYYY-MM-DD. Any data before this\ + \ date will not be replicated. If this parameter is not set, all data\ + \ will be replicated." + default: "2016-09-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + order: 0 + type: "string" + report_granularity: + title: "Report Granularity" + description: "Which time granularity should be grouped by; for LIFETIME\ + \ there will be no grouping. This option is used for reports' streams\ + \ only." + default: "DAY" + enum: + - "LIFETIME" + - "DAY" + - "HOUR" + order: 1 + type: "string" + credentials: + title: "Authorization Method" + default: {} + order: 3 type: "object" oneOf: - - title: "Production" + - title: "OAuth2.0" type: "object" properties: - environment: - title: "Environment" - const: "prod" + auth_type: + title: "Auth Type" + const: "oauth2.0" + order: 0 + enum: + - "oauth2.0" type: "string" app_id: - title: "App Id" + title: "App ID" + description: "The App ID applied by the developer." + airbyte_secret: true + type: "string" + secret: + title: "Secret" + description: "The private key of the developer's application." + airbyte_secret: true + type: "string" + access_token: + title: "Access Token" + description: "Long-term Authorized Access Token." + airbyte_secret: true + type: "string" + required: + - "app_id" + - "secret" + - "access_token" + - title: "Production Access Token" + type: "object" + properties: + auth_type: + title: "Auth Type" + const: "prod_access_token" + order: 0 + enum: + - "prod_access_token" + type: "string" + app_id: + title: "App ID" description: "The App ID applied by the developer." type: "string" secret: @@ -7566,52 +7619,38 @@ description: "The private key of the developer application." airbyte_secret: true type: "string" + access_token: + title: "Access Token" + description: "The Long-term Authorized Access Token." + airbyte_secret: true + type: "string" required: - "app_id" - "secret" - - title: "Sandbox" + - "access_token" + - title: "Sandbox Access Token" type: "object" properties: - environment: - title: "Environment" - const: "sandbox" + auth_type: + title: "Auth Type" + const: "sandbox_access_token" + order: 0 + enum: + - "sandbox_access_token" type: "string" advertiser_id: - title: "Advertiser Id" + title: "Advertiser ID" description: "The Advertiser ID which generated for the developer's\ \ Sandbox application." type: "string" + access_token: + title: "Access Token" + description: "The Long-term Authorized Access Token." + airbyte_secret: true + type: "string" required: - "advertiser_id" - access_token: - title: "Access Token" - description: "The Long-term Authorized Access Token." - order: 1 - airbyte_secret: true - type: "string" - start_date: - title: "Start Date" - description: "The Start Date in format: YYYY-MM-DD. Any data before this\ - \ date will not be replicated. If this parameter is not set, all data\ - \ will be replicated." - default: "2016-09-01" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - order: 3 - type: "string" - report_granularity: - title: "Report Granularity" - description: "Which time granularity should be grouped by; for LIFETIME\ - \ there will be no grouping. This option is used for reports' streams\ - \ only." 
- default: "DAY" - enum: - - "LIFETIME" - - "DAY" - - "HOUR" - order: 4 - type: "string" - required: - - "access_token" + - "access_token" supportsIncremental: true supportsNormalization: false supportsDBT: false @@ -7619,6 +7658,58 @@ - "overwrite" - "append" - "append_dedup" + advanced_auth: + auth_flow_type: "oauth2.0" + predicate_key: + - "credentials" + - "auth_type" + predicate_value: "oauth2.0" + oauth_config_specification: + complete_oauth_output_specification: + title: "CompleteOauthOutputSpecification" + type: "object" + properties: + access_token: + title: "Access Token" + path_in_connector_config: + - "credentials" + - "access_token" + type: "string" + required: + - "access_token" + complete_oauth_server_input_specification: + title: "CompleteOauthServerInputSpecification" + type: "object" + properties: + app_id: + title: "App Id" + type: "string" + secret: + title: "Secret" + type: "string" + required: + - "app_id" + - "secret" + complete_oauth_server_output_specification: + title: "CompleteOauthServerOutputSpecification" + type: "object" + properties: + app_id: + title: "App Id" + path_in_connector_config: + - "credentials" + - "app_id" + type: "string" + secret: + title: "Secret" + path_in_connector_config: + - "credentials" + - "secret" + type: "string" + required: + - "app_id" + - "secret" + additionalProperties: true - dockerImage: "airbyte/source-trello:0.1.6" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/trello" diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/Dockerfile b/airbyte-integrations/connectors/source-tiktok-marketing/Dockerfile index 9b6c89eafe868..6b72d87400746 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/Dockerfile +++ b/airbyte-integrations/connectors/source-tiktok-marketing/Dockerfile @@ -32,5 +32,5 @@ COPY source_tiktok_marketing ./source_tiktok_marketing ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-tiktok-marketing diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/acceptance-test-config.yml b/airbyte-integrations/connectors/source-tiktok-marketing/acceptance-test-config.yml index c7287e6b34eb3..23c0b2df5158d 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-tiktok-marketing/acceptance-test-config.yml @@ -10,17 +10,33 @@ tests: status: "succeed" - config_path: "secrets/config.json" status: "succeed" + - config_path: "secrets/new_config_prod.json" + status: "succeed" + - config_path: "secrets/new_config_sandbox.json" + status: "succeed" + - config_path: "secrets/config_oauth.json" + status: "succeed" - config_path: "integration_tests/invalid_config.json" status: "failed" + - config_path: "integration_tests/invalid_config_access_token.json" + status: "failed" + - config_path: "integration_tests/invalid_config_oauth.json" + status: "failed" discovery: - config_path: "secrets/config.json" - config_path: "secrets/prod_config.json" + - config_path: "secrets/new_config_prod.json" + - config_path: "secrets/config_oauth.json" basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" empty_streams: [ "ads" , "ads_reports", "ad_groups_reports", "campaigns_reports" ] - config_path: "secrets/prod_config.json" configured_catalog_path: 
"integration_tests/configured_prod_catalog.json" + - config_path: "secrets/new_config_prod.json" + configured_catalog_path: "integration_tests/configured_prod_catalog.json" + - config_path: "secrets/config_oauth.json" + configured_catalog_path: "integration_tests/configured_prod_catalog.json" incremental: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-tiktok-marketing/acceptance-test-docker.sh index 817da8ed3edf5..10fb6991808fc 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/acceptance-test-docker.sh +++ b/airbyte-integrations/connectors/source-tiktok-marketing/acceptance-test-docker.sh @@ -13,4 +13,3 @@ docker run --rm -it \ -v $(pwd):/test_input \ airbyte/source-acceptance-test:latest \ --acceptance-test-config /test_input - diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/bootstrap.md b/airbyte-integrations/connectors/source-tiktok-marketing/bootstrap.md index 5dccedf09b5e8..95bf87a6de807 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/bootstrap.md +++ b/airbyte-integrations/connectors/source-tiktok-marketing/bootstrap.md @@ -11,5 +11,3 @@ The basic entity is 'advertiser'. All other streams use this required parameter Dependent streams have required parameter advertiser_id. As cursor field this connector uses "modify_time" values. But endpoints don't provide any mechanism for correct data filtering and sorting thus for incremental sync this connector tries to load all data and to validate a cursor field value on own side. - - diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/invalid_config_access_token.json b/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/invalid_config_access_token.json new file mode 100644 index 0000000000000..cc955b8aa2b7f --- /dev/null +++ b/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/invalid_config_access_token.json @@ -0,0 +1,8 @@ +{ + "credentials": { + "auth_type": "sandbox_access_token", + "access_token": "", + "start_date": "2021-01-01", + "advertiser_id": "11111111" + } +} diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/invalid_config_oauth.json b/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/invalid_config_oauth.json new file mode 100644 index 0000000000000..e9b101630545e --- /dev/null +++ b/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/invalid_config_oauth.json @@ -0,0 +1,8 @@ +{ + "credentials": { + "auth_type": "oauth2.0", + "app_id": "1", + "secret": "2", + "access_token": "" + } +} diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/spec.json b/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/spec.json index 236e6dd2efea9..d9e55db849cbb 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-tiktok-marketing/integration_tests/spec.json @@ -5,22 +5,73 @@ "title": "TikTok Marketing Source Spec", "type": "object", "properties": { - "environment": { - "title": "Environment", - "default": "Production", - "order": 2, + "start_date": { + "title": "Start Date", + "description": "The Start Date in format: YYYY-MM-DD. Any data before this date will not be replicated. 
If this parameter is not set, all data will be replicated.", + "default": "2016-09-01", + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", + "order": 0, + "type": "string" + }, + "report_granularity": { + "title": "Report Granularity", + "description": "Which time granularity should be grouped by; for LIFETIME there will be no grouping. This option is used for reports' streams only.", + "default": "DAY", + "enum": ["LIFETIME", "DAY", "HOUR"], + "order": 1, + "type": "string" + }, + "credentials": { + "title": "Authorization Method", + "default": {}, + "order": 3, + "type": "object", "oneOf": [ { - "title": "Production", + "title": "OAuth2.0", "type": "object", "properties": { - "environment": { - "title": "Environment", - "const": "prod", + "auth_type": { + "title": "Auth Type", + "const": "oauth2.0", + "order": 0, + "enum": ["oauth2.0"], "type": "string" }, "app_id": { - "title": "App Id", + "title": "App ID", + "description": "The App ID applied by the developer.", + "airbyte_secret": true, + "type": "string" + }, + "secret": { + "title": "Secret", + "description": "The private key of the developer's application.", + "airbyte_secret": true, + "type": "string" + }, + "access_token": { + "title": "Access Token", + "description": "Long-term Authorized Access Token.", + "airbyte_secret": true, + "type": "string" + } + }, + "required": ["app_id", "secret", "access_token"] + }, + { + "title": "Production Access Token", + "type": "object", + "properties": { + "auth_type": { + "title": "Auth Type", + "const": "prod_access_token", + "order": 0, + "enum": ["prod_access_token"], + "type": "string" + }, + "app_id": { + "title": "App ID", "description": "The App ID applied by the developer.", "type": "string" }, @@ -29,56 +80,97 @@ "description": "The private key of the developer application.", "airbyte_secret": true, "type": "string" + }, + "access_token": { + "title": "Access Token", + "description": "The Long-term Authorized Access Token.", + "airbyte_secret": true, + "type": "string" } }, - "required": ["app_id", "secret"] + "required": ["app_id", "secret", "access_token"] }, { - "title": "Sandbox", + "title": "Sandbox Access Token", "type": "object", "properties": { - "environment": { - "title": "Environment", - "const": "sandbox", + "auth_type": { + "title": "Auth Type", + "const": "sandbox_access_token", + "order": 0, + "enum": ["sandbox_access_token"], "type": "string" }, "advertiser_id": { - "title": "Advertiser Id", + "title": "Advertiser ID", "description": "The Advertiser ID which generated for the developer's Sandbox application.", "type": "string" + }, + "access_token": { + "title": "Access Token", + "description": "The Long-term Authorized Access Token.", + "airbyte_secret": true, + "type": "string" } }, - "required": ["advertiser_id"] + "required": ["advertiser_id", "access_token"] } - ], - "type": "object" - }, - "access_token": { - "title": "Access Token", - "description": "The Long-term Authorized Access Token.", - "order": 1, - "airbyte_secret": true, - "type": "string" + ] + } + } + }, + "supportsIncremental": true, + "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], + "advanced_auth": { + "auth_flow_type": "oauth2.0", + "predicate_key": ["credentials", "auth_type"], + "predicate_value": "oauth2.0", + "oauth_config_specification": { + "complete_oauth_output_specification": { + "title": "CompleteOauthOutputSpecification", + "type": "object", + "properties": { + "access_token": { + "title": "Access Token", + "path_in_connector_config": ["credentials", 
"access_token"], + "type": "string" + } + }, + "required": ["access_token"] }, - "start_date": { - "title": "Start Date", - "description": "The Start Date in format: YYYY-MM-DD. Any data before this date will not be replicated. If this parameter is not set, all data will be replicated.", - "default": "2016-09-01", - "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", - "order": 3, - "type": "string" + "complete_oauth_server_input_specification": { + "title": "CompleteOauthServerInputSpecification", + "type": "object", + "properties": { + "app_id": { + "title": "App Id", + "type": "string" + }, + "secret": { + "title": "Secret", + "type": "string" + } + }, + "required": ["app_id", "secret"] }, - "report_granularity": { - "title": "Report Granularity", - "description": "Which time granularity should be grouped by; for LIFETIME there will be no grouping. This option is used for reports' streams only.", - "default": "DAY", - "enum": ["LIFETIME", "DAY", "HOUR"], - "order": 4, - "type": "string" + "complete_oauth_server_output_specification": { + "title": "CompleteOauthServerOutputSpecification", + "type": "object", + "properties": { + "app_id": { + "title": "App Id", + "path_in_connector_config": ["credentials", "app_id"], + "type": "string" + }, + "secret": { + "title": "Secret", + "path_in_connector_config": ["credentials", "secret"], + "type": "string" + } + }, + "required": ["app_id", "secret"] } - }, - "required": ["access_token"] + } }, - "supportsIncremental": true, - "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"] + "additionalProperties": true } diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/setup.py b/airbyte-integrations/connectors/source-tiktok-marketing/setup.py index e94f26b43a5da..97df49b1e6f6c 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/setup.py +++ b/airbyte-integrations/connectors/source-tiktok-marketing/setup.py @@ -5,9 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = [ - "airbyte-cdk~=0.1.42", -] +MAIN_REQUIREMENTS = ["airbyte-cdk~=0.1.42"] TEST_REQUIREMENTS = ["pytest~=6.1", "source-acceptance-test", "requests-mock==1.9.3", "timeout-decorator==0.5.0"] @@ -19,7 +17,5 @@ packages=find_packages(), install_requires=MAIN_REQUIREMENTS, package_data={"": ["*.json"]}, - extras_require={ - "tests": TEST_REQUIREMENTS, - }, + extras_require={"tests": TEST_REQUIREMENTS}, ) diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/source.py b/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/source.py index 3eaa05ad70b62..6cc3da7b4fbe7 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/source.py +++ b/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/source.py @@ -5,25 +5,21 @@ from typing import Any, List, Mapping, Tuple from airbyte_cdk.logger import AirbyteLogger -from airbyte_cdk.models import ConnectorSpecification, SyncMode +from airbyte_cdk.models import (AdvancedAuth, AuthFlowType, + ConnectorSpecification, + OAuthConfigSpecification, SyncMode) from airbyte_cdk.models.airbyte_protocol import DestinationSyncMode from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator -from .spec import SourceTiktokMarketingSpec -from .streams import ( - DEFAULT_START_DATE, - AdGroups, - AdGroupsReports, - Ads, - AdsReports, - Advertisers, - AdvertisersReports, - 
Campaigns, - CampaignsReports, - ReportGranularity, -) +from .spec import (CompleteOauthOutputSpecification, + CompleteOauthServerInputSpecification, + CompleteOauthServerOutputSpecification, + SourceTiktokMarketingSpec) +from .streams import (DEFAULT_START_DATE, AdGroups, AdGroupsReports, Ads, + AdsReports, Advertisers, AdvertisersReports, Campaigns, + CampaignsReports, ReportGranularity) DOCUMENTATION_URL = "https://docs.airbyte.io/integrations/sources/tiktok-marketing" @@ -50,17 +46,41 @@ def spec(self, *args, **kwargs) -> ConnectorSpecification: supportsIncremental=True, supported_destination_sync_modes=[DestinationSyncMode.overwrite, DestinationSyncMode.append, DestinationSyncMode.append_dedup], connectionSpecification=SourceTiktokMarketingSpec.schema(), + additionalProperties=True, + advanced_auth=AdvancedAuth( + auth_flow_type=AuthFlowType.oauth2_0, + predicate_key=["credentials", "auth_type"], + predicate_value="oauth2.0", + oauth_config_specification=OAuthConfigSpecification( + complete_oauth_output_specification=CompleteOauthOutputSpecification.schema(), + complete_oauth_server_input_specification=CompleteOauthServerInputSpecification.schema(), + complete_oauth_server_output_specification=CompleteOauthServerOutputSpecification.schema(), + ), + ), ) @staticmethod def _prepare_stream_args(config: Mapping[str, Any]) -> Mapping[str, Any]: """Converts an input configure to stream arguments""" + credentials = config.get("credentials") + if credentials: + # used for new config format + access_token = credentials["access_token"] + secret = credentials.get("secret") + app_id = int(credentials.get("app_id", 0)) + advertiser_id = int(credentials.get("advertiser_id", 0)) + else: + access_token = config["access_token"] + secret = config.get("environment", {}).get("secret") + app_id = int(config.get("environment", {}).get("app_id", 0)) + advertiser_id = int(config.get("environment", {}).get("advertiser_id", 0)) + return { - "authenticator": TiktokTokenAuthenticator(config["access_token"]), + "authenticator": TiktokTokenAuthenticator(access_token), "start_date": config.get("start_date") or DEFAULT_START_DATE, - "advertiser_id": int(config["environment"].get("advertiser_id", 0)), - "app_id": int(config["environment"].get("app_id", 0)), - "secret": config["environment"].get("secret"), + "advertiser_id": advertiser_id, + "app_id": app_id, + "secret": secret, } def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, any]: diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/spec.py b/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/spec.py index d1648f5e16b75..e393ae2ba73c6 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/spec.py +++ b/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/spec.py @@ -8,67 +8,89 @@ from typing import Union from jsonschema import RefResolver + from pydantic import BaseModel, Field from .streams import DEFAULT_START_DATE, ReportGranularity +class OauthCredSpec(BaseModel): + class Config: + title = "OAuth2.0" + + auth_type: str = Field(default="oauth2.0", const=True, order=0, enum=["oauth2.0"]) + + app_id: str = Field(title="App ID", description="The App ID applied by the developer.", airbyte_secret=True) + + secret: str = Field(title="Secret", description="The private key of the developer's application.", airbyte_secret=True) + + access_token: str = Field(title="Access Token", 
description="Long-term Authorized Access Token.", airbyte_secret=True) + + class SandboxEnvSpec(BaseModel): class Config: - title = "Sandbox" + title = "Sandbox Access Token" - environment: str = Field("sandbox", const=True) + auth_type: str = Field(default="sandbox_access_token", const=True, order=0, enum=["sandbox_access_token"]) # it is string because UI has the bug https://github.com/airbytehq/airbyte/issues/6875 advertiser_id: str = Field( - description="The Advertiser ID which generated for the developer's Sandbox application.", + title="Advertiser ID", description="The Advertiser ID which generated for the developer's Sandbox application." ) + access_token: str = Field(title="Access Token", description="The Long-term Authorized Access Token.", airbyte_secret=True) + class ProductionEnvSpec(BaseModel): class Config: - title = "Production" + title = "Production Access Token" - environment: str = Field("prod", const=True) + auth_type: str = Field(default="prod_access_token", const=True, order=0, enum=["prod_access_token"]) # it is float because UI has the bug https://github.com/airbytehq/airbyte/issues/6875 - app_id: str = Field( - description="The App ID applied by the developer.", - ) - secret: str = Field(description="The private key of the developer application.", airbyte_secret=True) + app_id: str = Field(description="The App ID applied by the developer.", title="App ID") + secret: str = Field(title="Secret", description="The private key of the developer application.", airbyte_secret=True) + + access_token: str = Field(title="Access Token", description="The Long-term Authorized Access Token.", airbyte_secret=True) class SourceTiktokMarketingSpec(BaseModel): class Config: title = "TikTok Marketing Source Spec" - environment: Union[ProductionEnvSpec, SandboxEnvSpec] = Field(default=ProductionEnvSpec.Config.title, order=2) - - access_token: str = Field(description="The Long-term Authorized Access Token.", order=1, airbyte_secret=True) - start_date: str = Field( - description="The Start Date in format: YYYY-MM-DD. Any data before this date will not be replicated. If this parameter is not set, all data will be replicated.", + title="Start Date", default=DEFAULT_START_DATE, pattern="^[0-9]{4}-[0-9]{2}-[0-9]{2}$", - order=3, + description="The Start Date in format: YYYY-MM-DD. Any data before this date will not be replicated. " + "If this parameter is not set, all data will be replicated.", + order=0, ) report_granularity: str = Field( + title="Report Granularity", description="Which time granularity should be grouped by; for LIFETIME there will be no grouping. 
" "This option is used for reports' streams only.", default=ReportGranularity.default().value, enum=[g.value for g in ReportGranularity], - order=4, + order=1, ) - @staticmethod - def change_format_to_oneOf(schema: dict, field_name: str) -> dict: - - schema["properties"][field_name]["type"] = "object" - if "oneOf" not in schema["properties"][field_name]: - schema["properties"][field_name]["oneOf"] = schema["properties"][field_name].pop("anyOf") + credentials: Union[OauthCredSpec, ProductionEnvSpec, SandboxEnvSpec] = Field( + title="Authorization Method", order=3, default={}, type="object" + ) - return schema + @classmethod + def change_format_to_oneOf(cls, schema: dict) -> dict: + new_schema = {} + for key, value in schema.items(): + if isinstance(value, dict): + value = cls.change_format_to_oneOf(value) + if key == "anyOf": + new_schema["oneOf"] = value + else: + new_schema[key] = value + return new_schema @staticmethod def resolve_refs(schema: dict) -> dict: @@ -85,5 +107,19 @@ def resolve_refs(schema: dict) -> dict: def schema(cls) -> dict: """we're overriding the schema classmethod to enable some post-processing""" schema = super().schema() - schema = cls.change_format_to_oneOf(schema, "environment") + schema = cls.change_format_to_oneOf(schema) return cls.resolve_refs(schema) + + +class CompleteOauthOutputSpecification(BaseModel): + access_token: str = Field(path_in_connector_config=["credentials", "access_token"]) + + +class CompleteOauthServerInputSpecification(BaseModel): + app_id: str = Field() + secret: str = Field() + + +class CompleteOauthServerOutputSpecification(BaseModel): + app_id: str = Field(path_in_connector_config=["credentials", "app_id"]) + secret: str = Field(path_in_connector_config=["credentials", "secret"]) diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/streams.py b/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/streams.py index 61924938d6328..79434d53c0152 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/streams.py +++ b/airbyte-integrations/connectors/source-tiktok-marketing/source_tiktok_marketing/streams.py @@ -9,17 +9,20 @@ from decimal import Decimal from enum import Enum from functools import total_ordering -from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple, TypeVar, Union +from typing import (Any, Dict, Iterable, List, Mapping, MutableMapping, + Optional, Tuple, TypeVar, Union) import pendulum -import pydantic import requests + +import pydantic from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams.core import package_name_from_class from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.sources.streams.http.auth import NoAuth from airbyte_cdk.sources.utils.schema_helpers import ResourceSchemaLoader -from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer +from airbyte_cdk.sources.utils.transform import (TransformConfig, + TypeTransformer) # TikTok Initial release date is September 2016 DEFAULT_START_DATE = "2016-09-01" @@ -198,11 +201,7 @@ def request_params( self, stream_state: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, **kwargs ) -> MutableMapping[str, Any]: - return { - "access_token": self._access_token, - "secret": self._secret, - "app_id": self._app_id, - } + return {"access_token": self._access_token, "secret": self._secret, "app_id": self._app_id} def path(self, *args, **kwargs) -> str: return 
"oauth2/advertiser/get/" @@ -455,10 +454,7 @@ def _get_reporting_dimensions(self): ReportLevel.ADGROUP: "adgroup_id", ReportLevel.AD: "ad_id", } - spec_time_dimensions = { - ReportGranularity.DAY: "stat_time_day", - ReportGranularity.HOUR: "stat_time_hour", - } + spec_time_dimensions = {ReportGranularity.DAY: "stat_time_day", ReportGranularity.HOUR: "stat_time_hour"} if self.report_level and self.report_level in spec_id_dimensions: result.append(spec_id_dimensions[self.report_level]) diff --git a/airbyte-integrations/connectors/source-tiktok-marketing/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-tiktok-marketing/unit_tests/unit_test.py index 87ff4b5971408..594c8d3c08546 100644 --- a/airbyte-integrations/connectors/source-tiktok-marketing/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-tiktok-marketing/unit_tests/unit_test.py @@ -7,10 +7,12 @@ from typing import Any, Dict, Iterable, List, Mapping, Tuple import pendulum + import pytest import requests_mock import timeout_decorator -from airbyte_cdk.sources.streams.http.exceptions import UserDefinedBackoffException +from airbyte_cdk.sources.streams.http.exceptions import \ + UserDefinedBackoffException from source_tiktok_marketing import SourceTiktokMarketing from source_tiktok_marketing.streams import Ads, Advertisers, JsonUpdatedState @@ -59,12 +61,7 @@ def generate_pages(items: List[Mapping[str, Any]], page_size: int, last_empty: b "code": 0, "request_id": "unique_request_id", "data": { - "page_info": { - "total_number": total_number, - "page": page_number, - "page_size": page_size, - "total_page": len(page_items), - }, + "page_info": {"total_number": total_number, "page": page_number, "page_size": page_size, "total_page": len(page_items)}, "list": page_items, }, }, diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index 2c2498c132c90..8331cef0b68b9 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -58,6 +58,7 @@ public OAuthImplementationFactory(final ConfigRepository configRepository, final .put("airbyte/source-zendesk-sunshine", new ZendeskSunshineOAuthFlow(configRepository, httpClient)) .put("airbyte/source-mailchimp", new MailchimpOAuthFlow(configRepository, httpClient)) .put("airbyte/source-shopify", new ShopifyOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-tiktok-marketing", new TikTokMarketingOAuthFlow(configRepository, httpClient)) .build(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TikTokMarketingOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TikTokMarketingOAuthFlow.java new file mode 100644 index 0000000000000..d9f7bddf1358d --- /dev/null +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TikTokMarketingOAuthFlow.java @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.BaseOAuth2Flow; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import org.apache.http.client.utils.URIBuilder; + +/** + * Following docs from + * https://ads.tiktok.com/marketing_api/docs?id=1701890912382977&is_new_connect=0&is_new_user=0&rid=sta6fe2yww + */ +public class TikTokMarketingOAuthFlow extends BaseOAuth2Flow { + + private static final String ACCESS_TOKEN_URL = "https://business-api.tiktok.com/open_api/v1.2/oauth2/access_token/"; + + protected String getClientIdUnsafe(final JsonNode oauthConfig) { + return getConfigValueUnsafe(oauthConfig, "app_id"); + } + + protected String getClientSecretUnsafe(final JsonNode oauthConfig) { + return getConfigValueUnsafe(oauthConfig, "secret"); + } + + public TikTokMarketingOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient) { + super(configRepository, httpClient); + } + + @VisibleForTesting + public TikTokMarketingOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient, final Supplier<String> stateSupplier) { + super(configRepository, httpClient, stateSupplier); + } + + @Override + protected String formatConsentUrl(final UUID definitionId, + final String appId, + final String redirectUrl, + final JsonNode inputOAuthConfiguration) + throws IOException { + + final URIBuilder builder = new URIBuilder() + .setScheme("https") + .setHost("ads.tiktok.com") + .setPath("marketing_api/auth") + // required + .addParameter("app_id", appId) + .addParameter("redirect_uri", redirectUrl) + .addParameter("state", getState()); + + try { + return builder.build().toString(); + } catch (final URISyntaxException e) { + throw new IOException("Failed to format Consent URL for OAuth flow", e); + } + } + + @Override + protected Map<String, String> getAccessTokenQueryParameters(String appId, + String secret, + String authCode, + String redirectUrl) { + return ImmutableMap.<String, String>builder() + // required + .put("auth_code", authCode) + .put("app_id", appId) + .put("secret", secret) + .build(); + } + + @Override + protected String getAccessTokenUrl(final JsonNode inputOAuthConfiguration) { + return ACCESS_TOKEN_URL; + } + + @Override + protected Map<String, Object> extractOAuthOutput(final JsonNode data, final String accessTokenUrl) throws IOException { + final Map<String, Object> result = new HashMap<>(); + // getting out access_token + if ((data.has("data")) && (data.get("data").has("access_token"))) { + result.put("access_token", data.get("data").get("access_token").asText()); + } else { + throw new IOException(String.format("Missing 'access_token' in query params from %s", accessTokenUrl)); + } + return result; + } + +} diff --git a/docs/integrations/sources/tiktok-marketing.md b/docs/integrations/sources/tiktok-marketing.md index 4f6f572661cd3..001d7bd5df4fd 100644 --- a/docs/integrations/sources/tiktok-marketing.md +++ b/docs/integrations/sources/tiktok-marketing.md @@ -50,8 +50,9 @@ Please read [How to get your AppID, Secret and Access Token](https://ads.tiktok.
## Changelog | Version | Date | Pull Request | Subject | -| :------ | :-------- | :----- | :------ | -| 0.1.3 | 2021-12-10 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | -| 0.1.2 | 2021-12-02 | [8292](https://github.com/airbytehq/airbyte/pull/8292) | Support reports | -| 0.1.1 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | -| 0.1.0 | 2021-09-18 | [5887](https://github.com/airbytehq/airbyte/pull/5887) | Release TikTok Marketing CDK Connector | +|:--------|:-----------| :----- | :------ | +| 0.1.4 | 2021-12-30 | [7636](https://github.com/airbytehq/airbyte/pull/7636) | Add OAuth support | +| 0.1.3 | 2021-12-10 | [8425](https://github.com/airbytehq/airbyte/pull/8425) | Update title, description fields in spec | +| 0.1.2 | 2021-12-02 | [8292](https://github.com/airbytehq/airbyte/pull/8292) | Support reports | +| 0.1.1 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | +| 0.1.0 | 2021-09-18 | [5887](https://github.com/airbytehq/airbyte/pull/5887) | Release TikTok Marketing CDK Connector | From 0864c0039fed371f0590ee2ff11e73fea25addfd Mon Sep 17 00:00:00 2001 From: Parker Mossman Date: Fri, 21 Jan 2022 14:48:13 -0800 Subject: [PATCH 192/215] Revert "use attemptNumber instead of attemptId where appropriate (#9671)" (#9706) This reverts commit 0bad09965024329cf47bda43b53989edcb3d20d2. --- .../scheduler/persistence/JobPersistence.java | 4 ++-- .../ConnectionManagerWorkflowImpl.java | 4 ++-- .../JobCreationAndStatusUpdateActivity.java | 4 ++-- ...obCreationAndStatusUpdateActivityImpl.java | 9 +++++---- ...obCreationAndStatusUpdateActivityTest.java | 19 +++++++++---------- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java index c2d8b46d9e05f..6abb06991082b 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java @@ -89,7 +89,7 @@ public interface JobPersistence { * will not be changed if it is already in a terminal state. * * @param jobId job id - * @param attemptNumber attempt number + * @param attemptNumber attempt id * @throws IOException exception due to interaction with persistence */ void failAttempt(long jobId, int attemptNumber) throws IOException; @@ -99,7 +99,7 @@ public interface JobPersistence { * is changed regardless of what state it is in. 
* * @param jobId job id - * @param attemptNumber attempt number + * @param attemptNumber attempt id * @throws IOException exception due to interaction with persistence */ void succeedAttempt(long jobId, int attemptNumber) throws IOException; diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java index d6c53efdf9c7d..058bb927188d1 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java @@ -185,7 +185,7 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr private void reportSuccess(final ConnectionUpdaterInput connectionUpdaterInput) { jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput( maybeJobId.get(), - connectionUpdaterInput.getAttemptNumber(), + maybeAttemptId.get(), standardSyncOutput.orElse(null))); connectionUpdaterInput.setJobId(null); @@ -196,7 +196,7 @@ private void reportSuccess(final ConnectionUpdaterInput connectionUpdaterInput) private void reportFailure(final ConnectionUpdaterInput connectionUpdaterInput) { jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput( connectionUpdaterInput.getJobId(), - connectionUpdaterInput.getAttemptNumber())); + connectionUpdaterInput.getAttemptId())); final int maxAttempt = configFetchActivity.getMaxAttempt().getMaxAttempt(); final int attemptNumber = connectionUpdaterInput.getAttemptNumber(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java index 285f349d2cdcb..385f952eb56a8 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java @@ -77,7 +77,7 @@ class AttemptCreationOutput { class JobSuccessInput { private long jobId; - private int attemptNumber; + private int attemptId; private StandardSyncOutput standardSyncOutput; } @@ -110,7 +110,7 @@ class JobFailureInput { class AttemptFailureInput { private long jobId; - private int attemptNumber; + private int attemptId; } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java index 4e9150b8411c2..bfd122a8d8089 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java @@ -110,11 +110,11 @@ public void jobSuccess(final JobSuccessInput input) { try { if (input.getStandardSyncOutput() != null) { final JobOutput jobOutput = new JobOutput().withSync(input.getStandardSyncOutput()); - jobPersistence.writeOutput(input.getJobId(), input.getAttemptNumber(), jobOutput); + jobPersistence.writeOutput(input.getJobId(), input.getAttemptId(), jobOutput); } else { - log.warn("The job {} doesn't have an input 
for attempt number {}", input.getJobId(), input.getAttemptNumber()); + log.warn("The job {} doesn't have an input for the attempt {}", input.getJobId(), input.getAttemptId()); } - jobPersistence.succeedAttempt(input.getJobId(), input.getAttemptNumber()); + jobPersistence.succeedAttempt(input.getJobId(), input.getAttemptId()); final Job job = jobPersistence.getJob(input.getJobId()); jobNotifier.successJob(job); trackCompletion(job, JobStatus.SUCCEEDED); @@ -138,7 +138,8 @@ public void jobFailure(final JobFailureInput input) { @Override public void attemptFailure(final AttemptFailureInput input) { try { - jobPersistence.failAttempt(input.getJobId(), input.getAttemptNumber()); + jobPersistence.failAttempt(input.getJobId(), input.getAttemptId()); + final Job job = jobPersistence.getJob(input.getJobId()); } catch (final IOException e) { throw new RetryableException(e); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java index 1eb94fcef1676..6cb059dee1187 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java @@ -72,7 +72,6 @@ public class JobCreationAndStatusUpdateActivityTest { private static final UUID CONNECTION_ID = UUID.randomUUID(); private static final long JOB_ID = 123L; private static final int ATTEMPT_ID = 321; - private static final int ATTEMPT_NUMBER = 2; private static final StandardSyncOutput standardSyncOutput = new StandardSyncOutput() .withStandardSyncSummary( new StandardSyncSummary() @@ -147,11 +146,11 @@ class Update { @Test public void setJobSuccess() throws IOException { - jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_NUMBER, standardSyncOutput)); + jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID, standardSyncOutput)); final JobOutput jobOutput = new JobOutput().withSync(standardSyncOutput); - Mockito.verify(mJobPersistence).writeOutput(JOB_ID, ATTEMPT_NUMBER, jobOutput); - Mockito.verify(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_NUMBER); + Mockito.verify(mJobPersistence).writeOutput(JOB_ID, ATTEMPT_ID, jobOutput); + Mockito.verify(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_ID); Mockito.verify(mJobNotifier).successJob(Mockito.any()); Mockito.verify(mJobtracker).trackSync(Mockito.any(), Mockito.eq(JobState.SUCCEEDED)); } @@ -159,9 +158,9 @@ public void setJobSuccess() throws IOException { @Test public void setJobSuccessWrapException() throws IOException { Mockito.doThrow(new IOException()) - .when(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_NUMBER); + .when(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_ID); - Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_NUMBER, null))) + Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID, null))) .isInstanceOf(RetryableException.class) .hasCauseInstanceOf(IOException.class); } @@ -186,17 +185,17 @@ public void setJobFailureWrapException() throws IOException { @Test public void setAttemptFailure() throws IOException { - jobCreationAndStatusUpdateActivity.attemptFailure(new 
AttemptFailureInput(JOB_ID, ATTEMPT_NUMBER)); + jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_ID)); - Mockito.verify(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); + Mockito.verify(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_ID); } @Test public void setAttemptFailureWrapException() throws IOException { Mockito.doThrow(new IOException()) - .when(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_NUMBER); + .when(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_ID); - Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_NUMBER))) + Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_ID))) .isInstanceOf(RetryableException.class) .hasCauseInstanceOf(IOException.class); } From 19ea1b66be1727d3d3632162e03c072ff7b98fb6 Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Fri, 21 Jan 2022 15:02:15 -0800 Subject: [PATCH 193/215] Update the cancellation (#9705) --- .../scheduling/ConnectionManagerWorkflowImpl.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java index 058bb927188d1..d07189309050c 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java @@ -167,6 +167,7 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr } else if (workflowState.isCancelled()) { jobCreationAndStatusUpdateActivity.jobCancelled(new JobCancelledInput( maybeJobId.get())); + resetNewConnectionInput(connectionUpdaterInput); } else if (workflowState.isFailed()) { reportFailure(connectionUpdaterInput); } else { @@ -188,9 +189,7 @@ private void reportSuccess(final ConnectionUpdaterInput connectionUpdaterInput) maybeAttemptId.get(), standardSyncOutput.orElse(null))); - connectionUpdaterInput.setJobId(null); - connectionUpdaterInput.setAttemptNumber(1); - connectionUpdaterInput.setFromFailure(false); + resetNewConnectionInput(connectionUpdaterInput); } private void reportFailure(final ConnectionUpdaterInput connectionUpdaterInput) { @@ -212,12 +211,16 @@ private void reportFailure(final ConnectionUpdaterInput connectionUpdaterInput) Workflow.await(Duration.ofMinutes(1), () -> skipScheduling()); - connectionUpdaterInput.setJobId(null); - connectionUpdaterInput.setAttemptNumber(1); - connectionUpdaterInput.setFromFailure(false); + resetNewConnectionInput(connectionUpdaterInput); } } + private void resetNewConnectionInput(ConnectionUpdaterInput connectionUpdaterInput) { + connectionUpdaterInput.setJobId(null); + connectionUpdaterInput.setAttemptNumber(1); + connectionUpdaterInput.setFromFailure(false); + } + @Override public void submitManualSync() { if (workflowState.isRunning()) { From 3d469083cacd3d8657269fc568a60d91246dcd2a Mon Sep 17 00:00:00 2001 From: Octavia Squidington III <90398440+octavia-squidington-iii@users.noreply.github.com> Date: Sat, 22 Jan 2022 07:43:54 +0800 Subject: [PATCH 194/215] Bump Airbyte version from 0.35.6-alpha to 0.35.7-alpha (#9708) Co-authored-by: benmoriceau --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-bootloader/Dockerfile | 4 ++-- 
airbyte-container-orchestrator/Dockerfile | 6 +++--- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 4 ++-- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 10 +++++----- charts/airbyte/values.yaml | 10 +++++----- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 12 ++++++------ kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 12 ++++++------ 17 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 598d64116a47d..5595158a9dc05 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.6-alpha +current_version = 0.35.7-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index 4e4bd18d3cc47..c372b8e1b765e 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.6-alpha +VERSION=0.35.7-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index e481d8e8cef43..9ebbe745a1878 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -5,6 +5,6 @@ ENV APPLICATION airbyte-bootloader WORKDIR /app -ADD bin/${APPLICATION}-0.35.6-alpha.tar /app +ADD bin/${APPLICATION}-0.35.7-alpha.tar /app -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index 4fba8521e0287..b7bf4ac54676f 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -26,12 +26,12 @@ RUN echo "deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] htt RUN apt-get update && apt-get install -y kubectl ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.6-alpha.tar /app +ADD bin/${APPLICATION}-0.35.7-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index e452774f31aa7..cfa671ee239f9 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.6-alpha.tar /app +ADD bin/${APPLICATION}-0.35.7-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 0c65522957ebd..97653e2aa38f4 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD 
bin/${APPLICATION}-0.35.6-alpha.tar /app +ADD bin/${APPLICATION}-0.35.7-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 25d3848a54641..57b6ac718b7fe 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.6-alpha", + "version": "0.35.7-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": "0.35.6-alpha", + "version": "0.35.7-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 3a0ed25c36035..715d3c9edb798 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.6-alpha", + "version": "0.35.7-alpha", "private": true, "engines": { "node": ">=16.0.0" diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index ce3aff6106938..ecda6a937e4e7 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.6-alpha.tar /app +ADD bin/${APPLICATION}-0.35.7-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.6-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index 8695d030dc10d..987d6f3186bf3 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.6-alpha" +appVersion: "0.35.7-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 141fe87e20b59..8db97e10a1b7f 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -55,7 +55,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. 
| `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -86,7 +86,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -120,7 +120,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -148,7 +148,7 @@ | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.6-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. 
Defaults to the chart's AppVersion | `0.35.7-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 6d113532487d1..33518de80fad1 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.6-alpha + tag: 0.35.7-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -140,7 +140,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.6-alpha + tag: 0.35.7-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -245,7 +245,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.6-alpha + tag: 0.35.7-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -357,7 +357,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.6-alpha + tag: 0.35.7-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## @@ -446,7 +446,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.35.6-alpha + tag: 0.35.7-alpha ## @section Temporal parameters ## TODO: Move to consuming temporal from a dedicated helm chart diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 3edc0edc0e70c..3bf2ff704facf 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -101,7 +101,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.6-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.7-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 4d6d391b63159..c964b7994200f 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.6-alpha +AIRBYTE_VERSION=0.35.7-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 0293dcff62822..87a3aaf8f7b44 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/bootloader - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/scheduler - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/server - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/webapp - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/worker - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 4d6d391b63159..c964b7994200f 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.6-alpha +AIRBYTE_VERSION=0.35.7-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 4b4fe415b6ee4..02e8f673497b5 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/bootloader - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/scheduler - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/server - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/webapp - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: airbyte/worker - newTag: 0.35.6-alpha + newTag: 0.35.7-alpha - name: temporalio/auto-setup newTag: 1.7.0 From 961f2f6caf6ec30a540bc3ec9b0f94cb1011b5e7 Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Sun, 23 Jan 2022 20:08:25 +0200 Subject: [PATCH 195/215] =?UTF-8?q?=F0=9F=8E=89=20Codecov:=20Implement=20C?= =?UTF-8?q?odecove=20into=20Sonar=20(#9666)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Implement Codecov into Sonar config --- .github/actions/ci-py-tests/action.yml | 80 ++++++++++--------- .../source-github/unit_tests/__init__.py | 3 + codecov.yml | 18 +++++ 3 files changed, 63 insertions(+), 38 deletions(-) create mode 100644 airbyte-integrations/connectors/source-github/unit_tests/__init__.py create mode 100644 codecov.yml diff --git a/.github/actions/ci-py-tests/action.yml b/.github/actions/ci-py-tests/action.yml index c1f50251d5579..d23e0c4465340 100644 --- a/.github/actions/ci-py-tests/action.yml +++ 
b/.github/actions/ci-py-tests/action.yml @@ -29,53 +29,57 @@ runs: shell: bash working-directory: ${{ inputs.module-folder }} run: | - virtualenv .venv - source .venv/bin/activate - JSON_CONFIG='{"module": "${{ inputs.module-name }}", "folder": "${{ inputs.module-folder }}", "lang": "py"}' - pip install coverage[toml]~=6.2 - mkdir -p .venv/source-acceptance-test - mkdir -p reports - SAT_DIR=$(git rev-parse --show-toplevel)/airbyte-integrations/bases/source-acceptance-test - PYPROJECT_CONFIG=$(git rev-parse --show-toplevel)/pyproject.toml - git ls-tree -r HEAD --name-only $SAT_DIR | while read src; do cp -f $src .venv/source-acceptance-test; done - pip install build - python -m build .venv/source-acceptance-test - pip install .venv/source-acceptance-test/dist/source_acceptance_test-*.whl - [ -f requirements.txt ] && pip install --quiet -r requirements.txt - pip install .[tests] - coverage run --rcfile=${PYPROJECT_CONFIG} -m pytest ./unit_tests || true - coverage xml --rcfile=${PYPROJECT_CONFIG} -o reports/coverage.xml || true + virtualenv .venv + source .venv/bin/activate + JSON_CONFIG='{"module": "${{ inputs.module-name }}", "folder": "${{ inputs.module-folder }}", "lang": "py"}' + pip install coverage[toml]~=6.2 + mkdir -p .venv/source-acceptance-test + mkdir -p reports + SAT_DIR=$(git rev-parse --show-toplevel)/airbyte-integrations/bases/source-acceptance-test + PYPROJECT_CONFIG=$(git rev-parse --show-toplevel)/pyproject.toml + git ls-tree -r HEAD --name-only $SAT_DIR | while read src; do cp -f $src .venv/source-acceptance-test; done + pip install build + python -m build .venv/source-acceptance-test + pip install .venv/source-acceptance-test/dist/source_acceptance_test-*.whl + [ -f requirements.txt ] && pip install --quiet -r requirements.txt + pip install .[tests] + coverage run --rcfile=${PYPROJECT_CONFIG} -m pytest ./unit_tests || true + coverage xml --rcfile=${PYPROJECT_CONFIG} -o reports/coverage.xml || true - rm -rf .venv - echo "::set-output name=coverage-paths::reports/coverage.xml" + rm -rf .venv + echo "::set-output name=coverage-paths::reports/coverage.xml" + + - name: Upload coverage to Codecov + if: ${{ always() }} + uses: codecov/codecov-action@v2 - name: Build Linter Reports id: build-linter-reports shell: bash working-directory: ${{ inputs.module-folder }} run: | - JSON_CONFIG='{"module": "${{ inputs.module-name }}", "folder": "${{ inputs.module-folder }}", "lang": "py"}' - REPORT_FOLDER=reports - PYPROJECT_CONFIG=$(git rev-parse --show-toplevel)/pyproject.toml + JSON_CONFIG='{"module": "${{ inputs.module-name }}", "folder": "${{ inputs.module-folder }}", "lang": "py"}' + REPORT_FOLDER=reports + PYPROJECT_CONFIG=$(git rev-parse --show-toplevel)/pyproject.toml - # run mypy - pip install lxml~=4.7 mypy~=0.910 . - mypy . --config-file=${PYPROJECT_CONFIG} | tee reports/mypy.log || true + # run mypy + pip install lxml~=4.7 mypy~=0.910 . + mypy . --config-file=${PYPROJECT_CONFIG} | tee reports/mypy.log || true - # run black - pip install black~=21.12b0 - XDG_CACHE_HOME=/dev/null black --config ${PYPROJECT_CONFIG} --diff . | tee reports/black.diff + # run black + pip install black~=21.12b0 + XDG_CACHE_HOME=/dev/null black --config ${PYPROJECT_CONFIG} --diff . | tee reports/black.diff - # run isort - pip install isort~=5.10.1 - cp ${PYPROJECT_CONFIG} ./pyproject.toml - isort --diff . | tee reports/isort.diff + # run isort + pip install isort~=5.10.1 + cp ${PYPROJECT_CONFIG} ./pyproject.toml + isort --diff . 
| tee reports/isort.diff - # run flake8 - pip install mccabe~=0.6.1 pyproject-flake8~=0.0.1a2 - pflake8 --exit-zero . | grep ^. | tee reports/flake.txt + # run flake8 + pip install mccabe~=0.6.1 pyproject-flake8~=0.0.1a2 + pflake8 --exit-zero . | grep ^. | tee reports/flake.txt - echo "::set-output name=mypy-logs::reports/mypy.log" - echo "::set-output name=black-diff::reports/black.diff" - echo "::set-output name=isort-diff::reports/isort.diff" - echo "::set-output name=flake8-logs::reports/flake.txt" + echo "::set-output name=mypy-logs::reports/mypy.log" + echo "::set-output name=black-diff::reports/black.diff" + echo "::set-output name=isort-diff::reports/isort.diff" + echo "::set-output name=flake8-logs::reports/flake.txt" diff --git a/airbyte-integrations/connectors/source-github/unit_tests/__init__.py b/airbyte-integrations/connectors/source-github/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-github/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000000000..9ab8ce596340d --- /dev/null +++ b/codecov.yml @@ -0,0 +1,18 @@ +codecov: + notify: + require_ci_to_pass: no + +coverage: + status: + patch: + default: + target: 100% + if_no_uploads: error + if_not_found: failure + if_ci_failed: failure + project: + default: + target: 100% + if_no_uploads: error + if_not_found: failure + if_ci_failed: failure From 05c84f5f215a18805617b3ff9c68e91ba036bd2b Mon Sep 17 00:00:00 2001 From: Artem Astapenko <3767150+Jamakase@users.noreply.github.com> Date: Mon, 24 Jan 2022 00:24:39 +0300 Subject: [PATCH 196/215] Improve jobs logs error handling (#9585) * Improve jobs logs error handling * minor naming fix --- .../CreateConnectionContent.tsx | 10 +- .../src/components/JobItem/JobItem.tsx | 57 +++++-- .../src/components/JobItem/JobsLogItem.tsx | 61 ------- .../JobItem/components/AttemptDetails.tsx | 2 +- .../JobItem/components/CenteredDetails.tsx | 12 -- .../JobItem/components/DownloadButton.tsx | 9 +- .../JobItem/components/JobCurrenLogs.tsx | 71 -------- .../components/JobItem/components/JobLogs.tsx | 48 +++--- .../components/JobItem/components/Logs.tsx | 4 +- .../JobItem/components/LogsDetails.tsx | 36 +++++ .../JobItem/components/MainInfo.tsx | 39 +++-- .../components/JobItem/components/Tabs.tsx | 2 +- .../src/components/JobItem/index.tsx | 2 - .../src/components/StepsMenu/index.tsx | 1 + airbyte-webapp/src/core/domain/catalog/api.ts | 2 +- airbyte-webapp/src/core/domain/job/Job.ts | 44 +++++ .../src/core/domain/job/JobsService.tsx | 37 +++++ airbyte-webapp/src/core/domain/job/index.ts | 2 + .../src/core/request/LogsRequestError.ts | 10 +- airbyte-webapp/src/core/resources/Job.ts | 151 ------------------ .../src/core/resources/Scheduler.ts | 6 +- airbyte-webapp/src/core/resources/Schema.ts | 4 +- .../src/core/resources/Workspace.ts | 11 -- airbyte-webapp/src/hooks/services/useJob.tsx | 26 --- .../src/hooks/services/useSchemaHook.tsx | 2 +- .../{workspaces => }/useInitService.tsx | 0 .../services/workspaces/WorkspacesService.tsx | 2 +- .../components/JobsList.tsx | 13 +- .../components/StatusView.tsx | 11 +- .../CreateDestinationPage.tsx | 2 +- .../components/DestinationSettings.tsx | 29 ++-- .../pages/OnboardingPage/OnboardingPage.tsx | 2 +- .../CreateSourcePage/CreateSourcePage.tsx | 2 +- .../components/SourceSettings.tsx | 21 ++- .../src/services/job/JobService.tsx | 70 ++++++++ 
.../Connector/ConnectorCard/ConnectorCard.tsx | 12 +- 36 files changed, 339 insertions(+), 474 deletions(-) delete mode 100644 airbyte-webapp/src/components/JobItem/JobsLogItem.tsx delete mode 100644 airbyte-webapp/src/components/JobItem/components/CenteredDetails.tsx delete mode 100644 airbyte-webapp/src/components/JobItem/components/JobCurrenLogs.tsx create mode 100644 airbyte-webapp/src/components/JobItem/components/LogsDetails.tsx create mode 100644 airbyte-webapp/src/core/domain/job/Job.ts create mode 100644 airbyte-webapp/src/core/domain/job/JobsService.tsx create mode 100644 airbyte-webapp/src/core/domain/job/index.ts delete mode 100644 airbyte-webapp/src/core/resources/Job.ts delete mode 100644 airbyte-webapp/src/hooks/services/useJob.tsx rename airbyte-webapp/src/packages/cloud/services/{workspaces => }/useInitService.tsx (100%) create mode 100644 airbyte-webapp/src/services/job/JobService.tsx diff --git a/airbyte-webapp/src/components/CreateConnectionContent/CreateConnectionContent.tsx b/airbyte-webapp/src/components/CreateConnectionContent/CreateConnectionContent.tsx index 3592a646dbfa3..5397ba69835a6 100644 --- a/airbyte-webapp/src/components/CreateConnectionContent/CreateConnectionContent.tsx +++ b/airbyte-webapp/src/components/CreateConnectionContent/CreateConnectionContent.tsx @@ -5,10 +5,9 @@ import { faRedoAlt } from "@fortawesome/free-solid-svg-icons"; import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { useResource } from "rest-hooks"; -import { Button } from "components"; +import { Button, ContentCard } from "components"; import LoadingSchema from "components/LoadingSchema"; -import ContentCard from "components/ContentCard"; -import { JobsLogItem } from "components/JobItem"; +import JobItem from "components/JobItem"; import ConnectionForm from "views/Connection/ConnectionForm"; import TryAfterErrorBlock from "./components/TryAfterErrorBlock"; @@ -94,6 +93,7 @@ const CreateConnectionContent: React.FC = ({ } if (schemaErrorStatus) { + const jobInfo = LogsRequestError.extractJobInfo(schemaErrorStatus); return ( = ({ onClick={onDiscoverSchema} additionControl={{additionBottomControls}} /> - + {jobInfo && } ); } diff --git a/airbyte-webapp/src/components/JobItem/JobItem.tsx b/airbyte-webapp/src/components/JobItem/JobItem.tsx index f3eaec213f59f..6cd919d8b0570 100644 --- a/airbyte-webapp/src/components/JobItem/JobItem.tsx +++ b/airbyte-webapp/src/components/JobItem/JobItem.tsx @@ -1,17 +1,15 @@ import React, { Suspense, useState } from "react"; import styled from "styled-components"; -import { JobItem as JobApiItem, Attempt } from "core/resources/Job"; -import Spinner from "../Spinner"; +import { Spinner } from "components"; + +import { JobInfo, JobListItem, Logs } from "core/domain/job/Job"; +import Status from "core/statuses"; + import JobLogs from "./components/JobLogs"; import ContentWrapper from "./components/ContentWrapper"; import MainInfo from "./components/MainInfo"; -import Status from "core/statuses"; - -type IProps = { - job: JobApiItem; - attempts: Attempt[]; -}; +import { LogsDetails } from "./components/LogsDetails"; const Item = styled.div<{ isFailed: boolean }>` border-bottom: 1px solid ${({ theme }) => theme.greyColor20}; @@ -31,19 +29,42 @@ const LoadLogs = styled.div` min-height: 58px; `; -const JobItem: React.FC = ({ job, attempts }) => { +const isJobEntity = ( + props: { job: JobListItem } | { jobInfo: JobInfo } +): props is { job: JobListItem } => { + return props.hasOwnProperty("job"); +}; + +const JobCurrentLogs: React.FC<{ + id: 
number | string; + jobIsFailed?: boolean; + logs?: Logs; +}> = (props) => { + const path = ["/tmp/workspace", props.id, "logs.log"].join("/"); + + return ; +}; + +type IProps = { + shortInfo?: boolean; +} & ({ job: JobListItem } | { jobInfo: JobInfo }); + +const JobItem: React.FC = ({ shortInfo, ...props }) => { const [isOpen, setIsOpen] = useState(false); const onExpand = () => setIsOpen(!isOpen); - const isFailed = job.status === Status.FAILED; + + const jobMeta = isJobEntity(props) ? props.job.job : props.jobInfo; + const isFailed = jobMeta.status === Status.FAILED; return (
    @@ -54,7 +75,17 @@ const JobItem: React.FC = ({ job, attempts }) => { } > - {isOpen && } + {isOpen ? ( + isJobEntity(props) ? ( + + ) : ( + + ) + ) : null}
    diff --git a/airbyte-webapp/src/components/JobItem/JobsLogItem.tsx b/airbyte-webapp/src/components/JobItem/JobsLogItem.tsx deleted file mode 100644 index 75cb49b58ba3f..0000000000000 --- a/airbyte-webapp/src/components/JobItem/JobsLogItem.tsx +++ /dev/null @@ -1,61 +0,0 @@ -import React, { useState } from "react"; -import styled from "styled-components"; - -import { JobInfo } from "core/resources/Scheduler"; -import ContentWrapper from "./components/ContentWrapper"; -import MainInfo from "./components/MainInfo"; -import JobCurrentLogs from "./components/JobCurrenLogs"; -import Status from "core/statuses"; - -type IProps = { - jobInfo?: JobInfo; -}; - -const Item = styled.div<{ isFailed: boolean }>` - border-top: 1px solid ${({ theme }) => theme.greyColor20}; - font-size: 15px; - line-height: 18px; - - &:hover { - background: ${({ theme, isFailed }) => - isFailed ? theme.dangerTransparentColor : theme.greyColor0}; - } -`; - -const JobsLogItem: React.FC = ({ jobInfo }) => { - const [isOpen, setIsOpen] = useState(false); - - if (!jobInfo) { - return null; - } - - const onExpand = () => setIsOpen(!isOpen); - const isFailed = jobInfo.status === Status.FAILED; - - return ( - - - -
    - {isOpen && ( - - )} -
    -
    -
    - ); -}; - -export default JobsLogItem; diff --git a/airbyte-webapp/src/components/JobItem/components/AttemptDetails.tsx b/airbyte-webapp/src/components/JobItem/components/AttemptDetails.tsx index 375f1f5bd0f58..36ce8c57d6425 100644 --- a/airbyte-webapp/src/components/JobItem/components/AttemptDetails.tsx +++ b/airbyte-webapp/src/components/JobItem/components/AttemptDetails.tsx @@ -3,7 +3,7 @@ import { FormattedMessage } from "react-intl"; import styled from "styled-components"; import dayjs from "dayjs"; -import { Attempt } from "core/resources/Job"; +import { Attempt } from "core/domain/job/Job"; import Status from "core/statuses"; type IProps = { diff --git a/airbyte-webapp/src/components/JobItem/components/CenteredDetails.tsx b/airbyte-webapp/src/components/JobItem/components/CenteredDetails.tsx deleted file mode 100644 index 7edc478eb023a..0000000000000 --- a/airbyte-webapp/src/components/JobItem/components/CenteredDetails.tsx +++ /dev/null @@ -1,12 +0,0 @@ -import styled from "styled-components"; - -const CenteredDetails = styled.div` - text-align: center; - padding-top: 9px; - font-size: 12px; - line-height: 28px; - color: ${({ theme }) => theme.greyColor40}; - position: relative; -`; - -export default CenteredDetails; diff --git a/airbyte-webapp/src/components/JobItem/components/DownloadButton.tsx b/airbyte-webapp/src/components/JobItem/components/DownloadButton.tsx index 622fcabb346fb..8cb97ee1f8e37 100644 --- a/airbyte-webapp/src/components/JobItem/components/DownloadButton.tsx +++ b/airbyte-webapp/src/components/JobItem/components/DownloadButton.tsx @@ -6,10 +6,6 @@ import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { faFileDownload } from "@fortawesome/free-solid-svg-icons"; import { Button } from "components"; -type IProps = { - logs: string[]; - fileName: string; -}; const Download = styled(Button)` position: absolute; @@ -17,6 +13,11 @@ const Download = styled(Button)` right: 11px; `; +type IProps = { + logs: string[]; + fileName: string; +}; + const DownloadButton: React.FC = ({ logs, fileName }) => { const formatMessage = useIntl().formatMessage; diff --git a/airbyte-webapp/src/components/JobItem/components/JobCurrenLogs.tsx b/airbyte-webapp/src/components/JobItem/components/JobCurrenLogs.tsx deleted file mode 100644 index df9dc068c0c55..0000000000000 --- a/airbyte-webapp/src/components/JobItem/components/JobCurrenLogs.tsx +++ /dev/null @@ -1,71 +0,0 @@ -import React, { useState } from "react"; -import { FormattedMessage } from "react-intl"; - -import { Attempt } from "core/resources/Job"; -import AttemptDetails from "./AttemptDetails"; -import DownloadButton from "./DownloadButton"; -import Logs from "./Logs"; -import Tabs from "./Tabs"; -import CenteredDetails from "./CenteredDetails"; - -type IProps = { - id: number | string; - jobIsFailed?: boolean; - attempts: { - attempt: Attempt; - logs: { logLines: string[] }; - }[]; - logs?: { logLines: string[] }; -}; - -const JobCurrentLogs: React.FC = ({ - id, - jobIsFailed, - attempts, - logs, -}) => { - const [attemptNumber, setAttemptNumber] = useState( - attempts.length ? attempts.length - 1 : 0 - ); - - const data = attempts?.map((item, index) => ({ - id: index.toString(), - status: item.attempt?.status, - name: ( - - ), - })); - - const logsText = attempts.length ? attempts[attemptNumber].logs : logs; - const attemptId = attempts.length ? attempts[attemptNumber].attempt.id : ""; - - return ( - <> - {attempts.length > 1 ? 
( - setAttemptNumber(parseInt(at))} - data={data} - isFailed={jobIsFailed} - /> - ) : null} - - {attempts.length > 1 && ( - - )} -
    {`/tmp/workspace/${id}/${attemptId}/logs.log`}
    - - -
    - - - ); -}; - -export default JobCurrentLogs; diff --git a/airbyte-webapp/src/components/JobItem/components/JobLogs.tsx b/airbyte-webapp/src/components/JobItem/components/JobLogs.tsx index d36d4e403bf08..c14c4b30e543b 100644 --- a/airbyte-webapp/src/components/JobItem/components/JobLogs.tsx +++ b/airbyte-webapp/src/components/JobItem/components/JobLogs.tsx @@ -1,14 +1,12 @@ import React, { useState } from "react"; -import { useResource, useSubscription } from "rest-hooks"; import { FormattedMessage } from "react-intl"; -import JobResource from "core/resources/Job"; -import AttemptDetails from "./AttemptDetails"; -import DownloadButton from "./DownloadButton"; +import Status from "core/statuses"; +import { useGetJob } from "services/job/JobService"; + import Logs from "./Logs"; import Tabs from "./Tabs"; -import CenteredDetails from "./CenteredDetails"; -import Status from "core/statuses"; +import { LogsDetails } from "./LogsDetails"; type IProps = { id: number | string; @@ -16,8 +14,7 @@ type IProps = { }; const JobLogs: React.FC = ({ id, jobIsFailed }) => { - const job = useResource(JobResource.detailShape(), { id }); - useSubscription(JobResource.detailShape(), { id }); + const job = useGetJob(id); const [attemptNumber, setAttemptNumber] = useState( job.attempts.length ? job.attempts.length - 1 : 0 @@ -27,11 +24,16 @@ const JobLogs: React.FC = ({ id, jobIsFailed }) => { return ; } - const data = job.attempts.map((item, index) => ({ + const currentAttempt = job.attempts[attemptNumber].attempt; + const logs = job.attempts[attemptNumber]?.logs; + const path = ["/tmp/workspace", id, currentAttempt.id, "logs.log"].join("/"); + + const attemptsTabs = job.attempts.map((item, index) => ({ id: index.toString(), status: - item.status === Status.FAILED || item.status === Status.SUCCEEDED - ? item.status + item.attempt.status === Status.FAILED || + item.attempt.status === Status.SUCCEEDED + ? item.attempt.status : undefined, name: ( = ({ id, jobIsFailed }) => { ), })); - const hasLogs = !!job.logsByAttempt[attemptNumber]?.logLines?.length; return ( <> {job.attempts.length > 1 ? ( setAttemptNumber(parseInt(at))} - data={data} + data={attemptsTabs} isFailed={jobIsFailed} /> ) : null} - - {job.attempts.length > 1 && ( - - )} -
    {`/tmp/workspace/${id}/${job.attempts[attemptNumber].id}/logs.log.`}
    - {hasLogs ? ( - - ) : null} -
    - 1 ? currentAttempt : null} + logs={logs} /> ); diff --git a/airbyte-webapp/src/components/JobItem/components/Logs.tsx b/airbyte-webapp/src/components/JobItem/components/Logs.tsx index aeff169ef2a06..98c7e73a90cfd 100644 --- a/airbyte-webapp/src/components/JobItem/components/Logs.tsx +++ b/airbyte-webapp/src/components/JobItem/components/Logs.tsx @@ -37,7 +37,9 @@ type LogsProps = { }; const Logs: React.FC = ({ logsArray }) => { - const logsJoin = logsArray && logsArray.length ? logsArray.join("\n") : ""; + const logsJoin = logsArray?.length + ? logsArray.join("\n") + : "No logs available"; return ( diff --git a/airbyte-webapp/src/components/JobItem/components/LogsDetails.tsx b/airbyte-webapp/src/components/JobItem/components/LogsDetails.tsx new file mode 100644 index 0000000000000..b7d12b9e21c44 --- /dev/null +++ b/airbyte-webapp/src/components/JobItem/components/LogsDetails.tsx @@ -0,0 +1,36 @@ +import React from "react"; + +import { Attempt, Logs } from "core/domain/job"; +import DownloadButton from "./DownloadButton"; +import LogsTable from "./Logs"; +import AttemptDetails from "./AttemptDetails"; +import styled from "styled-components"; + +const CenteredDetails = styled.div` + text-align: center; + padding-top: 9px; + font-size: 12px; + line-height: 28px; + color: ${({ theme }) => theme.greyColor40}; + position: relative; +`; + +const LogsDetails: React.FC<{ + id: number | string; + path: string; + currentAttempt?: Attempt | null; + logs?: Logs; +}> = ({ path, logs, id, currentAttempt }) => ( + <> + {currentAttempt && } + +
    {path}
    + {logs?.logLines && ( + + )} +
    + + +); + +export { LogsDetails }; diff --git a/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx b/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx index dddc16619fe7e..c56d67be4031a 100644 --- a/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx +++ b/airbyte-webapp/src/components/JobItem/components/MainInfo.tsx @@ -1,29 +1,19 @@ import React from "react"; import { - FormattedMessage, FormattedDateParts, + FormattedMessage, FormattedTimeParts, } from "react-intl"; import styled from "styled-components"; import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { faAngleDown } from "@fortawesome/free-solid-svg-icons"; -import { JobItem as JobApiItem, Attempt } from "core/resources/Job"; -import { JobInfo } from "core/resources/Scheduler"; -import { Row, Cell } from "components/SimpleTableComponents"; +import { Attempt, JobInfo, JobMeta as JobApiItem } from "core/domain/job/Job"; +import { Cell, Row } from "components/SimpleTableComponents"; import { Button, StatusIcon } from "components"; import AttemptDetails from "./AttemptDetails"; import Status from "core/statuses"; -import useJob from "hooks/services/useJob"; - -type IProps = { - job: JobApiItem | JobInfo; - attempts: Attempt[]; - isOpen?: boolean; - onExpand: () => void; - isFailed?: boolean; - shortInfo?: boolean; -}; +import { useCancelJob } from "../../../services/job/JobService"; const MainView = styled(Row)<{ isOpen?: boolean; @@ -92,19 +82,28 @@ const Arrow = styled.div<{ } `; +type IProps = { + job: JobApiItem | JobInfo; + attempts?: Attempt[]; + isOpen?: boolean; + onExpand: () => void; + isFailed?: boolean; + shortInfo?: boolean; +}; + const MainInfo: React.FC = ({ job, - attempts, + attempts = [], isOpen, onExpand, isFailed, shortInfo, }) => { - const { cancelJob } = useJob(); + const cancelJob = useCancelJob(); - const onCancelJob = (event: React.SyntheticEvent) => { + const onCancelJob = async (event: React.SyntheticEvent) => { event.stopPropagation(); - cancelJob(job.id); + return cancelJob(job.id); }; const isNotCompleted = @@ -148,14 +147,14 @@ const MainInfo: React.FC = ({ > {(parts) => {`${parts[0].value}/${parts[2].value}`}} - {attempts.length > 1 ? 
( + {attempts.length > 1 && ( - ) : null} + )} diff --git a/airbyte-webapp/src/components/JobItem/components/Tabs.tsx b/airbyte-webapp/src/components/JobItem/components/Tabs.tsx index 9103ce4ec3c66..5a3fa29be46ee 100644 --- a/airbyte-webapp/src/components/JobItem/components/Tabs.tsx +++ b/airbyte-webapp/src/components/JobItem/components/Tabs.tsx @@ -1,7 +1,7 @@ import React from "react"; import styled from "styled-components"; -import StepsMenu from "../../StepsMenu"; +import { StepsMenu } from "components/StepsMenu"; type IProps = { isFailed?: boolean; diff --git a/airbyte-webapp/src/components/JobItem/index.tsx b/airbyte-webapp/src/components/JobItem/index.tsx index d2de15e363ec8..f1596531774d1 100644 --- a/airbyte-webapp/src/components/JobItem/index.tsx +++ b/airbyte-webapp/src/components/JobItem/index.tsx @@ -1,5 +1,3 @@ import JobItem from "./JobItem"; -import JobsLogItem from "./JobsLogItem"; export default JobItem; -export { JobsLogItem }; diff --git a/airbyte-webapp/src/components/StepsMenu/index.tsx b/airbyte-webapp/src/components/StepsMenu/index.tsx index 3671f4334d498..c343cc40cfcaa 100644 --- a/airbyte-webapp/src/components/StepsMenu/index.tsx +++ b/airbyte-webapp/src/components/StepsMenu/index.tsx @@ -1,3 +1,4 @@ import StepsMenu from "./StepsMenu"; +export { StepsMenu }; export default StepsMenu; diff --git a/airbyte-webapp/src/core/domain/catalog/api.ts b/airbyte-webapp/src/core/domain/catalog/api.ts index a8e1344fd94ed..7dfdafaf357d2 100644 --- a/airbyte-webapp/src/core/domain/catalog/api.ts +++ b/airbyte-webapp/src/core/domain/catalog/api.ts @@ -1,5 +1,5 @@ import { JSONSchema7 } from "json-schema"; -import { JobInfo } from "core/resources/Scheduler"; +import { JobInfo } from "../job/Job"; export interface SourceDiscoverSchemaRead { catalog: SyncSchema; diff --git a/airbyte-webapp/src/core/domain/job/Job.ts b/airbyte-webapp/src/core/domain/job/Job.ts new file mode 100644 index 0000000000000..593b915f08b00 --- /dev/null +++ b/airbyte-webapp/src/core/domain/job/Job.ts @@ -0,0 +1,44 @@ +import Status from "core/statuses"; + +export interface JobMeta { + id: number | string; + configType: string; + configId: string; + createdAt: number; + startedAt: number; + updatedAt: number; + status: Status | null; +} + +export interface Logs { + logLines: string[]; +} + +export interface Attempt { + id: number; + status: string; + createdAt: number; + updatedAt: number; + endedAt: number; + bytesSynced: number; + recordsSynced: number; +} + +export interface AttemptInfo { + attempt: Attempt; + logs: Logs; +} + +export interface JobInfo extends JobMeta { + logs: Logs; +} + +export interface JobDetails { + job: JobMeta; + attempts: AttemptInfo[]; +} + +export interface JobListItem { + job: JobMeta; + attempts: Attempt[]; +} diff --git a/airbyte-webapp/src/core/domain/job/JobsService.tsx b/airbyte-webapp/src/core/domain/job/JobsService.tsx new file mode 100644 index 0000000000000..ae796c37fce0d --- /dev/null +++ b/airbyte-webapp/src/core/domain/job/JobsService.tsx @@ -0,0 +1,37 @@ +import { AirbyteRequestService } from "core/request/AirbyteRequestService"; + +type ListParams = { + configId: string; + configTypes: string[]; +}; + +class JobsService extends AirbyteRequestService { + get url(): string { + return "jobs"; + } + + public async list(listParams: ListParams): Promise { + const jobs = await this.fetch(`${this.url}/list`, listParams); + + return jobs; + } + + public async get(jobId: string | number): Promise { + const job = await this.fetch(`${this.url}/get`, { + id: jobId, + }); + + 
return job; + } + + public async cancel(jobId: string | number): Promise { + const job = await this.fetch(`${this.url}/cancel`, { + id: jobId, + }); + + return job; + } +} + +export { JobsService }; +export type { ListParams }; diff --git a/airbyte-webapp/src/core/domain/job/index.ts b/airbyte-webapp/src/core/domain/job/index.ts new file mode 100644 index 0000000000000..5a312af26f12d --- /dev/null +++ b/airbyte-webapp/src/core/domain/job/index.ts @@ -0,0 +1,2 @@ +export * from "./Job"; +export * from "./JobsService"; diff --git a/airbyte-webapp/src/core/request/LogsRequestError.ts b/airbyte-webapp/src/core/request/LogsRequestError.ts index 01134b0471373..d6fbfec65b206 100644 --- a/airbyte-webapp/src/core/request/LogsRequestError.ts +++ b/airbyte-webapp/src/core/request/LogsRequestError.ts @@ -1,18 +1,20 @@ +import { JobInfo } from "core/domain/job/Job"; + import { CommonRequestError } from "./CommonRequestError"; export class LogsRequestError extends CommonRequestError { __type = "common.errorWithLogs"; - jobInfo: any; + jobInfo: JobInfo; - constructor(jobInfo: any, response: Response, msg?: string) { + constructor(jobInfo: JobInfo, response: Response, msg?: string) { super(response, msg); this.jobInfo = jobInfo; this._status = 400; } - static extractJobInfo(error: any): any { + static extractJobInfo(error: unknown): JobInfo | null { if (!error) { - return false; + return null; } return isLogsRequestError(error) ? error.jobInfo : null; } diff --git a/airbyte-webapp/src/core/resources/Job.ts b/airbyte-webapp/src/core/resources/Job.ts deleted file mode 100644 index 791485ce1ab5e..0000000000000 --- a/airbyte-webapp/src/core/resources/Job.ts +++ /dev/null @@ -1,151 +0,0 @@ -import { - FetchOptions, - MutateShape, - ReadShape, - Resource, - SchemaDetail, -} from "rest-hooks"; -import BaseResource from "./BaseResource"; -import Status from "../statuses"; - -export interface JobItem { - id: number | string; - configType: string; - configId: string; - createdAt: number; - startedAt: number; - updatedAt: number; - status: Status | null; -} - -export interface Logs { - logLines: string[]; -} - -export interface Attempt { - id: number; - status: string; - createdAt: number; - updatedAt: number; - endedAt: number; - bytesSynced: number; - recordsSynced: number; -} - -export interface Job { - job: JobItem; - logsByAttempt: { [key: string]: Logs }; - attempts: Attempt[]; -} - -export default class JobResource extends BaseResource implements Job { - readonly job: JobItem = { - id: 0, - configType: "", - configId: "", - createdAt: 0, - startedAt: 0, - updatedAt: 0, - status: null, - }; - readonly attempts: Attempt[] = []; - readonly logsByAttempt: { [key: string]: Logs } = {}; - - pk(): string { - return this.job?.id?.toString(); - } - - static urlRoot = "jobs"; - - static getFetchOptions(): FetchOptions { - return { - pollFrequency: 2500, // every 2,5 seconds - }; - } - - static listShape( - this: T - ): ReadShape> { - return { - ...super.listShape(), - fetch: async ( - params: Readonly> - ): Promise<{ jobs: Job[] }> => { - const jobsResult = await this.fetch( - "post", - `${this.listUrl(params)}/list`, - { ...params } - ); - - return { - jobs: jobsResult.jobs, - }; - }, - schema: { jobs: [this] }, - }; - } - - static detailShape( - this: T - ): ReadShape> { - return { - ...super.detailShape(), - fetch: async ( - params: Readonly> - ): Promise => { - const jobResult: { - job: JobItem; - attempts: { attempt: Attempt; logs: Logs }[]; - } = await this.fetch("post", `${this.url(params)}/get`, params); - - 
const attemptsValue = jobResult.attempts.map( - (attemptItem) => attemptItem.attempt - ); - - return { - job: jobResult.job, - attempts: attemptsValue, - logsByAttempt: Object.fromEntries( - jobResult.attempts.map((attemptItem) => [ - attemptItem.attempt.id, - attemptItem.logs, - ]) - ), - }; - }, - schema: this, - }; - } - - static cancelShape( - this: T - ): MutateShape> { - return { - ...super.partialUpdateShape(), - fetch: async ( - params: Readonly> - ): Promise => { - const jobResult: { - job: JobItem; - attempts: { attempt: Attempt; logs: Logs }[]; - } = await this.fetch("post", `${this.url(params)}/cancel`, params); - - const attemptsValue = jobResult.attempts.map( - (attemptItem) => attemptItem.attempt - ); - - return { - job: jobResult.job, - attempts: attemptsValue, - logsByAttempt: Object.fromEntries( - jobResult.attempts.map((attemptItem) => [ - attemptItem.attempt.id, - attemptItem.logs, - ]) - ), - }; - }, - schema: this, - }; - } -} diff --git a/airbyte-webapp/src/core/resources/Scheduler.ts b/airbyte-webapp/src/core/resources/Scheduler.ts index 6529f9af258dc..505493aff841e 100644 --- a/airbyte-webapp/src/core/resources/Scheduler.ts +++ b/airbyte-webapp/src/core/resources/Scheduler.ts @@ -3,13 +3,9 @@ import { ReadShape, Resource, SchemaDetail } from "rest-hooks"; import BaseResource from "./BaseResource"; import Status from "core/statuses"; import { ConnectionSpecification } from "core/domain/connection"; -import { JobItem, Logs } from "core/resources/Job"; +import { JobInfo } from "core/domain/job/Job"; import { LogsRequestError } from "core/request/LogsRequestError"; -export type JobInfo = JobItem & { - logs: Logs; -}; - export interface Scheduler { status: string; message: string; diff --git a/airbyte-webapp/src/core/resources/Schema.ts b/airbyte-webapp/src/core/resources/Schema.ts index 253aa9388a5ce..d6da8bfaf2d45 100644 --- a/airbyte-webapp/src/core/resources/Schema.ts +++ b/airbyte-webapp/src/core/resources/Schema.ts @@ -1,9 +1,9 @@ import { ReadShape, Resource, SchemaDetail } from "rest-hooks"; import BaseResource from "./BaseResource"; -import { JobInfo } from "./Scheduler"; import { SourceDiscoverSchemaRead, SyncSchema } from "core/domain/catalog"; import { toInnerModel } from "core/domain/catalog/fieldUtil"; +import { JobInfo } from "../domain/job/Job"; export interface Schema extends SourceDiscoverSchemaRead { id: string; @@ -25,8 +25,6 @@ export default class SchemaResource extends BaseResource implements Schema { ): ReadShape> { return { ...super.detailShape(), - getFetchKey: (params: { sourceId: string }) => - `POST /sources/discover_schema` + JSON.stringify(params), fetch: async (params: { sourceId: string }): Promise => { const response = await this.fetch( "post", diff --git a/airbyte-webapp/src/core/resources/Workspace.ts b/airbyte-webapp/src/core/resources/Workspace.ts index e0d771eaa5425..39b2b9231e3be 100644 --- a/airbyte-webapp/src/core/resources/Workspace.ts +++ b/airbyte-webapp/src/core/resources/Workspace.ts @@ -42,17 +42,6 @@ export default class WorkspaceResource }; } - static getBySlug( - this: T - ): ReadShape> { - return { - ...super.detailShape(), - schema: this, - fetch: async (body: Readonly<{ slug: string }>): Promise => - await this.fetch("post", `${this.url(body)}/get_by_slug`, body), - }; - } - static updateShape( this: T ): MutateShape> { diff --git a/airbyte-webapp/src/hooks/services/useJob.tsx b/airbyte-webapp/src/hooks/services/useJob.tsx deleted file mode 100644 index c25b06570c8bb..0000000000000 --- 
a/airbyte-webapp/src/hooks/services/useJob.tsx +++ /dev/null @@ -1,26 +0,0 @@ -import { useFetcher } from "rest-hooks"; - -import JobResource, { Job } from "core/resources/Job"; - -type JobService = { - cancelJob: (jobId: number | string) => Promise; -}; - -const useJob = (): JobService => { - const cancelJobRequest = useFetcher(JobResource.cancelShape()); - - const cancelJob = async (jobId: number | string) => { - return await cancelJobRequest( - { - id: jobId, - }, - {} - ); - }; - - return { - cancelJob, - }; -}; - -export default useJob; diff --git a/airbyte-webapp/src/hooks/services/useSchemaHook.tsx b/airbyte-webapp/src/hooks/services/useSchemaHook.tsx index 20ddae6dd39ca..6549df577a41a 100644 --- a/airbyte-webapp/src/hooks/services/useSchemaHook.tsx +++ b/airbyte-webapp/src/hooks/services/useSchemaHook.tsx @@ -3,7 +3,7 @@ import { useFetcher } from "rest-hooks"; import { SyncSchema } from "core/domain/catalog"; import SchemaResource from "core/resources/Schema"; -import { JobInfo } from "core/resources/Scheduler"; +import { JobInfo } from "../../core/domain/job/Job"; export const useDiscoverSchema = ( sourceId?: string diff --git a/airbyte-webapp/src/packages/cloud/services/workspaces/useInitService.tsx b/airbyte-webapp/src/packages/cloud/services/useInitService.tsx similarity index 100% rename from airbyte-webapp/src/packages/cloud/services/workspaces/useInitService.tsx rename to airbyte-webapp/src/packages/cloud/services/useInitService.tsx diff --git a/airbyte-webapp/src/packages/cloud/services/workspaces/WorkspacesService.tsx b/airbyte-webapp/src/packages/cloud/services/workspaces/WorkspacesService.tsx index b3091891eb40e..3fa6a2b899451 100644 --- a/airbyte-webapp/src/packages/cloud/services/workspaces/WorkspacesService.tsx +++ b/airbyte-webapp/src/packages/cloud/services/workspaces/WorkspacesService.tsx @@ -14,7 +14,7 @@ import { CloudWorkspacesService } from "packages/cloud/lib/domain/cloudWorkspace import { useCurrentUser } from "packages/cloud/services/auth/AuthService"; import { useConfig } from "packages/cloud/services/config"; import { useDefaultRequestMiddlewares } from "packages/cloud/services/useDefaultRequestMiddlewares"; -import { useInitService } from "./useInitService"; +import { useInitService } from "packages/cloud/services/useInitService"; import { QueryObserverSuccessResult } from "react-query/types/core/types"; export const workspaceKeys = { diff --git a/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/JobsList.tsx b/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/JobsList.tsx index 25f8582ffad24..f61bd062e2cc0 100644 --- a/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/JobsList.tsx +++ b/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/JobsList.tsx @@ -1,15 +1,12 @@ import React, { useMemo } from "react"; -import styled from "styled-components"; import JobItem from "components/JobItem"; -import { Job } from "core/resources/Job"; +import { JobListItem } from "core/domain/job/Job"; type IProps = { - jobs: Job[]; + jobs: JobListItem[]; }; -const Content = styled.div``; - const JobsList: React.FC = ({ jobs }) => { const sortJobs = useMemo( () => jobs.sort((a, b) => (a.job.createdAt > b.job.createdAt ? -1 : 1)), @@ -17,11 +14,11 @@ const JobsList: React.FC = ({ jobs }) => { ); return ( - +
    {sortJobs.map((item) => ( - + ))} - +
    ); }; diff --git a/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/StatusView.tsx b/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/StatusView.tsx index 5c002bdf8a637..d2582fa699305 100644 --- a/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/StatusView.tsx +++ b/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/StatusView.tsx @@ -3,13 +3,14 @@ import { FormattedMessage } from "react-intl"; import styled from "styled-components"; import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { faRedoAlt } from "@fortawesome/free-solid-svg-icons"; -import { useFetcher, useResource, useSubscription } from "rest-hooks"; +import { useFetcher } from "rest-hooks"; + +import { useListJobs } from "services/job/JobService"; import ContentCard from "components/ContentCard"; import { Button, LoadingButton } from "components"; import StatusMainInfo from "./StatusMainInfo"; import ConnectionResource, { Connection } from "core/resources/Connection"; -import JobResource from "core/resources/Job"; import JobsList from "./JobsList"; import EmptyResource from "components/EmptyResourceBlock"; import ResetDataModal from "components/ResetDataModal"; @@ -61,11 +62,7 @@ const StatusView: React.FC = ({ const [isModalOpen, setIsModalOpen] = useState(false); const { isLoading, showFeedback, startAction } = useLoadingState(); const analyticsService = useAnalyticsService(); - const { jobs } = useResource(JobResource.listShape(), { - configId: connection.connectionId, - configTypes: ["sync", "reset_connection"], - }); - useSubscription(JobResource.listShape(), { + const jobs = useListJobs({ configId: connection.connectionId, configTypes: ["sync", "reset_connection"], }); diff --git a/airbyte-webapp/src/pages/DestinationPage/pages/CreateDestinationPage/CreateDestinationPage.tsx b/airbyte-webapp/src/pages/DestinationPage/pages/CreateDestinationPage/CreateDestinationPage.tsx index c4ec6857703ed..d3a40b237abbb 100644 --- a/airbyte-webapp/src/pages/DestinationPage/pages/CreateDestinationPage/CreateDestinationPage.tsx +++ b/airbyte-webapp/src/pages/DestinationPage/pages/CreateDestinationPage/CreateDestinationPage.tsx @@ -7,10 +7,10 @@ import useRouter from "hooks/useRouter"; import DestinationDefinitionResource from "core/resources/DestinationDefinition"; import useDestination from "hooks/services/useDestinationHook"; import { FormPageContent } from "components/ConnectorBlocks"; -import { JobInfo } from "core/resources/Scheduler"; import { ConnectionConfiguration } from "core/domain/connection"; import HeadTitle from "components/HeadTitle"; import useWorkspace from "hooks/services/useWorkspace"; +import { JobInfo } from "../../../../core/domain/job/Job"; const CreateDestinationPage: React.FC = () => { const { push } = useRouter(); diff --git a/airbyte-webapp/src/pages/DestinationPage/pages/DestinationItemPage/components/DestinationSettings.tsx b/airbyte-webapp/src/pages/DestinationPage/pages/DestinationItemPage/components/DestinationSettings.tsx index e2223ae37e0bf..e8be33432d66e 100644 --- a/airbyte-webapp/src/pages/DestinationPage/pages/DestinationItemPage/components/DestinationSettings.tsx +++ b/airbyte-webapp/src/pages/DestinationPage/pages/DestinationItemPage/components/DestinationSettings.tsx @@ -8,14 +8,13 @@ import useDestination, { useDestinationDefinitionSpecificationLoadAsync, } from "hooks/services/useDestinationHook"; import { Connection } from "core/resources/Connection"; -import { 
JobInfo } from "core/resources/Scheduler"; import { ConnectionConfiguration } from "core/domain/connection"; import DestinationDefinitionResource from "core/resources/DestinationDefinition"; import { createFormErrorMessage } from "utils/errorStatusMessage"; import { LogsRequestError } from "core/request/LogsRequestError"; import { ConnectorCard } from "views/Connector/ConnectorCard"; -import { Destination } from "core/domain/connector"; +import { Connector, Destination } from "core/domain/connector"; const Content = styled.div` width: 100%; @@ -33,10 +32,9 @@ const DestinationsSettings: React.FC = ({ connectionsWithDestination, }) => { const [saved, setSaved] = useState(false); - const [errorStatusRequest, setErrorStatusRequest] = useState<{ - statusMessage: string | React.ReactNode; - response: JobInfo; - } | null>(null); + const [errorStatusRequest, setErrorStatusRequest] = useState( + null + ); const destinationSpecification = useDestinationDefinitionSpecificationLoadAsync( currentDestination.destinationDefinitionId @@ -69,9 +67,7 @@ const DestinationsSettings: React.FC = ({ setSaved(true); } catch (e) { - const errorStatusMessage = createFormErrorMessage(e); - - setErrorStatusRequest({ ...e, statusMessage: errorStatusMessage }); + setErrorStatusRequest(e); } }; @@ -88,18 +84,15 @@ const DestinationsSettings: React.FC = ({ }); setSaved(true); } catch (e) { - const errorStatusMessage = createFormErrorMessage(e); - - setErrorStatusRequest({ ...e, statusMessage: errorStatusMessage }); + setErrorStatusRequest(e); } }; - const onDelete = async () => { - await deleteDestination({ + const onDelete = () => + deleteDestination({ connectionsWithDestination, destination: currentDestination, }); - }; return ( @@ -111,11 +104,13 @@ const DestinationsSettings: React.FC = ({ availableServices={[destinationDefinition]} formValues={{ ...currentDestination, - serviceType: currentDestination.destinationDefinitionId, + serviceType: Connector.id(destinationDefinition), }} selectedConnector={destinationSpecification} successMessage={saved && } - errorMessage={errorStatusRequest?.statusMessage} + errorMessage={ + errorStatusRequest && createFormErrorMessage(errorStatusRequest) + } title={} jobInfo={LogsRequestError.extractJobInfo(errorStatusRequest)} /> diff --git a/airbyte-webapp/src/pages/OnboardingPage/OnboardingPage.tsx b/airbyte-webapp/src/pages/OnboardingPage/OnboardingPage.tsx index 12edd1e3be4a0..278c7014f8926 100644 --- a/airbyte-webapp/src/pages/OnboardingPage/OnboardingPage.tsx +++ b/airbyte-webapp/src/pages/OnboardingPage/OnboardingPage.tsx @@ -12,7 +12,6 @@ import useDestination, { import useConnection, { useConnectionList, } from "hooks/services/useConnectionHook"; -import { JobInfo } from "core/resources/Scheduler"; import { ConnectionConfiguration } from "core/domain/connection"; import SourceDefinitionResource from "core/resources/SourceDefinition"; import DestinationDefinitionResource from "core/resources/DestinationDefinition"; @@ -30,6 +29,7 @@ import LoadingPage from "components/LoadingPage"; import useWorkspace from "hooks/services/useWorkspace"; import useRouterHook from "hooks/useRouter"; import { RoutePaths } from "pages/routes"; +import { JobInfo } from "../../core/domain/job/Job"; const Content = styled.div<{ big?: boolean; medium?: boolean }>` width: 100%; diff --git a/airbyte-webapp/src/pages/SourcesPage/pages/CreateSourcePage/CreateSourcePage.tsx b/airbyte-webapp/src/pages/SourcesPage/pages/CreateSourcePage/CreateSourcePage.tsx index 639a17649752b..86f9a3812841e 100644 --- 
a/airbyte-webapp/src/pages/SourcesPage/pages/CreateSourcePage/CreateSourcePage.tsx +++ b/airbyte-webapp/src/pages/SourcesPage/pages/CreateSourcePage/CreateSourcePage.tsx @@ -8,10 +8,10 @@ import useRouter from "hooks/useRouter"; import SourceDefinitionResource from "core/resources/SourceDefinition"; import useSource from "hooks/services/useSourceHook"; import { FormPageContent } from "components/ConnectorBlocks"; -import { JobInfo } from "core/resources/Scheduler"; import { ConnectionConfiguration } from "core/domain/connection"; import HeadTitle from "components/HeadTitle"; import useWorkspace from "hooks/services/useWorkspace"; +import { JobInfo } from "../../../../core/domain/job/Job"; const CreateSourcePage: React.FC = () => { const { push } = useRouter(); diff --git a/airbyte-webapp/src/pages/SourcesPage/pages/SourceItemPage/components/SourceSettings.tsx b/airbyte-webapp/src/pages/SourcesPage/pages/SourceItemPage/components/SourceSettings.tsx index 90fbc4da8d106..ca61c0d7277f3 100644 --- a/airbyte-webapp/src/pages/SourcesPage/pages/SourceItemPage/components/SourceSettings.tsx +++ b/airbyte-webapp/src/pages/SourcesPage/pages/SourceItemPage/components/SourceSettings.tsx @@ -7,7 +7,6 @@ import useSource from "hooks/services/useSourceHook"; import SourceDefinitionSpecificationResource from "core/resources/SourceDefinitionSpecification"; import DeleteBlock from "components/DeleteBlock"; import { Connection } from "core/resources/Connection"; -import { JobInfo } from "core/resources/Scheduler"; import { createFormErrorMessage } from "utils/errorStatusMessage"; import { ConnectionConfiguration } from "core/domain/connection"; import SourceDefinitionResource from "core/resources/SourceDefinition"; @@ -30,10 +29,9 @@ const SourceSettings: React.FC = ({ connectionsWithSource, }) => { const [saved, setSaved] = useState(false); - const [errorStatusRequest, setErrorStatusRequest] = useState<{ - statusMessage: string | React.ReactNode; - response: JobInfo; - } | null>(null); + const [errorStatusRequest, setErrorStatusRequest] = useState( + null + ); const { updateSource, deleteSource, checkSourceConnection } = useSource(); @@ -61,9 +59,7 @@ const SourceSettings: React.FC = ({ setSaved(true); } catch (e) { - const errorStatusMessage = createFormErrorMessage(e); - - setErrorStatusRequest({ ...e, statusMessage: errorStatusMessage }); + setErrorStatusRequest(e); } }; @@ -86,9 +82,8 @@ const SourceSettings: React.FC = ({ } }; - const onDelete = async () => { - await deleteSource({ connectionsWithSource, source: currentSource }); - }; + const onDelete = () => + deleteSource({ connectionsWithSource, source: currentSource }); return ( @@ -100,7 +95,9 @@ const SourceSettings: React.FC = ({ formType="source" availableServices={[sourceDefinition]} successMessage={saved && } - errorMessage={errorStatusRequest?.statusMessage} + errorMessage={ + errorStatusRequest && createFormErrorMessage(errorStatusRequest) + } formValues={{ ...currentSource, serviceType: currentSource.sourceDefinitionId, diff --git a/airbyte-webapp/src/services/job/JobService.tsx b/airbyte-webapp/src/services/job/JobService.tsx new file mode 100644 index 0000000000000..e949e4cad0985 --- /dev/null +++ b/airbyte-webapp/src/services/job/JobService.tsx @@ -0,0 +1,70 @@ +import { + QueryObserverSuccessResult, + UseMutateAsyncFunction, + useMutation, + useQuery, + useQueryClient, +} from "react-query"; + +import { useConfig } from "config"; +import { useDefaultRequestMiddlewares } from "packages/cloud/services/useDefaultRequestMiddlewares"; +import 
{ useInitService } from "packages/cloud/services/useInitService"; +import { JobsService, ListParams } from "core/domain/job/JobsService"; +import { JobDetails, JobListItem } from "core/domain/job/Job"; + +export const jobsKeys = { + all: ["jobs"] as const, + lists: () => [...jobsKeys.all, "list"] as const, + list: (filters: string) => [...jobsKeys.lists(), { filters }] as const, + detail: (jobId: string | number) => + [...jobsKeys.all, "details", jobId] as const, + cancel: (jobId: string) => [...jobsKeys.all, "cancel", jobId] as const, +}; + +function useGetJobService(): JobsService { + const { apiUrl } = useConfig(); + + const requestAuthMiddleware = useDefaultRequestMiddlewares(); + + return useInitService(() => new JobsService(apiUrl, requestAuthMiddleware), [ + apiUrl, + requestAuthMiddleware, + ]); +} + +export const useListJobs = (listParams: ListParams): JobListItem[] => { + const service = useGetJobService(); + return (useQuery( + jobsKeys.list(listParams.configId), + () => service.list(listParams), + { + refetchInterval: 2500, // every 2,5 seconds, + } + ) as QueryObserverSuccessResult<{ jobs: JobListItem[] }>).data.jobs; +}; + +export const useGetJob = (id: string | number): JobDetails => { + const service = useGetJobService(); + + return (useQuery(jobsKeys.detail(id), () => service.get(id), { + refetchInterval: 2500, // every 2,5 seconds, + }) as QueryObserverSuccessResult).data; +}; + +export const useCancelJob = (): UseMutateAsyncFunction< + JobDetails, + Error, + string | number +> => { + const service = useGetJobService(); + const queryClient = useQueryClient(); + + return useMutation( + (id: string | number) => service.cancel(id), + { + onSuccess: (data) => { + queryClient.setQueryData(jobsKeys.detail(data.job.id), data); + }, + } + ).mutateAsync; +}; diff --git a/airbyte-webapp/src/views/Connector/ConnectorCard/ConnectorCard.tsx b/airbyte-webapp/src/views/Connector/ConnectorCard/ConnectorCard.tsx index 0df482c69a038..2af6d9f2e47bc 100644 --- a/airbyte-webapp/src/views/Connector/ConnectorCard/ConnectorCard.tsx +++ b/airbyte-webapp/src/views/Connector/ConnectorCard/ConnectorCard.tsx @@ -1,17 +1,23 @@ import React from "react"; import { ContentCard } from "components"; +import JobItem from "components/JobItem"; + import ServiceForm from "../ServiceForm"; import { ServiceFormProps } from "../ServiceForm/ServiceForm"; -import { JobsLogItem } from "components/JobItem"; +import { JobInfo } from "core/domain/job/Job"; const ConnectorCard: React.FC< - { title?: React.ReactNode; full?: boolean; jobInfo: any } & ServiceFormProps + { + title?: React.ReactNode; + full?: boolean; + jobInfo?: JobInfo | null; + } & ServiceFormProps > = ({ title, full, jobInfo, ...props }) => { return ( - + {jobInfo && } ); }; From b269b9fb1e400f57b41cc21de2db8bb5a075e9f0 Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Sun, 23 Jan 2022 21:09:03 -0800 Subject: [PATCH 197/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20e2e=20test:=20s?= =?UTF-8?q?upport=20custom=20catalog=20(#9720)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add continuous feed mode to source e2e-test * Update connector catalog doc * Fix sonar qube issues * Add cloud variant * Format code * Add testing source connector to seed --- .../resources/seed/source_definitions.yaml | 7 + .../src/main/resources/seed/source_specs.yaml | 148 +++++++++++++ .../source/SourceAcceptanceTest.java | 7 +- airbyte-integrations/builds.md | 1 + .../connectors/destination-e2e-test/README.md | 11 +- 
.../source-e2e-test-cloud/.dockerignore | 3 + .../source-e2e-test-cloud/Dockerfile | 20 ++ .../source-e2e-test-cloud/README.md | 67 ++++++ .../source-e2e-test-cloud/build.gradle | 28 +++ .../source/e2e_test/CloudTestingSources.java | 55 +++++ .../CloudTestingSourcesAcceptanceTest.java | 141 ++++++++++++ .../e2e_test/CloudTestingSourcesTest.java | 25 +++ .../src/test/resources/expected_spec.json | 102 +++++++++ .../connectors/source-e2e-test/Dockerfile | 2 +- .../connectors/source-e2e-test/README.md | 74 +++++++ .../connectors/source-e2e-test/build.gradle | 32 +-- .../source/e2e_test/ContinuousFeedConfig.java | 203 ++++++++++++++++++ .../e2e_test/ContinuousFeedConstants.java | 38 ++++ .../source/e2e_test/ContinuousFeedSource.java | 108 ++++++++++ .../source/e2e_test/LegacyConstants.java | 22 ++ ....java => LegacyExceptionAfterNSource.java} | 27 +-- ...rce.java => LegacyInfiniteFeedSource.java} | 59 +++-- .../source/e2e_test/TestingSources.java | 28 +-- .../main/resources/json_schema_draft_07.json | 166 ++++++++++++++ .../src/main/resources/spec.json | 105 ++++++++- .../ContinuousFeedSourceAcceptanceTest.java | 137 ++++++++++++ ...egacyInfiniteFeedSourceAcceptanceTest.java | 68 ++++++ .../e2e_test/ContinuousFeedConfigTest.java | 92 ++++++++ ...a => LegacyExceptionAfterNSourceTest.java} | 6 +- .../parse_mock_catalog_test_cases.json | 104 +++++++++ docs/SUMMARY.md | 1 + docs/integrations/README.md | 2 + docs/integrations/destinations/e2e-test.md | 4 +- docs/integrations/sources/e2e-test.md | 40 ++++ settings.gradle | 3 + 35 files changed, 1841 insertions(+), 95 deletions(-) create mode 100644 airbyte-integrations/connectors/source-e2e-test-cloud/.dockerignore create mode 100644 airbyte-integrations/connectors/source-e2e-test-cloud/Dockerfile create mode 100644 airbyte-integrations/connectors/source-e2e-test-cloud/README.md create mode 100644 airbyte-integrations/connectors/source-e2e-test-cloud/build.gradle create mode 100644 airbyte-integrations/connectors/source-e2e-test-cloud/src/main/java/io/airbyte/integrations/source/e2e_test/CloudTestingSources.java create mode 100644 airbyte-integrations/connectors/source-e2e-test-cloud/src/test-integration/java/io/airbyte/integrations/source/e2e_test/CloudTestingSourcesAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-e2e-test-cloud/src/test/java/io/airbyte/integrations/source/e2e_test/CloudTestingSourcesTest.java create mode 100644 airbyte-integrations/connectors/source-e2e-test-cloud/src/test/resources/expected_spec.json create mode 100644 airbyte-integrations/connectors/source-e2e-test/README.md create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConfig.java create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConstants.java create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedSource.java create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyConstants.java rename airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/{ExceptionAfterNSource.java => LegacyExceptionAfterNSource.java} (76%) rename airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/{InfiniteFeedSource.java => LegacyInfiniteFeedSource.java} (54%) 
create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/main/resources/json_schema_draft_07.json create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/test-integration/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/test-integration/java/io/airbyte/integrations/source/e2e_test/LegacyInfiniteFeedSourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConfigTest.java rename airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/{ExceptionAfterNSourceTest.java => LegacyExceptionAfterNSourceTest.java} (92%) create mode 100644 airbyte-integrations/connectors/source-e2e-test/src/test/resources/parse_mock_catalog_test_cases.json create mode 100644 docs/integrations/sources/e2e-test.md diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 8cb515efb9b54..68533b3b153d2 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -172,6 +172,13 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/drift icon: drift.svg sourceType: api +- name: E2E Testing + sourceDefinitionId: d53f9084-fa6b-4a5a-976c-5b8392f4ad8a + dockerRepository: airbyte/source-e2e-test + dockerImageTag: 1.0.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/e2e-test + icon: airbyte.svg + sourceType: api - name: Exchange Rates Api sourceDefinitionId: e2b40e36-aa0e-4bed-b41b-bcea6fa348b1 dockerRepository: airbyte/source-exchange-rates diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index c8a2ae1581c24..e01dc8a2655b3 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1448,6 +1448,154 @@ oauthFlowOutputParameters: - - "access_token" - - "refresh_token" +- dockerImage: "airbyte/source-e2e-test:1.0.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/e2e-test" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "E2E Test Source Spec" + type: "object" + required: + - "type" + oneOf: + - title: "Continuous Feed" + required: + - "type" + - "max_messages" + - "mock_catalog" + description: "A mock mode that will emit random messages based on the input\ + \ schema." + additionalProperties: false + properties: + type: + type: "string" + const: "CONTINUOUS_FEED" + default: "CONTINUOUS_FEED" + order: 10 + max_messages: + title: "Max Records" + description: "Number of records to emit per stream. Min 1. Max 100 billion." + type: "integer" + default: 100 + min: 1 + max: 100000000000 + order: 20 + seed: + title: "Random Seed" + description: "When the seed is unspecified, the current time millis will\ + \ be used as the seed. Range: [0, 1000000]." + type: "integer" + default: 0 + examples: + - 42 + min: 0 + max: 1000000 + order: 30 + message_interval_ms: + title: "Message Interval (ms)" + description: "Interval between messages in ms. Min 0 ms. Max 60000 ms\ + \ (1 minute)." 
+ type: "integer" + min: 0 + max: 60000 + default: 0 + order: 40 + mock_catalog: + title: "Mock Catalog" + type: "object" + order: 50 + oneOf: + - title: "Single Stream" + description: "A catalog with one stream." + required: + - "type" + - "stream_name" + - "stream_schema" + properties: + type: + type: "string" + const: "SINGLE_STREAM" + default: "SINGLE_STREAM" + stream_name: + title: "Stream Name" + description: "Name of the data stream." + type: "string" + default: "data_stream" + stream_schema: + title: "Stream Schema" + description: "A Json schema for the stream. The schema should be\ + \ compatible with draft-07. See this doc for examples." + type: "string" + default: "{ \"type\": \"object\", \"properties\": { \"column1\"\ + : { \"type\": \"string\" } } }" + - title: "Multi-Stream" + description: "A catalog with multiple data streams." + required: + - "type" + - "stream_schemas" + properties: + type: + type: "string" + const: "MULTI_STREAM" + default: "MULTI_STREAM" + stream_schemas: + title: "Streams and Schemas" + description: "A Json object specifying multiple data streams and\ + \ their schemas. Each key in this object is one stream name. Each\ + \ value is the schema for that stream. The schema should be compatible\ + \ with draft-07. See this doc for examples." + type: "string" + default: "{ \"stream1\": { \"type\": \"object\", \"properties\"\ + : { \"field1\": { \"type\": \"string\" } } }, \"stream2\": { \"\ + type\": \"object\", \"properties\": { \"field1\": { \"type\":\ + \ \"boolean\" } } } }" + - title: "Legacy Exception After N" + required: + - "type" + - "throw_after_n_records" + description: "A legacy mode from v0.1.1 mainly for unit tests. The catalog\ + \ has one \"data\" stream, which has one string field \"column1\". This\ + \ mode will throw an exception after N messages." + additionalProperties: false + properties: + type: + type: "string" + const: "EXCEPTION_AFTER_N" + default: "EXCEPTION_AFTER_N" + throw_after_n_records: + title: "Throw After N Records" + description: "Number of records to emit before throwing an exception.\ + \ Min 1." + type: "integer" + min: 1 + - title: "Legacy Infinite Feed" + required: + - "type" + - "max_records" + description: "A legacy mode from v0.1.1 mainly for unit tests. The catalog\ + \ has one \"data\" stream, which has one string field \"column1\". This\ + \ mode will emit messages infinitely." + additionalProperties: true + properties: + type: + type: "string" + const: "INFINITE_FEED" + default: "INFINITE_FEED" + max_records: + title: "Max Records" + description: "Number of records to emit. If not set, defaults to infinity." + type: "integer" + message_interval: + title: "Message Interval" + description: "Interval between messages in ms." 
+ type: "integer" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-exchange-rates:0.2.5" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/exchangeratesapi" diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java index 366be1361c023..60772c479d2a4 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java @@ -160,11 +160,12 @@ public void testDiscover() throws Exception { public void testFullRefreshRead() throws Exception { final ConfiguredAirbyteCatalog catalog = withFullRefreshSyncModes(getConfiguredCatalog()); final List allMessages = runRead(catalog); - final List recordMessages = allMessages.stream().filter(m -> m.getType() == Type.RECORD).collect(Collectors.toList()); + final List recordMessages = filterRecords(allMessages); // the worker validates the message formats, so we just validate the message content // We don't need to validate message format as long as we use the worker, which we will not want to // do long term. assertFalse(recordMessages.isEmpty(), "Expected a full refresh sync to produce records"); + assertRecordMessages(recordMessages); final List regexTests = getRegexTests(); final List stringMessages = allMessages.stream().map(Jsons::serialize).collect(Collectors.toList()); @@ -175,6 +176,10 @@ public void testFullRefreshRead() throws Exception { }); } + protected void assertRecordMessages(final List recordMessages) { + // do nothing by default + } + /** * Configuring all streams in the input catalog to full refresh mode, performs two read operations * on all streams which support full refresh syncs. 
It then verifies that the RECORD messages output diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 5f73847934dfc..50deaeffa517a 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -25,6 +25,7 @@ | Close.com | [![source-close-com](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-close-com%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-close-com/) | | Dixa | [![source-dixa](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-dixa%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-dixa) | | Drift | [![source-drift](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-drift%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-drift) | +| End-to-End Testing | [![source-e2e-test](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-e2e-test%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-e2e-test) | | Exchange Rates API | [![source-exchange-rates](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-exchange-rates%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-exchange-rates) | | Facebook Marketing | [![source-facebook-marketing](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-facebook-marketing%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-facebook-marketing) | | Files | [![source-file](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-file%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-file) | diff --git a/airbyte-integrations/connectors/destination-e2e-test/README.md b/airbyte-integrations/connectors/destination-e2e-test/README.md index e8f2bee4577a5..ee61061d71225 100644 --- a/airbyte-integrations/connectors/destination-e2e-test/README.md +++ b/airbyte-integrations/connectors/destination-e2e-test/README.md @@ -7,14 +7,11 @@ This is the repository for the Null destination connector in Java. For informati #### Building via Gradle From the Airbyte repository root, run: ``` -./gradlew :airbyte-integrations:connectors:destination-:build +./gradlew :airbyte-integrations:connectors:destination-e2e-test:build ``` #### Create credentials -**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. -Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. - -**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. +No credential is needed for this connector. ### Locally running the connector docker image @@ -35,8 +32,8 @@ docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-e2e-test:dev disc docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-e2e-test:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json ``` -#### Dev Null Destination -The Dev Null Destination depends on this connector. 
It only allows the "silent" mode. When this mode is changed, please make sure that the Dev Null Destination is updated and published accordingly as well. +#### Cloud variant +The cloud variant of this connector is Dev Null Destination. It only allows the "silent" mode. When this mode is changed, please make sure that the Dev Null Destination is updated and published accordingly as well. ## Testing We use `JUnit` for Java tests. diff --git a/airbyte-integrations/connectors/source-e2e-test-cloud/.dockerignore b/airbyte-integrations/connectors/source-e2e-test-cloud/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test-cloud/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/source-e2e-test-cloud/Dockerfile b/airbyte-integrations/connectors/source-e2e-test-cloud/Dockerfile new file mode 100644 index 0000000000000..a230cc6117334 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test-cloud/Dockerfile @@ -0,0 +1,20 @@ +FROM airbyte/integration-base-java:dev AS build + +WORKDIR /airbyte + +ENV APPLICATION source-e2e-test-cloud + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 && rm -rf ${APPLICATION}.tar + +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte + +ENV APPLICATION source-e2e-test-cloud + +COPY --from=build /airbyte /airbyte + +LABEL io.airbyte.version=1.0.0 +LABEL io.airbyte.name=airbyte/source-e2e-test-cloud diff --git a/airbyte-integrations/connectors/source-e2e-test-cloud/README.md b/airbyte-integrations/connectors/source-e2e-test-cloud/README.md new file mode 100644 index 0000000000000..43a68d7eb4750 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test-cloud/README.md @@ -0,0 +1,67 @@ +# End-to-End Testing Source Cloud Variant + +This is the Cloud variant of the [E2E Test Source](https://docs.airbyte.io/integrations/sources/e2e-test). It only allows the "continuous feed" mode with finite number of record messages. + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-e2e-test-cloud:build +``` + +#### Create credentials +No credential is needed for this connector. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-e2e-test-cloud:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-e2e-test-cloud:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-e2e-test-cloud:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-e2e-test-cloud:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-e2e-test-cloud:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +#### Cloud variant +The cloud version of this connector only allows the `CONTINUOUS FEED` mode. When this mode is changed, please make sure that the cloud variant is updated and published accordingly as well. + +## Testing +We use `JUnit` for Java tests. 
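For the `check`, `discover`, and `read` commands above, the mounted `secrets/config.json` has to describe the continuous-feed mode, since that is the only mode this cloud variant exposes. A minimal sketch might look like the following; the field names are taken from the connector spec added in this patch, and the concrete values are purely illustrative:

```
{
  "type": "CONTINUOUS_FEED",
  "max_messages": 100,
  "seed": 42,
  "message_interval_ms": 0,
  "mock_catalog": {
    "type": "SINGLE_STREAM",
    "stream_name": "data_stream",
    "stream_schema": "{ \"type\": \"object\", \"properties\": { \"column1\": { \"type\": \"string\" } } }"
  }
}
```

Note that `stream_schema` is passed as a JSON string (as in the spec defaults), not as a nested JSON object.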
+ +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/sources/e2e-test`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. See example(s) in +`src/test-integration/java/io/airbyte/integrations/sources/e2e-test/`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:sources-e2e-test:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:sources-e2e-test:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +2. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +3. Create a Pull Request. +4. Pat yourself on the back for being an awesome contributor. +5. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-e2e-test-cloud/build.gradle b/airbyte-integrations/connectors/source-e2e-test-cloud/build.gradle new file mode 100644 index 0000000000000..547dc0b18aceb --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test-cloud/build.gradle @@ -0,0 +1,28 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.source.e2e_test.CloudTestingSources' +} + +dependencies { + implementation project(':airbyte-config:models') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:bases:base-java') + implementation project(':airbyte-integrations:connectors:source-e2e-test') + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + testImplementation project(":airbyte-json-validation") + + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-source-test') + integrationTestJavaImplementation project(':airbyte-integrations:connectors:source-e2e-test-cloud') +} + +allprojects { + repositories { + maven { url 'https://jitpack.io' } + } +} diff --git a/airbyte-integrations/connectors/source-e2e-test-cloud/src/main/java/io/airbyte/integrations/source/e2e_test/CloudTestingSources.java b/airbyte-integrations/connectors/source-e2e-test-cloud/src/main/java/io/airbyte/integrations/source/e2e_test/CloudTestingSources.java new file mode 100644 index 0000000000000..af44914ce6b38 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test-cloud/src/main/java/io/airbyte/integrations/source/e2e_test/CloudTestingSources.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.e2e_test; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.integrations.base.Source; +import io.airbyte.integrations.base.spec_modification.SpecModifyingSource; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.util.Iterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CloudTestingSources extends SpecModifyingSource implements Source { + + private static final Logger LOGGER = LoggerFactory.getLogger(CloudTestingSources.class); + private static final String CLOUD_TESTING_SOURCES_TITLE = "Cloud E2E Test Source Spec"; + + public CloudTestingSources() { + super(new TestingSources()); + } + + public static void main(final String[] args) throws Exception { + final Source source = new CloudTestingSources(); + LOGGER.info("Starting source: {}", CloudTestingSources.class); + new IntegrationRunner(source).run(args); + LOGGER.info("Completed source: {}", CloudTestingSources.class); + } + + /** + * 1. Update the title. 2. Only keep the "continuous feed" mode. + */ + @Override + public ConnectorSpecification modifySpec(final ConnectorSpecification originalSpec) { + final ConnectorSpecification spec = Jsons.clone(originalSpec); + + ((ObjectNode) spec.getConnectionSpecification()).put("title", CLOUD_TESTING_SOURCES_TITLE); + + final ArrayNode types = (ArrayNode) spec.getConnectionSpecification().get("oneOf"); + final Iterator typesIterator = types.elements(); + while (typesIterator.hasNext()) { + final JsonNode typeNode = typesIterator.next(); + if (!typeNode.get("properties").get("type").get("const").asText().equalsIgnoreCase("CONTINUOUS_FEED")) { + typesIterator.remove(); + } + } + return spec; + } + +} diff --git a/airbyte-integrations/connectors/source-e2e-test-cloud/src/test-integration/java/io/airbyte/integrations/source/e2e_test/CloudTestingSourcesAcceptanceTest.java b/airbyte-integrations/connectors/source-e2e-test-cloud/src/test-integration/java/io/airbyte/integrations/source/e2e_test/CloudTestingSourcesAcceptanceTest.java new file mode 100644 index 0000000000000..7e554049c7d7c --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test-cloud/src/test-integration/java/io/airbyte/integrations/source/e2e_test/CloudTestingSourcesAcceptanceTest.java @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.e2e_test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.e2e_test.ContinuousFeedConfig.MockCatalogType; +import io.airbyte.integrations.source.e2e_test.TestingSources.TestingSourceType; +import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; +import io.airbyte.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConnectorSpecification; +import io.airbyte.validation.json.JsonSchemaValidator; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; + +/** + * This acceptance test is mostly the same as {@code ContinuousFeedSourceAcceptanceTest}. The only + * difference is the image name. + */ +public class CloudTestingSourcesAcceptanceTest extends SourceAcceptanceTest { + + private static final int MAX_MESSAGES = ThreadLocalRandom.current().nextInt(10, 20); + private static final JsonSchemaValidator JSON_VALIDATOR = new JsonSchemaValidator(); + private static final String STREAM_1 = "stream1"; + private static final JsonNode SCHEMA_1 = Jsons.deserialize(""" + { + "type": "object", + "properties": { + "field1": { "type": "integer" } + } + } + """); + private static final String STREAM_2 = "stream2"; + private static final JsonNode SCHEMA_2 = Jsons.deserialize(""" + { + "type": "object", + "properties": { + "column1": { "type": "string" }, + "column2": { + "type": "object", + "properties": { + "field1": { "type": "array", "items": { "type": "boolean" } }, + "field2": { "type": "integer" } + } + } + } + } + """); + + private JsonNode config; + + @Override + protected String getImageName() { + return "airbyte/source-e2e-test-cloud:dev"; + } + + @Override + protected JsonNode getConfig() { + return this.config; + } + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + + final JsonNode mockCatalog = Jsons.jsonNode(ImmutableMap.builder() + .put("type", MockCatalogType.MULTI_STREAM) + .put("stream_schemas", String.format("{ \"%s\": %s, \"%s\": %s }", + STREAM_1, + Jsons.serialize(SCHEMA_1), + STREAM_2, + Jsons.serialize(SCHEMA_2))) + .build()); + this.config = Jsons.jsonNode(ImmutableMap.builder() + .put("type", TestingSourceType.CONTINUOUS_FEED) + .put("seed", 1024) + .put("message_interval_ms", 0) + .put("max_messages", MAX_MESSAGES) + .put("mock_catalog", mockCatalog) + .build()); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + // do nothing + } + + @Override + protected ConnectorSpecification getSpec() throws IOException { + return Jsons.deserialize(MoreResources.readResource("expected_spec.json"), ConnectorSpecification.class); + } + + @Override + protected ConfiguredAirbyteCatalog getConfiguredCatalog() throws JsonValidationException { + final ContinuousFeedConfig feedConfig = new ContinuousFeedConfig(this.config); + return CatalogHelpers.toDefaultConfiguredCatalog(feedConfig.getMockCatalog()); + } + + 
@Override + protected JsonNode getState() { + return Jsons.jsonNode(new HashMap<>()); + } + + @Override + protected List getRegexTests() { + return Collections.emptyList(); + } + + @Override + protected void assertRecordMessages(final List recordMessages) { + int index = 0; + // the first N messages are from stream 1 + while (index < MAX_MESSAGES) { + final AirbyteRecordMessage message = recordMessages.get(index); + assertEquals(STREAM_1, message.getStream()); + assertTrue(JSON_VALIDATOR.validate(SCHEMA_1, message.getData()).isEmpty()); + ++index; + } + // the second N messages are from stream 2 + while (index < MAX_MESSAGES * 2) { + final AirbyteRecordMessage message = recordMessages.get(index); + assertEquals(STREAM_2, message.getStream()); + assertTrue(JSON_VALIDATOR.validate(SCHEMA_2, message.getData()).isEmpty()); + ++index; + } + } + +} diff --git a/airbyte-integrations/connectors/source-e2e-test-cloud/src/test/java/io/airbyte/integrations/source/e2e_test/CloudTestingSourcesTest.java b/airbyte-integrations/connectors/source-e2e-test-cloud/src/test/java/io/airbyte/integrations/source/e2e_test/CloudTestingSourcesTest.java new file mode 100644 index 0000000000000..d84879038cc03 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test-cloud/src/test/java/io/airbyte/integrations/source/e2e_test/CloudTestingSourcesTest.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.e2e_test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.ConnectorSpecification; +import org.junit.jupiter.api.Test; + +class CloudTestingSourcesTest { + + @Test + public void testSpec() throws Exception { + final ConnectorSpecification actual = new CloudTestingSources().spec(); + final ConnectorSpecification expected = Jsons.deserialize( + MoreResources.readResource("expected_spec.json"), ConnectorSpecification.class); + + assertEquals(expected, actual); + } + +} diff --git a/airbyte-integrations/connectors/source-e2e-test-cloud/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-e2e-test-cloud/src/test/resources/expected_spec.json new file mode 100644 index 0000000000000..92f0d03b7a7a5 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test-cloud/src/test/resources/expected_spec.json @@ -0,0 +1,102 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/e2e-test", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Cloud E2E Test Source Spec", + "type": "object", + "required": ["type"], + "oneOf": [ + { + "title": "Continuous Feed", + "required": ["type", "max_messages", "mock_catalog"], + "description": "A mock mode that will emit random messages based on the input schema.", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "const": "CONTINUOUS_FEED", + "default": "CONTINUOUS_FEED", + "order": 10 + }, + "max_messages": { + "title": "Max Records", + "description": "Number of records to emit per stream. Min 1. Max 100 billion.", + "type": "integer", + "default": 100, + "min": 1, + "max": 100000000000, + "order": 20 + }, + "seed": { + "title": "Random Seed", + "description": "When the seed is unspecified, the current time millis will be used as the seed. 
Range: [0, 1000000].", + "type": "integer", + "default": 0, + "examples": [42], + "min": 0, + "max": 1000000, + "order": 30 + }, + "message_interval_ms": { + "title": "Message Interval (ms)", + "description": "Interval between messages in ms. Min 0 ms. Max 60000 ms (1 minute).", + "type": "integer", + "min": 0, + "max": 60000, + "default": 0, + "order": 40 + }, + "mock_catalog": { + "title": "Mock Catalog", + "type": "object", + "order": 50, + "oneOf": [ + { + "title": "Single Stream", + "description": "A catalog with one stream.", + "required": ["type", "stream_name", "stream_schema"], + "properties": { + "type": { + "type": "string", + "const": "SINGLE_STREAM", + "default": "SINGLE_STREAM" + }, + "stream_name": { + "title": "Stream Name", + "description": "Name of the data stream.", + "type": "string", + "default": "data_stream" + }, + "stream_schema": { + "title": "Stream Schema", + "description": "A Json schema for the stream. The schema should be compatible with draft-07. See this doc for examples.", + "type": "string", + "default": "{ \"type\": \"object\", \"properties\": { \"column1\": { \"type\": \"string\" } } }" + } + } + }, + { + "title": "Multi-Stream", + "description": "A catalog with multiple data streams.", + "required": ["type", "stream_schemas"], + "properties": { + "type": { + "type": "string", + "const": "MULTI_STREAM", + "default": "MULTI_STREAM" + }, + "stream_schemas": { + "title": "Streams and Schemas", + "description": "A Json object specifying multiple data streams and their schemas. Each key in this object is one stream name. Each value is the schema for that stream. The schema should be compatible with draft-07. See this doc for examples.", + "type": "string", + "default": "{ \"stream1\": { \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"string\" } } }, \"stream2\": { \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"boolean\" } } } }" + } + } + } + ] + } + } + } + ] + } +} diff --git a/airbyte-integrations/connectors/source-e2e-test/Dockerfile b/airbyte-integrations/connectors/source-e2e-test/Dockerfile index b83f828165d87..6ba25c8c6f25f 100644 --- a/airbyte-integrations/connectors/source-e2e-test/Dockerfile +++ b/airbyte-integrations/connectors/source-e2e-test/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-e2e-test COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=1.0.0 LABEL io.airbyte.name=airbyte/source-e2e-test diff --git a/airbyte-integrations/connectors/source-e2e-test/README.md b/airbyte-integrations/connectors/source-e2e-test/README.md new file mode 100644 index 0000000000000..045bde10a19fe --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/README.md @@ -0,0 +1,74 @@ +# End-to-End Testing Source + +This is the repository for the mock source connector in Java. For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/sources/e2e-test) + +## Mock Json record generation +The [airbytehq/jsongenerator](https://github.com/airbytehq/jsongenerator) is used to generate random Json records based on the specified Json schema. This library is forked from [jimblackler/jsongenerator](https://github.com/jimblackler/jsongenerator) authored by [Jim Blackler](https://github.com/jimblackler) and licensed under Apache 2.0. + +Although this library seems to be the best one available for Json generation in Java, it has two downsides. 
+ - It relies on JavaScript inside Java (through `org.mozilla:rhino-engine`), and fetches remote JavaScript snippets (in the [PatternReverser](https://github.com/jimblackler/jsongenerator/blob/master/src/main/java/net/jimblackler/jsongenerator/PatternReverser.java)).
+ - It does not allow customization of individual fields, so the generated Json objects can look garbled. We may use libraries such as [java-faker](https://github.com/DiUS/java-faker) in the future to augment it (see the generation sketch at the end of this README).
+
+## Local development
+
+#### Building via Gradle
+From the Airbyte repository root, run:
+```
+./gradlew :airbyte-integrations:connectors:source-e2e-test:build
+```
+
+#### Create credentials
+No credential is needed for this connector.
+
+### Locally running the connector docker image
+
+#### Build
+Build the connector image via Gradle:
+```
+./gradlew :airbyte-integrations:connectors:source-e2e-test:airbyteDocker
+```
+When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in
+the Dockerfile.
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/source-e2e-test:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-e2e-test:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-e2e-test:dev discover --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-e2e-test:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+#### Cloud variant
+The cloud version of this connector only allows the `CONTINUOUS_FEED` mode. When this mode is changed, please make sure that the cloud variant is updated and published accordingly as well.
+
+## Testing
+We use `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/java/io/airbyte/integrations/source/e2e_test`.
+
+#### Acceptance Tests
+Airbyte has a standard test suite that all source connectors must pass. See example(s) in
+`src/test-integration/java/io/airbyte/integrations/source/e2e_test/`.
+
+### Using gradle to run tests
+All commands should be run from the Airbyte project root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:source-e2e-test:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:source-e2e-test:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing unit and integration tests.
+2. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)).
+3. Create a Pull Request.
+4. Pat yourself on the back for being an awesome contributor.
+5. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
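For illustration only (not part of the patch itself): a minimal, hypothetical sketch of how the jsongenerator library is driven, mirroring the calls made in `ContinuousFeedSource`. The class name, the hard-coded schema, and the seed value are assumptions for the example; the `SchemaStore`, `Generator`, and `MOCK_JSON_CONFIG` wiring comes from the connector code in this PR.

```java
package io.airbyte.integrations.source.e2e_test;

import io.airbyte.commons.json.Jsons;
import java.util.Random;
import net.jimblackler.jsongenerator.Generator;
import net.jimblackler.jsonschemafriend.Schema;
import net.jimblackler.jsonschemafriend.SchemaStore;

public class MockRecordGenerationSketch {

  public static void main(final String[] args) throws Exception {
    // Hypothetical draft-07 stream schema; any schema accepted by the connector works here.
    final String streamSchema = "{ \"type\": \"object\", \"properties\": { \"column1\": { \"type\": \"string\" } } }";

    // Same wiring as ContinuousFeedSource#read: parse the schema once, then reuse a seeded generator.
    final SchemaStore schemaStore = new SchemaStore(true);
    final Schema schema = schemaStore.loadSchemaJson(streamSchema);
    final Generator generator = new Generator(ContinuousFeedConstants.MOCK_JSON_CONFIG, schemaStore, new Random(1024));

    // Each call produces one random Json object conforming to the schema, bounded by MOCK_JSON_MAX_TREE_SIZE.
    for (int i = 0; i < 3; i++) {
      System.out.println(Jsons.jsonNode(generator.generate(schema, ContinuousFeedConstants.MOCK_JSON_MAX_TREE_SIZE)));
    }
  }

}
```

Because the generated objects are not field-customizable (the second downside above), the connector first patches each stream schema in `ContinuousFeedConfig#processSchema` so that every property is required and no additional properties are allowed.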
diff --git a/airbyte-integrations/connectors/source-e2e-test/build.gradle b/airbyte-integrations/connectors/source-e2e-test/build.gradle index 478ba8c8a47ad..2fa70b57bdeb1 100644 --- a/airbyte-integrations/connectors/source-e2e-test/build.gradle +++ b/airbyte-integrations/connectors/source-e2e-test/build.gradle @@ -9,25 +9,33 @@ application { } dependencies { - implementation project(':airbyte-db:lib') implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') - implementation project(':airbyte-integrations:connectors:source-jdbc') - + implementation project(':airbyte-json-validation') implementation 'org.apache.commons:commons-lang3:3.11' - implementation "org.postgresql:postgresql:42.2.18" - implementation 'io.debezium:debezium-embedded:1.4.2.Final' - implementation 'io.debezium:debezium-api:1.4.2.Final' - implementation 'io.debezium:debezium-connector-postgres:1.4.2.Final' + implementation 'com.networknt:json-schema-validator:1.0.42' + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + // random Json object generation from Json schema + // https://github.com/airbytehq/jsongenerator + implementation 'net.jimblackler.jsonschemafriend:core:0.11.2' + implementation 'org.mozilla:rhino-engine:1.7.14' + implementation('net.jimblackler:jsongenerator') { + version { + branch = 'master' + } + } - testImplementation testFixtures(project(':airbyte-integrations:connectors:source-jdbc')) testImplementation project(":airbyte-json-validation") testImplementation project(':airbyte-test-utils') - testImplementation 'org.testcontainers:postgresql:1.15.3' - integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-source-test') - - implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + integrationTestJavaImplementation project(':airbyte-integrations:connectors:source-e2e-test') integrationTestJavaImplementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) } + +allprojects { + repositories { + maven { url 'https://jitpack.io' } + } +} diff --git a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConfig.java b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConfig.java new file mode 100644 index 0000000000000..d8d314e500c32 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConfig.java @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */
+
+package io.airbyte.integrations.source.e2e_test;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import io.airbyte.commons.jackson.MoreMappers;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.commons.resources.MoreResources;
+import io.airbyte.commons.string.Strings;
+import io.airbyte.commons.util.MoreIterators;
+import io.airbyte.protocol.models.AirbyteCatalog;
+import io.airbyte.protocol.models.AirbyteStream;
+import io.airbyte.validation.json.JsonSchemaValidator;
+import io.airbyte.validation.json.JsonValidationException;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+
+@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
+public class ContinuousFeedConfig {
+
+  private static final JsonNode JSON_SCHEMA_DRAFT_07;
+  private static final JsonSchemaValidator SCHEMA_VALIDATOR = new JsonSchemaValidator();
+  private static final ObjectMapper MAPPER = MoreMappers.initMapper();
+
+  static {
+    try {
+      final String jsonSchemaDraft07 = MoreResources.readResource("json_schema_draft_07.json");
+      JSON_SCHEMA_DRAFT_07 = Jsons.deserialize(jsonSchemaDraft07);
+    } catch (final IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public enum MockCatalogType {
+    SINGLE_STREAM,
+    MULTI_STREAM
+  }
+
+  private final long seed;
+  private final AirbyteCatalog mockCatalog;
+  private final long maxMessages;
+  private final Optional messageIntervalMs;
+
+  public ContinuousFeedConfig(final JsonNode config) throws JsonValidationException {
+    this.seed = parseSeed(config);
+    this.mockCatalog = parseMockCatalog(config);
+    this.maxMessages = parseMaxMessages(config);
+    this.messageIntervalMs = parseMessageIntervalMs(config);
+  }
+
+  static long parseSeed(final JsonNode config) {
+    if (!config.has("seed")) {
+      return System.currentTimeMillis();
+    }
+    return config.get("seed").asLong();
+  }
+
+  static AirbyteCatalog parseMockCatalog(final JsonNode config) throws JsonValidationException {
+    final JsonNode mockCatalogConfig = config.get("mock_catalog");
+    final MockCatalogType mockCatalogType = MockCatalogType.valueOf(mockCatalogConfig.get("type").asText());
+    switch (mockCatalogType) {
+      case SINGLE_STREAM -> {
+        final String streamName = mockCatalogConfig.get("stream_name").asText();
+        final String streamSchemaText = mockCatalogConfig.get("stream_schema").asText();
+        final Optional streamSchema = Jsons.tryDeserialize(streamSchemaText);
+        if (streamSchema.isEmpty()) {
+          throw new JsonValidationException(String.format("Stream \"%s\" has invalid schema: %s", streamName, streamSchemaText));
+        }
+        processSchema(streamSchema.get());
+        checkSchema(streamName, streamSchema.get());
+
+        final AirbyteStream stream = new AirbyteStream().withName(streamName).withJsonSchema(streamSchema.get());
+        return new AirbyteCatalog().withStreams(Collections.singletonList(stream));
+      }
+      case MULTI_STREAM -> {
+        final String streamSchemasText = mockCatalogConfig.get("stream_schemas").asText();
+        final Optional streamSchemas = Jsons.tryDeserialize(streamSchemasText);
+        if (streamSchemas.isEmpty()) {
+          throw new JsonValidationException("Input stream schemas are invalid: " + streamSchemasText);
+        }
+
+        final List streams = new LinkedList<>();
+        for (final 
Map.Entry entry : MoreIterators.toList(streamSchemas.get().fields())) { + final String streamName = entry.getKey(); + final JsonNode streamSchema = Jsons.clone(entry.getValue()); + processSchema(streamSchema); + checkSchema(streamName, streamSchema); + streams.add(new AirbyteStream().withName(streamName).withJsonSchema(streamSchema)); + } + return new AirbyteCatalog().withStreams(streams); + } + default -> throw new IllegalArgumentException("Unsupported mock catalog type: " + mockCatalogType); + } + } + + /** + * Validate the stream schema against Json schema draft 07. + */ + private static void checkSchema(final String streamName, final JsonNode streamSchema) throws JsonValidationException { + final Set validationMessages = SCHEMA_VALIDATOR.validate(JSON_SCHEMA_DRAFT_07, streamSchema); + if (!validationMessages.isEmpty()) { + throw new JsonValidationException(String.format( + "Stream \"%s\" has invalid schema.\n- Errors: %s\n- Schema: %s", + streamName, + Strings.join(validationMessages, "; "), + streamSchema.toString())); + } + } + + /** + * Patch the schema so that 1) it allows no additional properties, and 2) all fields are required. + * This is necessary because the mock Json object generation library may add extra properties, or + * omit non-required fields. TODO (liren): patch the library so we don't need to patch the schema + * here. + */ + private static void processSchema(final JsonNode schema) { + if (schema.has("type") && schema.get("type").asText().equals("object")) { + // disallow additional properties + ((ObjectNode) schema).put("additionalProperties", false); + if (!schema.has("properties")) { + return; + } + // mark every field as required + final ArrayNode requiredFields = MAPPER.createArrayNode(); + MoreIterators.toList(schema.get("properties").fieldNames()).forEach(requiredFields::add); + ((ObjectNode) schema).set("required", requiredFields); + + final Iterator iterator = schema.get("properties").elements(); + while (iterator.hasNext()) { + processSchema(iterator.next()); + } + } + } + + static long parseMaxMessages(final JsonNode config) { + return config.get("max_messages").asLong(); + } + + static Optional parseMessageIntervalMs(final JsonNode config) { + if (config.has("message_interval_ms")) { + final long messageIntervalMs = config.get("message_interval_ms").asLong(); + if (messageIntervalMs > 0) { + return Optional.of(messageIntervalMs); + } + } + return Optional.empty(); + } + + public long getSeed() { + return seed; + } + + public AirbyteCatalog getMockCatalog() { + return mockCatalog; + } + + public long getMaxMessages() { + return maxMessages; + } + + public Optional getMessageIntervalMs() { + return messageIntervalMs; + } + + @Override + public String toString() { + return String.format("%s{maxMessages=%d, seed=%d, messageIntervalMs=%s, mockCatalog=%s}", + ContinuousFeedConfig.class.getSimpleName(), + maxMessages, + seed, + messageIntervalMs.toString(), + mockCatalog.toString()); + } + + @Override + public boolean equals(final Object other) { + if (!(other instanceof final ContinuousFeedConfig that)) { + return false; + } + return this.maxMessages == that.maxMessages + && this.seed == that.seed + && this.messageIntervalMs.equals(that.messageIntervalMs) + && this.mockCatalog.equals(that.mockCatalog); + } + + @Override + public int hashCode() { + return Objects.hash(seed, maxMessages, messageIntervalMs, mockCatalog); + } + +} diff --git 
a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConstants.java b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConstants.java new file mode 100644 index 0000000000000..e5dba7d81134f --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConstants.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.e2e_test; + +import net.jimblackler.jsongenerator.Configuration; + +public final class ContinuousFeedConstants { + + public static final int MOCK_JSON_MAX_TREE_SIZE = 100; + public static final Configuration MOCK_JSON_CONFIG = new Configuration() { + + @Override + public boolean isPedanticTypes() { + return true; + } + + @Override + public boolean isGenerateNulls() { + return false; + } + + @Override + public boolean isGenerateMinimal() { + return false; + } + + @Override + public float nonRequiredPropertyChance() { + return 1.0F; + } + + }; + + private ContinuousFeedConstants() {} + +} diff --git a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedSource.java b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedSource.java new file mode 100644 index 0000000000000..5f5d7b7be50c7 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedSource.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.e2e_test; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.AbstractIterator; +import com.google.common.collect.Iterators; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; +import io.airbyte.integrations.BaseConnector; +import io.airbyte.integrations.base.Source; +import io.airbyte.protocol.models.AirbyteCatalog; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteMessage.Type; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.time.Instant; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; +import java.util.Random; +import java.util.concurrent.atomic.AtomicLong; +import javax.annotation.CheckForNull; +import net.jimblackler.jsongenerator.Generator; +import net.jimblackler.jsongenerator.JsonGeneratorException; +import net.jimblackler.jsonschemafriend.Schema; +import net.jimblackler.jsonschemafriend.SchemaStore; + +public class ContinuousFeedSource extends BaseConnector implements Source { + + @Override + public AirbyteConnectionStatus check(final JsonNode jsonConfig) { + try { + final ContinuousFeedConfig sourceConfig = new ContinuousFeedConfig(jsonConfig); + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED).withMessage("Source config: " + sourceConfig); + } catch (final Exception e) { + return new 
AirbyteConnectionStatus().withStatus(Status.FAILED).withMessage(e.getMessage()); + } + } + + @Override + public AirbyteCatalog discover(final JsonNode jsonConfig) throws Exception { + final ContinuousFeedConfig sourceConfig = new ContinuousFeedConfig(jsonConfig); + return sourceConfig.getMockCatalog(); + } + + @Override + public AutoCloseableIterator read(final JsonNode jsonConfig, final ConfiguredAirbyteCatalog catalog, final JsonNode state) + throws Exception { + final ContinuousFeedConfig feedConfig = new ContinuousFeedConfig(jsonConfig); + final List> iterators = new LinkedList<>(); + + for (final ConfiguredAirbyteStream stream : catalog.getStreams()) { + final AtomicLong emittedMessages = new AtomicLong(0); + final Optional messageIntervalMs = feedConfig.getMessageIntervalMs(); + final ThreadLocal random = ThreadLocal.withInitial(() -> new Random(feedConfig.getSeed())); + + final SchemaStore schemaStore = new SchemaStore(true); + final Schema schema = schemaStore.loadSchemaJson(Jsons.serialize(stream.getStream().getJsonSchema())); + final Generator generator = new Generator(ContinuousFeedConstants.MOCK_JSON_CONFIG, schemaStore, random.get()); + + final Iterator streamIterator = new AbstractIterator<>() { + + @CheckForNull + @Override + protected AirbyteMessage computeNext() { + if (emittedMessages.get() >= feedConfig.getMaxMessages()) { + return endOfData(); + } + + if (messageIntervalMs.isPresent() && emittedMessages.get() != 0) { + try { + Thread.sleep(messageIntervalMs.get()); + } catch (final InterruptedException e) { + throw new RuntimeException(e); + } + } + + final JsonNode data; + try { + data = Jsons.jsonNode(generator.generate(schema, ContinuousFeedConstants.MOCK_JSON_MAX_TREE_SIZE)); + } catch (final JsonGeneratorException e) { + throw new RuntimeException(e); + } + emittedMessages.incrementAndGet(); + return new AirbyteMessage() + .withType(Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(stream.getStream().getName()) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(data)); + } + + }; + iterators.add(streamIterator); + } + + return AutoCloseableIterators.fromIterator(Iterators.concat(iterators.iterator())); + } + +} diff --git a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyConstants.java b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyConstants.java new file mode 100644 index 0000000000000..54e6deb16c51a --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyConstants.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.e2e_test; + +import io.airbyte.protocol.models.AirbyteCatalog; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaPrimitive; + +public final class LegacyConstants { + + public static final String DEFAULT_STREAM = "data"; + public static final String DEFAULT_COLUMN = "column1"; + public static final AirbyteCatalog DEFAULT_CATALOG = CatalogHelpers.createAirbyteCatalog( + DEFAULT_STREAM, + Field.of(DEFAULT_COLUMN, JsonSchemaPrimitive.STRING)); + + private LegacyConstants() {} + +} diff --git a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ExceptionAfterNSource.java b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyExceptionAfterNSource.java similarity index 76% rename from airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ExceptionAfterNSource.java rename to airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyExceptionAfterNSource.java index 1a4dd66e34b89..9f04e1a7bc5ac 100644 --- a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/ExceptionAfterNSource.java +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyExceptionAfterNSource.java @@ -19,10 +19,7 @@ import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.Field; -import io.airbyte.protocol.models.JsonSchemaPrimitive; import io.airbyte.protocol.models.SyncMode; import java.time.Instant; import java.util.List; @@ -35,15 +32,11 @@ * Throws an exception after it emits N record messages where N == throw_after_n_records. Ever 5th * message emitted is a state message. State messages do NOT count against N. 
*/ -public class ExceptionAfterNSource extends BaseConnector implements Source { +public class LegacyExceptionAfterNSource extends BaseConnector implements Source { - private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionAfterNSource.class); + private static final Logger LOGGER = LoggerFactory.getLogger(LegacyExceptionAfterNSource.class); - private static final String STREAM_NAME = "data"; - private static final String COLUMN_NAME = "column1"; - static final AirbyteCatalog CATALOG = CatalogHelpers.createAirbyteCatalog( - STREAM_NAME, - Field.of(COLUMN_NAME, JsonSchemaPrimitive.STRING)); + static final AirbyteCatalog CATALOG = Jsons.clone(LegacyConstants.DEFAULT_CATALOG); static { CATALOG.getStreams().get(0).setSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)); CATALOG.getStreams().get(0).setSourceDefinedCursor(true); @@ -65,9 +58,9 @@ public AutoCloseableIterator read(final JsonNode config, final C final AtomicLong recordsEmitted = new AtomicLong(); final AtomicLong recordValue; - if (state != null && state.has(COLUMN_NAME)) { + if (state != null && state.has(LegacyConstants.DEFAULT_COLUMN)) { LOGGER.info("Found state: {}", state); - recordValue = new AtomicLong(state.get(COLUMN_NAME).asLong()); + recordValue = new AtomicLong(state.get(LegacyConstants.DEFAULT_COLUMN).asLong()); } else { LOGGER.info("No state found."); recordValue = new AtomicLong(); @@ -80,26 +73,26 @@ public AutoCloseableIterator read(final JsonNode config, final C protected AirbyteMessage computeNext() { if (recordsEmitted.get() % 5 == 0 && !hasEmittedStateAtCount.get()) { - LOGGER.info("{}: emitting state record with value {}", ExceptionAfterNSource.class, recordValue.get()); + LOGGER.info("{}: emitting state record with value {}", LegacyExceptionAfterNSource.class, recordValue.get()); hasEmittedStateAtCount.set(true); return new AirbyteMessage() .withType(Type.STATE) - .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.of(COLUMN_NAME, recordValue.get())))); + .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.of(LegacyConstants.DEFAULT_COLUMN, recordValue.get())))); } else if (throwAfterNRecords > recordsEmitted.get()) { recordsEmitted.incrementAndGet(); recordValue.incrementAndGet(); hasEmittedStateAtCount.set(false); LOGGER.info("{} ExceptionAfterNSource: emitting record with value {}. 
record {} in sync.", - ExceptionAfterNSource.class, recordValue.get(), recordsEmitted.get()); + LegacyExceptionAfterNSource.class, recordValue.get(), recordsEmitted.get()); return new AirbyteMessage() .withType(Type.RECORD) .withRecord(new AirbyteRecordMessage() - .withStream(STREAM_NAME) + .withStream(LegacyConstants.DEFAULT_STREAM) .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.of(COLUMN_NAME, recordValue.get())))); + .withData(Jsons.jsonNode(ImmutableMap.of(LegacyConstants.DEFAULT_COLUMN, recordValue.get())))); } else { throw new IllegalStateException("Scheduled exceptional event."); } diff --git a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/InfiniteFeedSource.java b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyInfiniteFeedSource.java similarity index 54% rename from airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/InfiniteFeedSource.java rename to airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyInfiniteFeedSource.java index 99fe7d5cf5525..812e4d2279d19 100644 --- a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/InfiniteFeedSource.java +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/LegacyInfiniteFeedSource.java @@ -20,24 +20,17 @@ import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteRecordMessage; -import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.Field; -import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.time.Instant; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Predicate; +import java.util.function.LongPredicate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class InfiniteFeedSource extends BaseConnector implements Source { +public class LegacyInfiniteFeedSource extends BaseConnector implements Source { - private static final Logger LOGGER = LoggerFactory.getLogger(InfiniteFeedSource.class); - - public static final AirbyteCatalog CATALOG = CatalogHelpers.createAirbyteCatalog( - "data", - Field.of("column1", JsonSchemaPrimitive.STRING)); + private static final Logger LOGGER = LoggerFactory.getLogger(LegacyInfiniteFeedSource.class); @Override public AirbyteConnectionStatus check(final JsonNode config) { @@ -46,13 +39,14 @@ public AirbyteConnectionStatus check(final JsonNode config) { @Override public AirbyteCatalog discover(final JsonNode config) { - return Jsons.clone(CATALOG); + return Jsons.clone(LegacyConstants.DEFAULT_CATALOG); } @Override public AutoCloseableIterator read(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final JsonNode state) { - final Predicate anotherRecordPredicate = - config.has("max_records") ? recordNumber -> recordNumber < config.get("max_records").asLong() : recordNumber -> true; + final LongPredicate anotherRecordPredicate = config.has("max_records") + ? 
recordNumber -> recordNumber < config.get("max_records").asLong() + : recordNumber -> true; final Optional sleepTime = Optional.ofNullable(config.get("message_interval")).map(JsonNode::asLong); @@ -62,28 +56,27 @@ public AutoCloseableIterator read(final JsonNode config, final C @Override protected AirbyteMessage computeNext() { - if (anotherRecordPredicate.test(i.get())) { - if (i.get() != 0) { - if (sleepTime.isPresent()) { - try { - LOGGER.info("sleeping for {} ms", sleepTime.get()); - sleep(sleepTime.get()); - } catch (final InterruptedException e) { - throw new RuntimeException(e); - } - } - } - i.incrementAndGet(); - LOGGER.info("source emitting record {}:", i.get()); - return new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream("data") - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.of("column1", i)))); - } else { + if (!anotherRecordPredicate.test(i.get())) { return endOfData(); } + + if (sleepTime.isPresent() && i.get() != 0) { + try { + LOGGER.info("sleeping for {} ms", sleepTime.get()); + sleep(sleepTime.get()); + } catch (final InterruptedException e) { + throw new RuntimeException(e); + } + } + + i.incrementAndGet(); + LOGGER.info("source emitting record {}:", i.get()); + return new AirbyteMessage() + .withType(Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream("data") + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("column1", i)))); } }); diff --git a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/TestingSources.java b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/TestingSources.java index c36ce38c77a48..197eee153c986 100644 --- a/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/TestingSources.java +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/java/io/airbyte/integrations/source/e2e_test/TestingSources.java @@ -18,33 +18,33 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** - * This source is designed to be a switch statement for our suite of highly-specific test sourcess. 
- */ public class TestingSources extends BaseConnector implements Source { private static final Logger LOGGER = LoggerFactory.getLogger(TestingSources.class); - private final Map sourceMap; + private final Map sourceMap; - public enum TestDestinationType { - INFINITE_FEED, - EXCEPTION_AFTER_N + public enum TestingSourceType { + CONTINUOUS_FEED, + // the following are legacy types + EXCEPTION_AFTER_N, + INFINITE_FEED } public TestingSources() { - this(ImmutableMap.builder() - .put(TestDestinationType.INFINITE_FEED, new InfiniteFeedSource()) - .put(TestDestinationType.EXCEPTION_AFTER_N, new ExceptionAfterNSource()) + this(ImmutableMap.builder() + .put(TestingSourceType.CONTINUOUS_FEED, new ContinuousFeedSource()) + .put(TestingSourceType.EXCEPTION_AFTER_N, new LegacyExceptionAfterNSource()) + .put(TestingSourceType.INFINITE_FEED, new LegacyInfiniteFeedSource()) .build()); } - public TestingSources(final Map sourceMap) { + public TestingSources(final Map sourceMap) { this.sourceMap = sourceMap; } private Source selectSource(final JsonNode config) { - return sourceMap.get(TestDestinationType.valueOf(config.get("type").asText())); + return sourceMap.get(TestingSourceType.valueOf(config.get("type").asText())); } @Override @@ -67,9 +67,9 @@ public AutoCloseableIterator read(final JsonNode config, public static void main(final String[] args) throws Exception { final Source source = new TestingSources(); - LOGGER.info("starting source: {}", TestingSources.class); + LOGGER.info("Starting source: {}", TestingSources.class); new IntegrationRunner(source).run(args); - LOGGER.info("completed source: {}", TestingSources.class); + LOGGER.info("Completed source: {}", TestingSources.class); } } diff --git a/airbyte-integrations/connectors/source-e2e-test/src/main/resources/json_schema_draft_07.json b/airbyte-integrations/connectors/source-e2e-test/src/main/resources/json_schema_draft_07.json new file mode 100644 index 0000000000000..8875d422c2cf9 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/resources/json_schema_draft_07.json @@ -0,0 +1,166 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://json-schema.org/draft-07/schema#", + "title": "Core schema meta-schema", + "definitions": { + "schemaArray": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#" } + }, + "nonNegativeInteger": { + "type": "integer", + "minimum": 0 + }, + "nonNegativeIntegerDefault0": { + "allOf": [ + { "$ref": "#/definitions/nonNegativeInteger" }, + { "default": 0 } + ] + }, + "simpleTypes": { + "enum": [ + "array", + "boolean", + "integer", + "null", + "number", + "object", + "string" + ] + }, + "stringArray": { + "type": "array", + "items": { "type": "string" }, + "uniqueItems": true, + "default": [] + } + }, + "type": ["object", "boolean"], + "properties": { + "$id": { + "type": "string", + "format": "uri-reference" + }, + "$schema": { + "type": "string", + "format": "uri" + }, + "$ref": { + "type": "string", + "format": "uri-reference" + }, + "$comment": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "default": true, + "readOnly": { + "type": "boolean", + "default": false + }, + "writeOnly": { + "type": "boolean", + "default": false + }, + "examples": { + "type": "array", + "items": true + }, + "multipleOf": { + "type": "number", + "exclusiveMinimum": 0 + }, + "maximum": { + "type": "number" + }, + "exclusiveMaximum": { + "type": "number" + }, + "minimum": { + "type": "number" + }, + "exclusiveMinimum": { 
+ "type": "number" + }, + "maxLength": { "$ref": "#/definitions/nonNegativeInteger" }, + "minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, + "pattern": { + "type": "string", + "format": "regex" + }, + "additionalItems": { "$ref": "#" }, + "items": { + "anyOf": [{ "$ref": "#" }, { "$ref": "#/definitions/schemaArray" }], + "default": true + }, + "maxItems": { "$ref": "#/definitions/nonNegativeInteger" }, + "minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, + "uniqueItems": { + "type": "boolean", + "default": false + }, + "contains": { "$ref": "#" }, + "maxProperties": { "$ref": "#/definitions/nonNegativeInteger" }, + "minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, + "required": { "$ref": "#/definitions/stringArray" }, + "additionalProperties": { "$ref": "#" }, + "definitions": { + "type": "object", + "additionalProperties": { "$ref": "#" }, + "default": {} + }, + "properties": { + "type": "object", + "additionalProperties": { "$ref": "#" }, + "default": {} + }, + "patternProperties": { + "type": "object", + "additionalProperties": { "$ref": "#" }, + "propertyNames": { "format": "regex" }, + "default": {} + }, + "dependencies": { + "type": "object", + "additionalProperties": { + "anyOf": [{ "$ref": "#" }, { "$ref": "#/definitions/stringArray" }] + } + }, + "propertyNames": { "$ref": "#" }, + "const": true, + "enum": { + "type": "array", + "items": true, + "minItems": 1, + "uniqueItems": true + }, + "type": { + "anyOf": [ + { "$ref": "#/definitions/simpleTypes" }, + { + "type": "array", + "items": { "$ref": "#/definitions/simpleTypes" }, + "minItems": 1, + "uniqueItems": true + } + ] + }, + "format": { "type": "string" }, + "contentMediaType": { "type": "string" }, + "contentEncoding": { "type": "string" }, + "if": { "$ref": "#" }, + "then": { "$ref": "#" }, + "else": { "$ref": "#" }, + "allOf": { "$ref": "#/definitions/schemaArray" }, + "anyOf": { "$ref": "#/definitions/schemaArray" }, + "oneOf": { "$ref": "#/definitions/schemaArray" }, + "not": { "$ref": "#" } + }, + "default": true +} diff --git a/airbyte-integrations/connectors/source-e2e-test/src/main/resources/spec.json b/airbyte-integrations/connectors/source-e2e-test/src/main/resources/spec.json index 860e07cae9d71..14aa79720d8ca 100644 --- a/airbyte-integrations/connectors/source-e2e-test/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-e2e-test/src/main/resources/spec.json @@ -1,13 +1,106 @@ { - "documentationUrl": "https://example.com", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/e2e-test", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "E2E Test Source Spec", "type": "object", + "required": ["type"], "oneOf": [ { - "title": "Exception After N", + "title": "Continuous Feed", + "required": ["type", "max_messages", "mock_catalog"], + "description": "A mock mode that will emit random messages based on the input schema.", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "const": "CONTINUOUS_FEED", + "default": "CONTINUOUS_FEED", + "order": 10 + }, + "max_messages": { + "title": "Max Records", + "description": "Number of records to emit per stream. Min 1. Max 100 billion.", + "type": "integer", + "default": 100, + "min": 1, + "max": 100000000000, + "order": 20 + }, + "seed": { + "title": "Random Seed", + "description": "When the seed is unspecified, the current time millis will be used as the seed. 
Range: [0, 1000000].", + "type": "integer", + "default": 0, + "examples": [42], + "min": 0, + "max": 1000000, + "order": 30 + }, + "message_interval_ms": { + "title": "Message Interval (ms)", + "description": "Interval between messages in ms. Min 0 ms. Max 60000 ms (1 minute).", + "type": "integer", + "min": 0, + "max": 60000, + "default": 0, + "order": 40 + }, + "mock_catalog": { + "title": "Mock Catalog", + "type": "object", + "order": 50, + "oneOf": [ + { + "title": "Single Stream", + "description": "A catalog with one stream.", + "required": ["type", "stream_name", "stream_schema"], + "properties": { + "type": { + "type": "string", + "const": "SINGLE_STREAM", + "default": "SINGLE_STREAM" + }, + "stream_name": { + "title": "Stream Name", + "description": "Name of the data stream.", + "type": "string", + "default": "data_stream" + }, + "stream_schema": { + "title": "Stream Schema", + "description": "A Json schema for the stream. The schema should be compatible with draft-07. See this doc for examples.", + "type": "string", + "default": "{ \"type\": \"object\", \"properties\": { \"column1\": { \"type\": \"string\" } } }" + } + } + }, + { + "title": "Multi-Stream", + "description": "A catalog with multiple data streams.", + "required": ["type", "stream_schemas"], + "properties": { + "type": { + "type": "string", + "const": "MULTI_STREAM", + "default": "MULTI_STREAM" + }, + "stream_schemas": { + "title": "Streams and Schemas", + "description": "A Json object specifying multiple data streams and their schemas. Each key in this object is one stream name. Each value is the schema for that stream. The schema should be compatible with draft-07. See this doc for examples.", + "type": "string", + "default": "{ \"stream1\": { \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"string\" } } }, \"stream2\": { \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"boolean\" } } } }" + } + } + } + ] + } + } + }, + { + "title": "Legacy Exception After N", "required": ["type", "throw_after_n_records"], + "description": "A legacy mode from v0.1.1 mainly for unit tests. The catalog has one \"data\" stream, which has one string field \"column1\". This mode will throw an exception after N messages.", "additionalProperties": false, "properties": { "type": { @@ -17,14 +110,16 @@ }, "throw_after_n_records": { "title": "Throw After N Records", - "description": "Number of records to emit before throwing an exception.", - "type": "integer" + "description": "Number of records to emit before throwing an exception. Min 1.", + "type": "integer", + "min": 1 } } }, { - "title": "Infinite Feed", + "title": "Legacy Infinite Feed", "required": ["type", "max_records"], + "description": "A legacy mode from v0.1.1 mainly for unit tests. The catalog has one \"data\" stream, which has one string field \"column1\". 
This mode will emit messages infinitely.", "additionalProperties": true, "properties": { "type": { diff --git a/airbyte-integrations/connectors/source-e2e-test/src/test-integration/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-e2e-test/src/test-integration/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedSourceAcceptanceTest.java new file mode 100644 index 0000000000000..929fa941488ac --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/test-integration/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedSourceAcceptanceTest.java @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.e2e_test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.e2e_test.ContinuousFeedConfig.MockCatalogType; +import io.airbyte.integrations.source.e2e_test.TestingSources.TestingSourceType; +import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; +import io.airbyte.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConnectorSpecification; +import io.airbyte.validation.json.JsonSchemaValidator; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; + +public class ContinuousFeedSourceAcceptanceTest extends SourceAcceptanceTest { + + private static final int MAX_MESSAGES = ThreadLocalRandom.current().nextInt(10, 20); + private static final JsonSchemaValidator JSON_VALIDATOR = new JsonSchemaValidator(); + private static final String STREAM_1 = "stream1"; + private static final JsonNode SCHEMA_1 = Jsons.deserialize(""" + { + "type": "object", + "properties": { + "field1": { "type": "integer" } + } + } + """); + private static final String STREAM_2 = "stream2"; + private static final JsonNode SCHEMA_2 = Jsons.deserialize(""" + { + "type": "object", + "properties": { + "column1": { "type": "string" }, + "column2": { + "type": "object", + "properties": { + "field1": { "type": "array", "items": { "type": "boolean" } }, + "field2": { "type": "integer" } + } + } + } + } + """); + + private JsonNode config; + + @Override + protected String getImageName() { + return "airbyte/source-e2e-test:dev"; + } + + @Override + protected JsonNode getConfig() { + return this.config; + } + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + + final JsonNode mockCatalog = Jsons.jsonNode(ImmutableMap.builder() + .put("type", MockCatalogType.MULTI_STREAM) + .put("stream_schemas", String.format("{ \"%s\": %s, \"%s\": %s }", + STREAM_1, + Jsons.serialize(SCHEMA_1), + STREAM_2, + Jsons.serialize(SCHEMA_2))) + .build()); + this.config = Jsons.jsonNode(ImmutableMap.builder() + .put("type", TestingSourceType.CONTINUOUS_FEED) + .put("seed", 1024) + .put("message_interval_ms", 0) + .put("max_messages", MAX_MESSAGES) + .put("mock_catalog", 
mockCatalog) + .build()); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + // do nothing + } + + @Override + protected ConnectorSpecification getSpec() throws IOException { + return Jsons.deserialize(MoreResources.readResource("spec.json"), ConnectorSpecification.class); + } + + @Override + protected ConfiguredAirbyteCatalog getConfiguredCatalog() throws JsonValidationException { + final ContinuousFeedConfig feedConfig = new ContinuousFeedConfig(this.config); + return CatalogHelpers.toDefaultConfiguredCatalog(feedConfig.getMockCatalog()); + } + + @Override + protected JsonNode getState() { + return Jsons.jsonNode(new HashMap<>()); + } + + @Override + protected List getRegexTests() { + return Collections.emptyList(); + } + + @Override + protected void assertRecordMessages(final List recordMessages) { + int index = 0; + // the first N messages are from stream 1 + while (index < MAX_MESSAGES) { + final AirbyteRecordMessage message = recordMessages.get(index); + assertEquals(STREAM_1, message.getStream()); + assertTrue(JSON_VALIDATOR.validate(SCHEMA_1, message.getData()).isEmpty()); + ++index; + } + // the second N messages are from stream 2 + while (index < MAX_MESSAGES * 2) { + final AirbyteRecordMessage message = recordMessages.get(index); + assertEquals(STREAM_2, message.getStream()); + assertTrue(JSON_VALIDATOR.validate(SCHEMA_2, message.getData()).isEmpty()); + ++index; + } + } + +} diff --git a/airbyte-integrations/connectors/source-e2e-test/src/test-integration/java/io/airbyte/integrations/source/e2e_test/LegacyInfiniteFeedSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-e2e-test/src/test-integration/java/io/airbyte/integrations/source/e2e_test/LegacyInfiniteFeedSourceAcceptanceTest.java new file mode 100644 index 0000000000000..5504d86c78520 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/test-integration/java/io/airbyte/integrations/source/e2e_test/LegacyInfiniteFeedSourceAcceptanceTest.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.e2e_test; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; +import io.airbyte.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +public class LegacyInfiniteFeedSourceAcceptanceTest extends SourceAcceptanceTest { + + private JsonNode config; + + @Override + protected String getImageName() { + return "airbyte/source-e2e-test:dev"; + } + + @Override + protected JsonNode getConfig() { + return this.config; + } + + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + this.config = Jsons.jsonNode(ImmutableMap.builder() + .put("type", TestingSources.TestingSourceType.INFINITE_FEED) + .put("max_records", 10) + .build()); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + // do nothing + } + + @Override + protected ConnectorSpecification getSpec() throws IOException { + return Jsons.deserialize(MoreResources.readResource("spec.json"), ConnectorSpecification.class); + } + + @Override + protected ConfiguredAirbyteCatalog getConfiguredCatalog() { + return CatalogHelpers.toDefaultConfiguredCatalog(LegacyConstants.DEFAULT_CATALOG); + } + + @Override + protected JsonNode getState() { + return Jsons.jsonNode(new HashMap<>()); + } + + @Override + protected List getRegexTests() { + return Collections.emptyList(); + } + +} diff --git a/airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConfigTest.java b/airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConfigTest.java new file mode 100644 index 0000000000000..06ffc24fa1a8c --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/ContinuousFeedConfigTest.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.e2e_test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.airbyte.commons.jackson.MoreMappers; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.commons.util.MoreIterators; +import io.airbyte.protocol.models.AirbyteCatalog; +import io.airbyte.validation.json.JsonValidationException; +import java.util.Optional; +import java.util.Random; +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.provider.ArgumentsSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class ContinuousFeedConfigTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(ContinuousFeedConfigTest.class); + + private static final ObjectMapper MAPPER = MoreMappers.initMapper(); + private static final Random RANDOM = new Random(); + + @Test + public void testParseSeed() { + final long seed = RANDOM.nextLong(); + assertEquals(seed, ContinuousFeedConfig.parseSeed(Jsons.deserialize(String.format("{ \"seed\": %d }", seed)))); + } + + @Test + public void testParseMaxMessages() { + final long maxMessages = RANDOM.nextLong(); + assertEquals(maxMessages, ContinuousFeedConfig.parseMaxMessages(Jsons.deserialize(String.format("{ \"max_messages\": %d }", maxMessages)))); + } + + @Test + public void testParseMessageIntervalMs() { + assertEquals(Optional.empty(), ContinuousFeedConfig.parseMessageIntervalMs(Jsons.deserialize("{}"))); + assertEquals(Optional.empty(), ContinuousFeedConfig.parseMessageIntervalMs(Jsons.deserialize("{ \"message_interval_ms\": -1 }"))); + assertEquals(Optional.empty(), ContinuousFeedConfig.parseMessageIntervalMs(Jsons.deserialize("{ \"message_interval_ms\": 0 }"))); + assertEquals(Optional.of(999L), ContinuousFeedConfig.parseMessageIntervalMs(Jsons.deserialize("{ \"message_interval_ms\": 999 }"))); + } + + public static class ContinuousFeedConfigTestCaseProvider implements ArgumentsProvider { + + @Override + public Stream provideArguments(final ExtensionContext context) throws Exception { + final JsonNode testCases = + Jsons.deserialize(MoreResources.readResource("parse_mock_catalog_test_cases.json")); + return MoreIterators.toList(testCases.elements()).stream().map(testCase -> { + final JsonNode sourceConfig = MAPPER.createObjectNode().set("mock_catalog", testCase.get("mockCatalog")); + final boolean invalidSchema = testCase.has("invalidSchema") && testCase.get("invalidSchema").asBoolean(); + final AirbyteCatalog expectedCatalog = invalidSchema ? 
null : Jsons.object(testCase.get("expectedCatalog"), AirbyteCatalog.class); + return Arguments.of( + testCase.get("testCase").asText(), + sourceConfig, + invalidSchema, + expectedCatalog); + }); + } + + } + + @ParameterizedTest + @ArgumentsSource(ContinuousFeedConfigTestCaseProvider.class) + public void testParseMockCatalog(final String testCaseName, + final JsonNode mockConfig, + final boolean invalidSchema, + final AirbyteCatalog expectedCatalog) + throws Exception { + if (invalidSchema) { + assertThrows(JsonValidationException.class, () -> ContinuousFeedConfig.parseMockCatalog(mockConfig)); + } else { + final AirbyteCatalog actualCatalog = ContinuousFeedConfig.parseMockCatalog(mockConfig); + assertEquals(expectedCatalog.getStreams(), actualCatalog.getStreams()); + } + } + +} diff --git a/airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/ExceptionAfterNSourceTest.java b/airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/LegacyExceptionAfterNSourceTest.java similarity index 92% rename from airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/ExceptionAfterNSourceTest.java rename to airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/LegacyExceptionAfterNSourceTest.java index c4c7222dee554..900dfd0a15e74 100644 --- a/airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/ExceptionAfterNSourceTest.java +++ b/airbyte-integrations/connectors/source-e2e-test/src/test/java/io/airbyte/integrations/source/e2e_test/LegacyExceptionAfterNSourceTest.java @@ -21,16 +21,16 @@ import java.time.Instant; import org.junit.jupiter.api.Test; -class ExceptionAfterNSourceTest { +class LegacyExceptionAfterNSourceTest { @SuppressWarnings("Convert2MethodRef") @Test void test() { - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(ExceptionAfterNSource.CATALOG); + final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(LegacyExceptionAfterNSource.CATALOG); configuredCatalog.getStreams().get(0).setSyncMode(SyncMode.INCREMENTAL); final JsonNode config = Jsons.jsonNode(ImmutableMap.of("throw_after_n_records", 10)); - final AutoCloseableIterator read = new ExceptionAfterNSource().read(config, configuredCatalog, null); + final AutoCloseableIterator read = new LegacyExceptionAfterNSource().read(config, configuredCatalog, null); assertEquals(getStateMessage(0L).getState().getData(), read.next().getState().getData()); assertEquals(getRecordMessage(1L).getRecord().getData(), read.next().getRecord().getData()); assertEquals(getRecordMessage(2L).getRecord().getData(), read.next().getRecord().getData()); diff --git a/airbyte-integrations/connectors/source-e2e-test/src/test/resources/parse_mock_catalog_test_cases.json b/airbyte-integrations/connectors/source-e2e-test/src/test/resources/parse_mock_catalog_test_cases.json new file mode 100644 index 0000000000000..cdb86a1688b88 --- /dev/null +++ b/airbyte-integrations/connectors/source-e2e-test/src/test/resources/parse_mock_catalog_test_cases.json @@ -0,0 +1,104 @@ +[ + { + "testCase": "single stream", + "mockCatalog": { + "type": "SINGLE_STREAM", + "stream_name": "my_stream", + "stream_schema": "{ \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"string\" }, \"field2\": { \"type\": \"number\" } } }" + }, + "expectedCatalog": { + "streams": 
[ + { + "name": "my_stream", + "json_schema": { + "type": "object", + "additionalProperties": false, + "required": ["field1", "field2"], + "properties": { + "field1": { + "type": "string" + }, + "field2": { + "type": "number" + } + } + } + } + ] + } + }, + { + "testCase": "single stream with malformed schema", + "mockCatalog": { + "type": "SINGLE_STREAM", + "stream_name": "my_stream", + "stream_schema": "[123, 456]" + }, + "invalidSchema": true + }, + { + "testCase": "single stream with invalid schema", + "mockCatalog": { + "type": "SINGLE_STREAM", + "stream_name": "my_stream", + "stream_schema": "{ \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"invalid_type\" }, \"field2\": { \"type\": \"number\" } } }" + }, + "invalidSchema": true + }, + { + "testCase": "multi stream", + "mockCatalog": { + "type": "MULTI_STREAM", + "stream_schemas": "{ \"stream1\": { \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"string\" }, \"field2\": { \"type\": \"number\" } } }, \"stream2\": { \"type\": \"object\", \"properties\": { \"column1\": { \"type\": \"string\" } } } }" + }, + "expectedCatalog": { + "streams": [ + { + "name": "stream1", + "json_schema": { + "type": "object", + "properties": { + "field1": { + "type": "string" + }, + "field2": { + "type": "number" + } + }, + "additionalProperties": false, + "required": ["field1", "field2"] + } + }, + { + "name": "stream2", + "json_schema": { + "type": "object", + "properties": { + "column1": { + "type": "string" + } + }, + "additionalProperties": false, + "required": ["column1"] + } + } + ] + } + }, + { + "testCase": "multi stream with malformed schema", + "mockCatalog": { + "type": "MULTI_STREAM", + "stream_schemas": "{ \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"string\" }, \"field2\": { \"type\": \"number\" } } }" + }, + "invalidSchema": true + }, + { + "testCase": "multi stream with invalid schema", + "mockCatalog": { + "type": "MULTI_STREAM", + "stream_schemas": "{ \"stream1\": { \"type\": \"object\", \"properties\": { \"field1\": { \"type\": \"string\" }, \"field2\": [\"invalid field spec\"] } }, \"stream2\": { \"type\": \"object\", \"properties\": { \"column1\": { \"type\": \"string\" } } } }" + }, + "invalidSchema": true + } +] diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 1670c75bfcbc5..9ed23036a648f 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -61,6 +61,7 @@ * [Dixa](integrations/sources/dixa.md) * [Drift](integrations/sources/drift.md) * [Drupal](integrations/sources/drupal.md) + * [End-to-End Testing](integrations/sources/e2e-test.md) * [Exchange Rates API](integrations/sources/exchangeratesapi.md) * [Facebook Marketing](integrations/sources/facebook-marketing.md) * [Facebook Pages](integrations/sources/facebook-pages.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 37ba4b21c894c..1150285eb9bab 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -38,6 +38,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [Dixa](sources/dixa.md) | Alpha | | [Drift](sources/drift.md) | Beta | | [Drupal](sources/drupal.md) | Beta | +| [End-to-End Testing](sources/e2e-test.md) | Alpha | | [Exchange Rates API](sources/exchangeratesapi.md) | Certified | | [Facebook Marketing](sources/facebook-marketing.md) | Beta | | [Facebook Pages](sources/facebook-pages.md) | Alpha | @@ -148,6 +149,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | 
[ClickHouse](destinations/clickhouse.md) | Alpha |
 | [Databricks](destinations/databricks.md) | Beta |
 | [Elasticsearch](destinations/elasticsearch.md) | Alpha |
+| [End-to-End Testing](destinations/e2e-test.md) | Beta |
 | [Google Cloud Storage \(GCS\)](destinations/gcs.md) | Alpha |
 | [Google Firestore](destinations/firestore.md) | Alpha |
 | [Google Pubsub](destinations/pubsub.md) | Alpha |
diff --git a/docs/integrations/destinations/e2e-test.md b/docs/integrations/destinations/e2e-test.md
index 1317f802fd16a..133e3a310c0b3 100644
--- a/docs/integrations/destinations/e2e-test.md
+++ b/docs/integrations/destinations/e2e-test.md
@@ -42,7 +42,7 @@ This mode throws an exception after receiving a configurable number of messages.
 
 ## CHANGELOG
 
-### E2E Testing Destination
+### OSS (E2E Testing Destination)
 
 | Version | Date | Pull Request | Subject |
 | :------ | :--------- | :------------------------------------------------------- | :--- |
@@ -50,7 +50,7 @@ This mode throws an exception after receiving a configurable number of messages.
 | 0.2.0 | 2021-12-16 | [\#8824](https://github.com/airbytehq/airbyte/pull/8824) | Add multiple logging modes. |
 | 0.1.0 | 2021-05-25 | [\#3290](https://github.com/airbytehq/airbyte/pull/3290) | Create initial version. |
 
-### E2E Testing (`/dev/null`) Destination
+### Cloud (E2E Testing (`/dev/null`) Destination)
 
 | Version | Date | Pull Request | Subject |
 | :------ | :--------- | :------------------------------------------------------- | :--- |
diff --git a/docs/integrations/sources/e2e-test.md b/docs/integrations/sources/e2e-test.md
new file mode 100644
index 0000000000000..64a27c934f047
--- /dev/null
+++ b/docs/integrations/sources/e2e-test.md
@@ -0,0 +1,40 @@
+# End-to-End Testing Source
+
+## Overview
+
+This is a mock source for testing the Airbyte pipeline. It can generate arbitrary data streams.
+
+## Mode
+
+### Continuous
+
+**This is the only mode available on Airbyte Cloud.**
+
+This mode allows users to specify a single-stream or multi-stream catalog with arbitrary schema. The schema should be compliant with Json schema [draft-07](https://json-schema.org/draft-07/json-schema-release-notes.html).
+
+The single-stream catalog config exists just for convenience, since in many testing cases, one stream is enough. If only one stream is specified in the multi-stream catalog config, it is equivalent to a single-stream catalog config.
+
+| Mock Catalog Type | Parameters | Type | Required | Default | Notes |
+| --- | --- | --- | --- | --- | --- |
+| Single-stream | stream name | string | yes | | Name of the stream in the catalog. |
+| | stream schema | json | yes | | Json schema of the stream in the catalog. It must be a valid Json schema. |
+| Multi-stream | streams and schemas | json | yes | | A Json object specifying multiple data streams and their schemas. Each key in this object is one stream name. Each value is the schema for that stream. |
+| Both | max records | integer | yes | 100 | The number of record messages to emit from this connector. Min 1. Max 100 billion. |
+| | random seed | integer | no | current time millis | The seed is used in random Json object generation. Min 0. Max 1 million. |
+| | message interval | integer | no | 0 | The time interval between messages in milliseconds. Min 0 ms. Max 60000 ms (1 minute). 
| 
+
+## Changelog
+
+### OSS
+
+| Version | Date | Pull request | Notes |
+| --- | --- | --- | --- |
+| 1.0.0 | 2022-01-23 | [\#9720](https://github.com/airbytehq/airbyte/pull/9720) | Add new continuous feed mode that supports arbitrary catalog specification. |
+| 0.1.1 | 2021-12-16 | [\#8217](https://github.com/airbytehq/airbyte/pull/8217) | Fix sleep time in infinite feed mode. |
+| 0.1.0 | 2021-07-23 | [\#3290](https://github.com/airbytehq/airbyte/pull/3290) [\#4939](https://github.com/airbytehq/airbyte/pull/4939) | Initial release. |
+
+### Cloud
+
+| Version | Date | Pull request | Notes |
+| --- | --- | --- | --- |
+| 1.0.0 | 2022-01-23 | [\#9720](https://github.com/airbytehq/airbyte/pull/9720) | Add new continuous feed mode that supports arbitrary catalog specification. Initial release to cloud. |
diff --git a/settings.gradle b/settings.gradle
index 779ee999742f8..de9198e3ace06 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -15,6 +15,9 @@ sourceControl {
     gitRepository("https://github.com/airbytehq/json-avro-converter.git") {
         producesModule("tech.allegro.schema.json2avro:converter")
     }
+    gitRepository("https://github.com/airbytehq/jsongenerator.git") {
+        producesModule("net.jimblackler:jsongenerator")
+    }
 }
 
 rootProject.name = 'airbyte'

From 0ed20bca613970d8677c3b80bc23de80582c1ca0 Mon Sep 17 00:00:00 2001
From: Serhii Lazebnyi <53845333+lazebnyi@users.noreply.github.com>
Date: Mon, 24 Jan 2022 10:54:24 +0200
Subject: [PATCH 198/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20Salesforce:=20F?=
 =?UTF-8?q?ix=20error=20during=20generating=20schema=20(#9478)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add filtering by queryable flag for available streams
* Updated PR number and version
* add logs for failed bulk jobs
* Added logs for collect stream objects options if code 404
* Fixed unittests
* Fixed integration tests
* Bumped docker version
* Update version in seed
* Update streams.py
* Added logger for print not queriable streams

Co-authored-by: antixar 
---
 .../b117307c-14b6-41aa-9422-947e34922962.json |  2 +-
 .../resources/seed/source_definitions.yaml    |  2 +-
 .../src/main/resources/seed/source_specs.yaml |  2 +-
 .../connectors/source-salesforce/Dockerfile   |  2 +-
 .../source_salesforce/api.py                  | 22 ++++++++----
 .../source_salesforce/source.py               | 10 +++---
 .../source_salesforce/streams.py              |  7 ++++
 .../source-salesforce/unit_tests/unit_test.py | 35 ++++++++++++++-----
 docs/integrations/sources/salesforce.md       |  1 +
 9 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json
index e0eb9d47f2ea3..ca6cea9814a58 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "b117307c-14b6-41aa-9422-947e34922962",
   "name": "Salesforce",
   "dockerRepository": "airbyte/source-salesforce",
-  "dockerImageTag": "0.1.13",
+  "dockerImageTag": "0.1.18",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/salesforce",
   "icon": "salesforce.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml 
b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 68533b3b153d2..ba3387357d978 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -634,7 +634,7 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 0.1.17 + dockerImageTag: 0.1.18 documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index e01dc8a2655b3..9e81a13f9b627 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6700,7 +6700,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:0.1.17" +- dockerImage: "airbyte/source-salesforce:0.1.18" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile index 37713bed463b8..5454cb92c1b44 100644 --- a/airbyte-integrations/connectors/source-salesforce/Dockerfile +++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile @@ -25,5 +25,5 @@ COPY source_salesforce ./source_salesforce ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.17 +LABEL io.airbyte.version=0.1.18 LABEL io.airbyte.name=airbyte/source-salesforce diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/api.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/api.py index 6487b4dcf507e..ad5300c29d00e 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/api.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/api.py @@ -208,9 +208,16 @@ def filter_streams(self, stream_name: str) -> bool: def get_validated_streams(self, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog = None): salesforce_objects = self.describe()["sobjects"] - stream_names = [stream_object["name"] for stream_object in salesforce_objects] + stream_objects = [] + for stream_object in salesforce_objects: + if stream_object["queryable"]: + stream_objects.append(stream_object) + else: + self.logger.warn(f"Stream {stream_object['name']} is not queryable and will be ignored.") + + stream_names = [stream_object["name"] for stream_object in stream_objects] if catalog: - return [configured_stream.stream.name for configured_stream in catalog.streams] + return [configured_stream.stream.name for configured_stream in catalog.streams], stream_objects if config.get("streams_criteria"): filtered_stream_list = [] @@ -221,7 +228,8 @@ def get_validated_streams(self, config: Mapping[str, Any], catalog: ConfiguredAi stream_names = list(set(filtered_stream_list)) validated_streams = [stream_name for stream_name in stream_names if self.filter_streams(stream_name)] - return validated_streams + validated_stream_objects = [stream_object for stream_object in stream_objects if stream_object["name"] in validated_streams] + return validated_streams, validated_stream_objects @default_backoff_handler(max_tries=5, 
factor=15) def _make_request( @@ -253,7 +261,7 @@ def login(self): self.access_token = auth["access_token"] self.instance_url = auth["instance_url"] - def describe(self, sobject: str = None) -> Mapping[str, Any]: + def describe(self, sobject: str = None, stream_objects: List = None) -> Mapping[str, Any]: """Describes all objects or a specific object""" headers = self._get_standard_headers() @@ -261,10 +269,12 @@ def describe(self, sobject: str = None) -> Mapping[str, Any]: url = f"{self.instance_url}/services/data/{self.version}/{endpoint}" resp = self._make_request("GET", url, headers=headers) + if resp.status_code == 404: + self.logger.error(f"Filtered stream objects: {stream_objects}") return resp.json() - def generate_schema(self, stream_name: str = None) -> Mapping[str, Any]: - response = self.describe(stream_name) + def generate_schema(self, stream_name: str = None, stream_objects: List = None) -> Mapping[str, Any]: + response = self.describe(stream_name, stream_objects) schema = {"$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "additionalProperties": True, "properties": {}} for field in response["fields"]: schema["properties"][field["name"]] = self.field_to_property_schema(field) diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/source.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/source.py index c28e74045f811..1784d27407e32 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/source.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/source.py @@ -29,7 +29,7 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> @classmethod def generate_streams( - cls, config: Mapping[str, Any], stream_names: List[str], sf_object: Salesforce, state: Mapping[str, Any] = None + cls, config: Mapping[str, Any], stream_names: List[str], sf_object: Salesforce, state: Mapping[str, Any] = None, stream_objects: List = None ) -> List[Stream]: """ "Generates a list of stream by their names. 
It can be used for different tests too""" authenticator = TokenAuthenticator(sf_object.access_token) @@ -38,7 +38,7 @@ def generate_streams( streams_kwargs = {} stream_state = state.get(stream_name, {}) if state else {} - selected_properties = sf_object.generate_schema(stream_name).get("properties", {}) + selected_properties = sf_object.generate_schema(stream_name, stream_objects).get("properties", {}) # Salesforce BULK API currently does not support loading fields with data type base64 and compound data properties_not_supported_by_bulk = { key: value for key, value in selected_properties.items() if value.get("format") == "base64" or "object" in value["type"] @@ -52,7 +52,7 @@ def generate_streams( full_refresh, incremental = BulkSalesforceStream, BulkIncrementalSalesforceStream streams_kwargs["wait_timeout"] = config.get("wait_timeout") - json_schema = sf_object.generate_schema(stream_name) + json_schema = sf_object.generate_schema(stream_name, stream_objects) pk, replication_key = sf_object.get_pk_and_replication_key(json_schema) streams_kwargs.update(dict(sf_api=sf_object, pk=pk, stream_name=stream_name, schema=json_schema, authenticator=authenticator)) if replication_key and stream_name not in UNSUPPORTED_FILTERING_STREAMS: @@ -64,8 +64,8 @@ def generate_streams( def streams(self, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog = None, state: Mapping[str, Any] = None) -> List[Stream]: sf = self._get_sf_object(config) - stream_names = sf.get_validated_streams(config=config, catalog=catalog) - return self.generate_streams(config, stream_names, sf, state=state) + stream_names, stream_objects = sf.get_validated_streams(config=config, catalog=catalog) + return self.generate_streams(config, stream_names, sf, state=state, stream_objects=stream_objects) def read( self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py index aa6d3ca9fd6d1..adc0d852095d2 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py @@ -133,6 +133,8 @@ def transform_empty_string_to_none(instance, schema): def _send_http_request(self, method: str, url: str, json: dict = None): headers = self.authenticator.get_auth_header() response = self._session.request(method, url=url, headers=headers, json=json) + if response.status_code not in [200, 204]: + self.logger.error(f"error body: {response.text}") response.raise_for_status() return response @@ -141,6 +143,7 @@ def create_stream_job(self, query: str, url: str) -> Optional[str]: docs: https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/create_job.htm """ json = {"operation": "queryAll", "query": query, "contentType": "CSV", "columnDelimiter": "COMMA", "lineEnding": "LF"} + try: response = self._send_http_request("POST", url, json=json) job_id = response.json()["id"] @@ -191,6 +194,10 @@ def wait_for_job(self, url: str) -> str: job_info = self._send_http_request("GET", url=url).json() job_status = job_info["state"] if job_status in ["JobComplete", "Aborted", "Failed"]: + if job_status != "JobComplete": + # this is only job metadata without payload + self.logger.error(f"JobStatus: {job_status}, full job response: {job_info}") + return job_status if delay_timeout < 
self.MAX_CHECK_INTERVAL_SECONDS: diff --git a/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py index 19db4c2ddcb2e..b014006d12f75 100644 --- a/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-salesforce/unit_tests/unit_test.py @@ -302,21 +302,38 @@ def test_discover_with_streams_criteria_param(streams_criteria, predicted_filter sf_object.describe = Mock( return_value={ "sobjects": [ - {"name": "Account"}, - {"name": "AIApplications"}, - {"name": "Leads"}, - {"name": "LeadHistory"}, - {"name": "Orders"}, - {"name": "OrderHistory"}, - {"name": "CustomStream"}, - {"name": "CustomStreamHistory"}, + {"name": "Account", "queryable": True}, + {"name": "AIApplications", "queryable": True}, + {"name": "Leads", "queryable": True}, + {"name": "LeadHistory", "queryable": True}, + {"name": "Orders", "queryable": True}, + {"name": "OrderHistory", "queryable": True}, + {"name": "CustomStream", "queryable": True}, + {"name": "CustomStreamHistory", "queryable": True}, ] } ) - filtered_streams = sf_object.get_validated_streams(config=updated_config) + filtered_streams, _ = sf_object.get_validated_streams(config=updated_config) assert sorted(filtered_streams) == sorted(predicted_filtered_streams) +def test_discover_only_queryable(stream_config): + sf_object = Salesforce(**stream_config) + sf_object.login = Mock() + sf_object.access_token = Mock() + sf_object.instance_url = "https://fase-account.salesforce.com" + sf_object.describe = Mock( + return_value={ + "sobjects": [ + {"name": "Account", "queryable": True}, + {"name": "Leads", "queryable": False}, + ] + } + ) + filtered_streams, _ = sf_object.get_validated_streams(config=stream_config) + assert filtered_streams == ["Account"] + + def test_pagination_rest(stream_config, stream_api): stream_name = "ActiveFeatureLicenseMetric" state = {stream_name: {"SystemModstamp": "2122-08-22T05:08:29.000Z"}} diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index f52bed6b32de2..85f68ec3f05fe 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -737,6 +737,7 @@ List of available streams: | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:--------------------------------------------------------------------------| +| 0.1.18 | 2022-01-20 | [9478](https://github.com/airbytehq/airbyte/pull/9478) | Add available stream filtering by `queryable` flag | | 0.1.17 | 2022-01-19 | [9302](https://github.com/airbytehq/airbyte/pull/9302) | Deprecate API Type parameter | | 0.1.16 | 2022-01-18 | [9151](https://github.com/airbytehq/airbyte/pull/9151) | Fix pagination in REST API streams | | 0.1.15 | 2022-01-11 | [9409](https://github.com/airbytehq/airbyte/pull/9409) | Correcting the presence of an extra `else` handler in the error handling | From aaef33e029deacb637a00bb7cf9312f501c755b2 Mon Sep 17 00:00:00 2001 From: augan-rymkhan <93112548+augan-rymkhan@users.noreply.github.com> Date: Mon, 24 Jan 2022 15:10:04 +0600 Subject: [PATCH 199/215] Source google ads: fix for page token expired (#9608) * make slice date range 10 days * fix tests to respect date range * chaned range_days to 15 * format code * removed time_unit field * format code 2 * added comments * bump the version * updated spec and def yaml Co-authored-by: auganbay --- .../resources/seed/source_definitions.yaml | 2 +- 
.../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-google-ads/Dockerfile | 2 +- .../source_google_ads/streams.py | 32 ++++++++++++------- .../unit_tests/test_google_ads.py | 13 ++++++-- .../unit_tests/test_source.py | 13 ++++++-- docs/integrations/sources/google-ads.md | 1 + 7 files changed, 46 insertions(+), 19 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index ba3387357d978..429326d3775f4 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -245,7 +245,7 @@ - name: Google Ads sourceDefinitionId: 253487c0-2246-43ba-a21f-5116b20a2c50 dockerRepository: airbyte/source-google-ads - dockerImageTag: 0.1.21 + dockerImageTag: 0.1.22 documentationUrl: https://docs.airbyte.io/integrations/sources/google-ads icon: google-adwords.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 9e81a13f9b627..6497c5b22b701 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2308,7 +2308,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-google-ads:0.1.21" +- dockerImage: "airbyte/source-google-ads:0.1.22" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/google-ads" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-google-ads/Dockerfile b/airbyte-integrations/connectors/source-google-ads/Dockerfile index 68d3e2e289084..f0c0c1f72a3eb 100644 --- a/airbyte-integrations/connectors/source-google-ads/Dockerfile +++ b/airbyte-integrations/connectors/source-google-ads/Dockerfile @@ -13,5 +13,5 @@ RUN pip install . ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.21 +LABEL io.airbyte.version=0.1.22 LABEL io.airbyte.name=airbyte/source-google-ads diff --git a/airbyte-integrations/connectors/source-google-ads/source_google_ads/streams.py b/airbyte-integrations/connectors/source-google-ads/source_google_ads/streams.py index 18809945966d4..fec37bbebd4e0 100644 --- a/airbyte-integrations/connectors/source-google-ads/source_google_ads/streams.py +++ b/airbyte-integrations/connectors/source-google-ads/source_google_ads/streams.py @@ -13,11 +13,16 @@ def chunk_date_range( - start_date: str, conversion_window: int, field: str, end_date: str = None, time_unit: str = "months", days_of_data_storage: int = None + start_date: str, + conversion_window: int, + field: str, + end_date: str = None, + days_of_data_storage: int = None, + range_days: int = None, ) -> Iterable[Mapping[str, any]]: """ Passing optional parameter end_date for testing - Returns a list of the beginning and ending timetsamps of each month between the start date and now. + Returns a list of the beginning and ending timestamps of each `range_days` between the start date and now. 
The return value is a list of dicts {'date': str} which can be used directly with the Slack API """ intervals = [] @@ -38,7 +43,7 @@ def chunk_date_range( # Each stream_slice contains the beginning and ending timestamp for a 24 hour period while start_date < end_date: intervals.append({field: start_date.to_date_string()}) - start_date = start_date.add(**{time_unit: 1}) + start_date = start_date.add(days=range_days) return intervals @@ -64,7 +69,7 @@ class IncrementalGoogleAdsStream(GoogleAdsStream, ABC): days_of_data_storage = None cursor_field = "segments.date" primary_key = None - time_unit = "months" + range_days = 15 # date range is set to 15 days, because for conversion_window_days default value is 14. Range less than 15 days will break the integration tests. def __init__(self, start_date: str, conversion_window_days: int, time_zone: [pendulum.timezone, str], **kwargs): self.conversion_window_days = conversion_window_days @@ -80,19 +85,24 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Ite start_date=start_date, conversion_window=self.conversion_window_days, field=self.cursor_field, - time_unit=self.time_unit, days_of_data_storage=self.days_of_data_storage, + range_days=self.range_days, ) - def get_date_params( - self, stream_slice: Mapping[str, Any], cursor_field: str, end_date: pendulum.datetime = None, time_unit: str = "months" - ): + def get_date_params(self, stream_slice: Mapping[str, Any], cursor_field: str, end_date: pendulum.datetime = None): + """ + Returns `start_date` and `end_date` for the given stream_slice. + If (end_date - start_date) is a big date range (>= 1 month), it can take more than 2 hours to process all the records from the given slice. + After 2 hours next page tokens will be expired, finally resulting in page token expired error + Currently this method returns `start_date` and `end_date` with 15 days difference. + """ + end_date = end_date or pendulum.yesterday(tz=self.time_zone) start_date = pendulum.parse(stream_slice.get(cursor_field)) if start_date > pendulum.now(): return start_date.to_date_string(), start_date.add(days=1).to_date_string() - end_date = min(end_date, pendulum.parse(stream_slice.get(cursor_field)).add(**{time_unit: 1})) + end_date = min(end_date, pendulum.parse(stream_slice.get(cursor_field)).add(days=self.range_days)) # Fix issue #4806, start date should always be lower than end date. 
if start_date.add(days=1).date() >= end_date.date(): @@ -116,7 +126,7 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late return current_stream_state def get_query(self, stream_slice: Mapping[str, Any] = None) -> str: - start_date, end_date = self.get_date_params(stream_slice, self.cursor_field, time_unit=self.time_unit) + start_date, end_date = self.get_date_params(stream_slice, self.cursor_field) query = GoogleAds.convert_schema_into_query( schema=self.get_json_schema(), report_name=self.name, from_date=start_date, to_date=end_date, cursor_field=self.cursor_field ) @@ -214,5 +224,5 @@ class ClickView(IncrementalGoogleAdsStream): ClickView stream: https://developers.google.com/google-ads/api/reference/rpc/v8/ClickView """ - time_unit = "days" days_of_data_storage = 90 + range_days = 1 diff --git a/airbyte-integrations/connectors/source-google-ads/unit_tests/test_google_ads.py b/airbyte-integrations/connectors/source-google-ads/unit_tests/test_google_ads.py index 0c05d6bf70704..d61fd6d08496a 100644 --- a/airbyte-integrations/connectors/source-google-ads/unit_tests/test_google_ads.py +++ b/airbyte-integrations/connectors/source-google-ads/unit_tests/test_google_ads.py @@ -100,8 +100,15 @@ def test_get_fields_from_schema(): def test_interval_chunking(): - mock_intervals = [{"segments.date": "2021-05-18"}, {"segments.date": "2021-06-18"}, {"segments.date": "2021-07-18"}] - intervals = chunk_date_range("2021-06-01", 14, "segments.date", "2021-08-15") + mock_intervals = [ + {"segments.date": "2021-06-17"}, + {"segments.date": "2021-06-27"}, + {"segments.date": "2021-07-07"}, + {"segments.date": "2021-07-17"}, + {"segments.date": "2021-07-27"}, + {"segments.date": "2021-08-06"}, + ] + intervals = chunk_date_range("2021-07-01", 14, "segments.date", "2021-08-15", range_days=10) assert mock_intervals == intervals @@ -109,7 +116,7 @@ def test_interval_chunking(): def test_get_date_params(): # Please note that this is equal to inputted stream_slice start date + 1 day mock_start_date = "2021-05-19" - mock_end_date = "2021-06-18" + mock_end_date = "2021-06-02" mock_conversion_window_days = 14 incremental_stream_config = dict( diff --git a/airbyte-integrations/connectors/source-google-ads/unit_tests/test_source.py b/airbyte-integrations/connectors/source-google-ads/unit_tests/test_source.py index a2d496f2f2b55..1d39e2510fb76 100644 --- a/airbyte-integrations/connectors/source-google-ads/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-google-ads/unit_tests/test_source.py @@ -14,8 +14,17 @@ def test_chunk_date_range(): end_date = "2021-05-04" conversion_window = 14 field = "date" - response = chunk_date_range(start_date, conversion_window, field, end_date) - assert [{"date": "2021-02-18"}, {"date": "2021-03-18"}, {"date": "2021-04-18"}] == response + response = chunk_date_range(start_date, conversion_window, field, end_date, range_days=10) + assert [ + {"date": "2021-02-18"}, + {"date": "2021-02-28"}, + {"date": "2021-03-10"}, + {"date": "2021-03-20"}, + {"date": "2021-03-30"}, + {"date": "2021-04-09"}, + {"date": "2021-04-19"}, + {"date": "2021-04-29"}, + ] == response def test_streams_count(config): diff --git a/docs/integrations/sources/google-ads.md b/docs/integrations/sources/google-ads.md index 840746766ab2c..908e27bc25f3f 100644 --- a/docs/integrations/sources/google-ads.md +++ b/docs/integrations/sources/google-ads.md @@ -102,6 +102,7 @@ This source is constrained by whatever API limits are set for the Google Ads tha | Version | Date | Pull 
Request | Subject | | :--- | :--- | :--- | :--- | +| `0.1.22` | 2022-01-24 | [9608](https://github.com/airbytehq/airbyte/pull/9608) | Reduce stream slice date range. | | `0.1.21` | 2021-12-28 | [9149](https://github.com/airbytehq/airbyte/pull/9149) | Update title and description | | `0.1.20` | 2021-12-22 | [9071](https://github.com/airbytehq/airbyte/pull/9071) | Fix: Keyword schema enum | | `0.1.19` | 2021-12-14 | [8431](https://github.com/airbytehq/airbyte/pull/8431) | Add new streams: Geographic and Keyword | From 395a6ec3bbcb7502bc47c459df8b27cc7056e782 Mon Sep 17 00:00:00 2001 From: Noah Kawasaki <68556134+noahkawasakigoogle@users.noreply.github.com> Date: Mon, 24 Jan 2022 02:02:54 -0800 Subject: [PATCH 200/215] MIT license for docs so its consistent with connectors licensing (#9701) --- docs/LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 docs/LICENSE diff --git a/docs/LICENSE b/docs/LICENSE new file mode 100644 index 0000000000000..ec45d182fcb90 --- /dev/null +++ b/docs/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Airbyte, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
From 9ecd5a522d1c41ae760675111962bdbdef69a2d6 Mon Sep 17 00:00:00 2001 From: Tim Roes Date: Mon, 24 Jan 2022 11:05:21 +0100 Subject: [PATCH 201/215] Add license check for webapp (#9689) * Add license check for airbyte-webapp * Create scripts folder * Create licenseCheck gradle task --- airbyte-webapp/build.gradle | 10 + airbyte-webapp/package-lock.json | 429 ++++++++++++++++++++++++ airbyte-webapp/package.json | 4 +- airbyte-webapp/scripts/license-check.js | 109 ++++++ 4 files changed, 551 insertions(+), 1 deletion(-) create mode 100644 airbyte-webapp/scripts/license-check.js diff --git a/airbyte-webapp/build.gradle b/airbyte-webapp/build.gradle index 0c67957231b30..04816a6651cbb 100644 --- a/airbyte-webapp/build.gradle +++ b/airbyte-webapp/build.gradle @@ -29,6 +29,16 @@ task test(type: NpmTask) { inputs.file 'package-lock.json' } +task licenseCheck(type: NpmTask) { + dependsOn npmInstall + + args = ['run', 'license-check'] + inputs.file 'package.json' + inputs.file 'package-lock.json' +} + +// Make sure to always run a license check after we installed dependencies +npmInstall.finalizedBy licenseCheck assemble.dependsOn npm_run_build build.finalizedBy test diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 57b6ac718b7fe..f1248b4444084 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -77,6 +77,7 @@ "eslint-plugin-jest": "^24.1.5", "eslint-plugin-prettier": "^3.3.1", "husky": "^4.2.3", + "license-checker": "^25.0.1", "lint-staged": "^10.0.8", "prettier": "^2.2.1", "react-scripts": "4.0.2", @@ -17435,6 +17436,12 @@ "integrity": "sha512-9IK9EadsbHo6jLWIpxpR6pL0sazTXV6+SQv25ZB+F7Bj9mJNaOc4nCRabwd5M/JwmUa8idz6Eci6eKfJryPs6Q==", "dev": true }, + "node_modules/abbrev": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", + "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==", + "dev": true + }, "node_modules/accepts": { "version": "1.3.7", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz", @@ -17851,6 +17858,15 @@ "node": ">=0.10.0" } }, + "node_modules/array-find-index": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/array-find-index/-/array-find-index-1.0.2.tgz", + "integrity": "sha1-3wEKoSh+Fku9pvlyOwqWoexBh6E=", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/array-flatten": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-2.1.2.tgz", @@ -21859,6 +21875,15 @@ "ms": "^2.1.1" } }, + "node_modules/debuglog": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/debuglog/-/debuglog-1.0.1.tgz", + "integrity": "sha1-qiT/uaw9+aI1GDfPstJ5NgzXhJI=", + "dev": true, + "engines": { + "node": "*" + } + }, "node_modules/decamelize": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", @@ -22307,6 +22332,16 @@ "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", "dev": true }, + "node_modules/dezalgo": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/dezalgo/-/dezalgo-1.0.3.tgz", + "integrity": "sha1-f3Qt4Gb8dIvI24IFad3c5Jvw1FY=", + "dev": true, + "dependencies": { + "asap": "^2.0.0", + "wrappy": "1" + } + }, "node_modules/diff": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz", @@ -31627,6 +31662,12 @@ "integrity": "sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==", "dev": true 
}, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true + }, "node_modules/json-schema": { "version": "0.2.3", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", @@ -31853,6 +31894,45 @@ "node": ">= 0.8.0" } }, + "node_modules/license-checker": { + "version": "25.0.1", + "resolved": "https://registry.npmjs.org/license-checker/-/license-checker-25.0.1.tgz", + "integrity": "sha512-mET5AIwl7MR2IAKYYoVBBpV0OnkKQ1xGj2IMMeEFIs42QAkEVjRtFZGWmQ28WeU7MP779iAgOaOy93Mn44mn6g==", + "dev": true, + "dependencies": { + "chalk": "^2.4.1", + "debug": "^3.1.0", + "mkdirp": "^0.5.1", + "nopt": "^4.0.1", + "read-installed": "~4.0.3", + "semver": "^5.5.0", + "spdx-correct": "^3.0.0", + "spdx-expression-parse": "^3.0.0", + "spdx-satisfies": "^4.0.0", + "treeify": "^1.1.0" + }, + "bin": { + "license-checker": "bin/license-checker" + } + }, + "node_modules/license-checker/node_modules/debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dev": true, + "dependencies": { + "ms": "^2.1.1" + } + }, + "node_modules/license-checker/node_modules/semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "dev": true, + "bin": { + "semver": "bin/semver" + } + }, "node_modules/lie": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", @@ -34545,6 +34625,19 @@ "integrity": "sha512-Slf2s69+2/uAD79pVVQo8uSiC34+g8GWY8UH2Qtqv34ZfhYrxpYpfzs9Js9d6O0mbDmALuxaTlplnBTnSELcrw==", "dev": true }, + "node_modules/nopt": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-4.0.3.tgz", + "integrity": "sha512-CvaGwVMztSMJLOeXPrez7fyfObdZqNUK1cPAEzLHrTybIua9pMdmmPR5YwtfNftIOMv3DPUhFaxsZMNTQO20Kg==", + "dev": true, + "dependencies": { + "abbrev": "1", + "osenv": "^0.1.4" + }, + "bin": { + "nopt": "bin/nopt.js" + } + }, "node_modules/normalize-package-data": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz", @@ -34621,6 +34714,12 @@ "node": ">=0.10.0" } }, + "node_modules/npm-normalize-package-bin": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/npm-normalize-package-bin/-/npm-normalize-package-bin-1.0.1.tgz", + "integrity": "sha512-EPfafl6JL5/rU+ot6P3gRSCpPDW5VmIzX959Ob1+ySFUuuYHWHekXpwdUZcKP5C+DS4GEtdJluwBjnsNDl+fSA==", + "dev": true + }, "node_modules/npm-run-path": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", @@ -35026,6 +35125,34 @@ "integrity": "sha1-hUNzx/XCMVkU/Jv8a9gjj92h7Cc=", "dev": true }, + "node_modules/os-homedir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", + "integrity": "sha1-/7xJiDNuDoM94MFox+8VISGqf7M=", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/os-tmpdir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", + "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=", + "dev": true, + 
"engines": { + "node": ">=0.10.0" + } + }, + "node_modules/osenv": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/osenv/-/osenv-0.1.5.tgz", + "integrity": "sha512-0CWcCECdMVc2Rw3U5w9ZjqX6ga6ubk1xDVKxtBQPK7wis/0F2r9T6k4ydGYhecl7YUBxBVxhL5oisPsNxAPe2g==", + "dev": true, + "dependencies": { + "os-homedir": "^1.0.0", + "os-tmpdir": "^1.0.0" + } + }, "node_modules/overlayscrollbars": { "version": "1.13.1", "resolved": "https://registry.npmjs.org/overlayscrollbars/-/overlayscrollbars-1.13.1.tgz", @@ -39031,6 +39158,44 @@ "react-dom": ">=0.14.0" } }, + "node_modules/read-installed": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/read-installed/-/read-installed-4.0.3.tgz", + "integrity": "sha1-/5uLZ/GH0eTCm5/rMfayI6zRkGc=", + "dev": true, + "dependencies": { + "debuglog": "^1.0.1", + "read-package-json": "^2.0.0", + "readdir-scoped-modules": "^1.0.0", + "semver": "2 || 3 || 4 || 5", + "slide": "~1.1.3", + "util-extend": "^1.0.1" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.2" + } + }, + "node_modules/read-installed/node_modules/semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "dev": true, + "bin": { + "semver": "bin/semver" + } + }, + "node_modules/read-package-json": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/read-package-json/-/read-package-json-2.1.2.tgz", + "integrity": "sha512-D1KmuLQr6ZSJS0tW8hf3WGpRlwszJOXZ3E8Yd/DNRaM5d+1wVRZdHlpGBLAuovjr28LbWvjpWkBHMxpRGGjzNA==", + "dev": true, + "dependencies": { + "glob": "^7.1.1", + "json-parse-even-better-errors": "^2.3.0", + "normalize-package-data": "^2.0.0", + "npm-normalize-package-bin": "^1.0.0" + } + }, "node_modules/read-pkg": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-2.0.0.tgz", @@ -39160,6 +39325,18 @@ "node": ">= 6" } }, + "node_modules/readdir-scoped-modules": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/readdir-scoped-modules/-/readdir-scoped-modules-1.1.0.tgz", + "integrity": "sha512-asaikDeqAQg7JifRsZn1NJZXo9E+VwlyCfbkZhwyISinqk5zNS6266HS5kah6P0SaQKGF6SkNnZVHUzHFYxYDw==", + "dev": true, + "dependencies": { + "debuglog": "^1.0.1", + "dezalgo": "^1.0.0", + "graceful-fs": "^4.1.2", + "once": "^1.3.0" + } + }, "node_modules/readdirp": { "version": "3.5.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.5.0.tgz", @@ -41886,6 +42063,15 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "dev": true }, + "node_modules/slide": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/slide/-/slide-1.1.6.tgz", + "integrity": "sha1-VusCfWW00tzmyy4tMsTUr8nh1wc=", + "dev": true, + "engines": { + "node": "*" + } + }, "node_modules/snapdragon": { "version": "0.8.2", "resolved": "https://registry.npmjs.org/snapdragon/-/snapdragon-0.8.2.tgz", @@ -42167,6 +42353,17 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/spdx-compare": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/spdx-compare/-/spdx-compare-1.0.0.tgz", + "integrity": "sha512-C1mDZOX0hnu0ep9dfmuoi03+eOdDoz2yvK79RxbcrVEG1NO1Ph35yW102DHWKN4pk80nwCgeMmSY5L25VE4D9A==", + "dev": true, + "dependencies": { + "array-find-index": "^1.0.2", + "spdx-expression-parse": "^3.0.0", + "spdx-ranges": "^2.0.0" + } + }, "node_modules/spdx-correct": { "version": "3.1.1", "resolved": 
"https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.1.tgz", @@ -42199,6 +42396,23 @@ "integrity": "sha512-U+MTEOO0AiDzxwFvoa4JVnMV6mZlJKk2sBLt90s7G0Gd0Mlknc7kxEn3nuDPNZRta7O2uy8oLcZLVT+4sqNZHQ==", "dev": true }, + "node_modules/spdx-ranges": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/spdx-ranges/-/spdx-ranges-2.1.1.tgz", + "integrity": "sha512-mcdpQFV7UDAgLpXEE/jOMqvK4LBoO0uTQg0uvXUewmEFhpiZx5yJSZITHB8w1ZahKdhfZqP5GPEOKLyEq5p8XA==", + "dev": true + }, + "node_modules/spdx-satisfies": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/spdx-satisfies/-/spdx-satisfies-4.0.1.tgz", + "integrity": "sha512-WVzZ/cXAzoNmjCWiEluEA3BjHp5tiUmmhn9MK+X0tBbR9sOqtC6UQwmgCNrAIZvNlMuBUYAaHYfb2oqlF9SwKA==", + "dev": true, + "dependencies": { + "spdx-compare": "^1.0.0", + "spdx-expression-parse": "^3.0.0", + "spdx-ranges": "^2.0.0" + } + }, "node_modules/spdy": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/spdy/-/spdy-4.0.2.tgz", @@ -43666,6 +43880,15 @@ "node": ">=8" } }, + "node_modules/treeify": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/treeify/-/treeify-1.1.0.tgz", + "integrity": "sha512-1m4RA7xVAJrSGrrXGs0L3YTwyvBs2S8PbRHaLZAkFw7JR8oIFwYtysxlBZhYIa7xSyiYJKZ3iGrrk55cGA3i9A==", + "dev": true, + "engines": { + "node": ">=0.6" + } + }, "node_modules/trim": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/trim/-/trim-0.0.1.tgz", @@ -44497,6 +44720,12 @@ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" }, + "node_modules/util-extend": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/util-extend/-/util-extend-1.0.3.tgz", + "integrity": "sha1-p8IW0mdUUWljeztu3GypEZ4v+T8=", + "dev": true + }, "node_modules/util.promisify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/util.promisify/-/util.promisify-1.0.1.tgz", @@ -60485,6 +60714,12 @@ "integrity": "sha512-9IK9EadsbHo6jLWIpxpR6pL0sazTXV6+SQv25ZB+F7Bj9mJNaOc4nCRabwd5M/JwmUa8idz6Eci6eKfJryPs6Q==", "dev": true }, + "abbrev": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", + "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==", + "dev": true + }, "accepts": { "version": "1.3.7", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz", @@ -60816,6 +61051,12 @@ "integrity": "sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ=", "dev": true }, + "array-find-index": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/array-find-index/-/array-find-index-1.0.2.tgz", + "integrity": "sha1-3wEKoSh+Fku9pvlyOwqWoexBh6E=", + "dev": true + }, "array-flatten": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-2.1.2.tgz", @@ -64075,6 +64316,12 @@ "ms": "^2.1.1" } }, + "debuglog": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/debuglog/-/debuglog-1.0.1.tgz", + "integrity": "sha1-qiT/uaw9+aI1GDfPstJ5NgzXhJI=", + "dev": true + }, "decamelize": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", @@ -64432,6 +64679,16 @@ } } }, + "dezalgo": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/dezalgo/-/dezalgo-1.0.3.tgz", + "integrity": "sha1-f3Qt4Gb8dIvI24IFad3c5Jvw1FY=", + "dev": true, + "requires": { + "asap": "^2.0.0", + "wrappy": "1" + } + }, "diff": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/diff/-/diff-5.0.0.tgz", @@ 
-71818,6 +72075,12 @@ "integrity": "sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==", "dev": true }, + "json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true + }, "json-schema": { "version": "0.2.3", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", @@ -72014,6 +72277,41 @@ "type-check": "~0.4.0" } }, + "license-checker": { + "version": "25.0.1", + "resolved": "https://registry.npmjs.org/license-checker/-/license-checker-25.0.1.tgz", + "integrity": "sha512-mET5AIwl7MR2IAKYYoVBBpV0OnkKQ1xGj2IMMeEFIs42QAkEVjRtFZGWmQ28WeU7MP779iAgOaOy93Mn44mn6g==", + "dev": true, + "requires": { + "chalk": "^2.4.1", + "debug": "^3.1.0", + "mkdirp": "^0.5.1", + "nopt": "^4.0.1", + "read-installed": "~4.0.3", + "semver": "^5.5.0", + "spdx-correct": "^3.0.0", + "spdx-expression-parse": "^3.0.0", + "spdx-satisfies": "^4.0.0", + "treeify": "^1.1.0" + }, + "dependencies": { + "debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dev": true, + "requires": { + "ms": "^2.1.1" + } + }, + "semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "dev": true + } + } + }, "lie": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", @@ -73988,6 +74286,16 @@ "integrity": "sha512-Slf2s69+2/uAD79pVVQo8uSiC34+g8GWY8UH2Qtqv34ZfhYrxpYpfzs9Js9d6O0mbDmALuxaTlplnBTnSELcrw==", "dev": true }, + "nopt": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-4.0.3.tgz", + "integrity": "sha512-CvaGwVMztSMJLOeXPrez7fyfObdZqNUK1cPAEzLHrTybIua9pMdmmPR5YwtfNftIOMv3DPUhFaxsZMNTQO20Kg==", + "dev": true, + "requires": { + "abbrev": "1", + "osenv": "^0.1.4" + } + }, "normalize-package-data": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz", @@ -74050,6 +74358,12 @@ } } }, + "npm-normalize-package-bin": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/npm-normalize-package-bin/-/npm-normalize-package-bin-1.0.1.tgz", + "integrity": "sha512-EPfafl6JL5/rU+ot6P3gRSCpPDW5VmIzX959Ob1+ySFUuuYHWHekXpwdUZcKP5C+DS4GEtdJluwBjnsNDl+fSA==", + "dev": true + }, "npm-run-path": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", @@ -74360,6 +74674,28 @@ "integrity": "sha1-hUNzx/XCMVkU/Jv8a9gjj92h7Cc=", "dev": true }, + "os-homedir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", + "integrity": "sha1-/7xJiDNuDoM94MFox+8VISGqf7M=", + "dev": true + }, + "os-tmpdir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", + "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=", + "dev": true + }, + "osenv": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/osenv/-/osenv-0.1.5.tgz", + "integrity": "sha512-0CWcCECdMVc2Rw3U5w9ZjqX6ga6ubk1xDVKxtBQPK7wis/0F2r9T6k4ydGYhecl7YUBxBVxhL5oisPsNxAPe2g==", + "dev": true, + "requires": { + 
"os-homedir": "^1.0.0", + "os-tmpdir": "^1.0.0" + } + }, "overlayscrollbars": { "version": "1.13.1", "resolved": "https://registry.npmjs.org/overlayscrollbars/-/overlayscrollbars-1.13.1.tgz", @@ -77560,6 +77896,41 @@ "warning": "^3.0.0" } }, + "read-installed": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/read-installed/-/read-installed-4.0.3.tgz", + "integrity": "sha1-/5uLZ/GH0eTCm5/rMfayI6zRkGc=", + "dev": true, + "requires": { + "debuglog": "^1.0.1", + "graceful-fs": "^4.1.2", + "read-package-json": "^2.0.0", + "readdir-scoped-modules": "^1.0.0", + "semver": "2 || 3 || 4 || 5", + "slide": "~1.1.3", + "util-extend": "^1.0.1" + }, + "dependencies": { + "semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "dev": true + } + } + }, + "read-package-json": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/read-package-json/-/read-package-json-2.1.2.tgz", + "integrity": "sha512-D1KmuLQr6ZSJS0tW8hf3WGpRlwszJOXZ3E8Yd/DNRaM5d+1wVRZdHlpGBLAuovjr28LbWvjpWkBHMxpRGGjzNA==", + "dev": true, + "requires": { + "glob": "^7.1.1", + "json-parse-even-better-errors": "^2.3.0", + "normalize-package-data": "^2.0.0", + "npm-normalize-package-bin": "^1.0.0" + } + }, "read-pkg": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-2.0.0.tgz", @@ -77660,6 +78031,18 @@ "util-deprecate": "^1.0.1" } }, + "readdir-scoped-modules": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/readdir-scoped-modules/-/readdir-scoped-modules-1.1.0.tgz", + "integrity": "sha512-asaikDeqAQg7JifRsZn1NJZXo9E+VwlyCfbkZhwyISinqk5zNS6266HS5kah6P0SaQKGF6SkNnZVHUzHFYxYDw==", + "dev": true, + "requires": { + "debuglog": "^1.0.1", + "dezalgo": "^1.0.0", + "graceful-fs": "^4.1.2", + "once": "^1.3.0" + } + }, "readdirp": { "version": "3.5.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.5.0.tgz", @@ -79764,6 +80147,12 @@ } } }, + "slide": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/slide/-/slide-1.1.6.tgz", + "integrity": "sha1-VusCfWW00tzmyy4tMsTUr8nh1wc=", + "dev": true + }, "snapdragon": { "version": "0.8.2", "resolved": "https://registry.npmjs.org/snapdragon/-/snapdragon-0.8.2.tgz", @@ -80007,6 +80396,17 @@ "integrity": "sha512-q/JSVd1Lptzhf5bkYm4ob4iWPjx0KiRe3sRFBNrVqbJkFaBm5vbbowy1mymoPNLRa52+oadOhJ+K49wsSeSjTA==", "dev": true }, + "spdx-compare": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/spdx-compare/-/spdx-compare-1.0.0.tgz", + "integrity": "sha512-C1mDZOX0hnu0ep9dfmuoi03+eOdDoz2yvK79RxbcrVEG1NO1Ph35yW102DHWKN4pk80nwCgeMmSY5L25VE4D9A==", + "dev": true, + "requires": { + "array-find-index": "^1.0.2", + "spdx-expression-parse": "^3.0.0", + "spdx-ranges": "^2.0.0" + } + }, "spdx-correct": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.1.tgz", @@ -80039,6 +80439,23 @@ "integrity": "sha512-U+MTEOO0AiDzxwFvoa4JVnMV6mZlJKk2sBLt90s7G0Gd0Mlknc7kxEn3nuDPNZRta7O2uy8oLcZLVT+4sqNZHQ==", "dev": true }, + "spdx-ranges": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/spdx-ranges/-/spdx-ranges-2.1.1.tgz", + "integrity": "sha512-mcdpQFV7UDAgLpXEE/jOMqvK4LBoO0uTQg0uvXUewmEFhpiZx5yJSZITHB8w1ZahKdhfZqP5GPEOKLyEq5p8XA==", + "dev": true + }, + "spdx-satisfies": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/spdx-satisfies/-/spdx-satisfies-4.0.1.tgz", + "integrity": 
"sha512-WVzZ/cXAzoNmjCWiEluEA3BjHp5tiUmmhn9MK+X0tBbR9sOqtC6UQwmgCNrAIZvNlMuBUYAaHYfb2oqlF9SwKA==", + "dev": true, + "requires": { + "spdx-compare": "^1.0.0", + "spdx-expression-parse": "^3.0.0", + "spdx-ranges": "^2.0.0" + } + }, "spdy": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/spdy/-/spdy-4.0.2.tgz", @@ -81208,6 +81625,12 @@ "punycode": "^2.1.1" } }, + "treeify": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/treeify/-/treeify-1.1.0.tgz", + "integrity": "sha512-1m4RA7xVAJrSGrrXGs0L3YTwyvBs2S8PbRHaLZAkFw7JR8oIFwYtysxlBZhYIa7xSyiYJKZ3iGrrk55cGA3i9A==", + "dev": true + }, "trim": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/trim/-/trim-0.0.1.tgz", @@ -81821,6 +82244,12 @@ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" }, + "util-extend": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/util-extend/-/util-extend-1.0.3.tgz", + "integrity": "sha1-p8IW0mdUUWljeztu3GypEZ4v+T8=", + "dev": true + }, "util.promisify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/util.promisify/-/util.promisify-1.0.1.tgz", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 715d3c9edb798..040acc995a195 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -11,7 +11,8 @@ "test": "react-scripts test", "format": "prettier --write 'src/**/*.{ts,tsx}'", "storybook": "start-storybook -p 9009 -s public --quiet", - "lint": "eslint --ext js,ts,tsx src" + "lint": "eslint --ext js,ts,tsx src", + "license-check": "node ./scripts/license-check.js" }, "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", @@ -83,6 +84,7 @@ "eslint-plugin-jest": "^24.1.5", "eslint-plugin-prettier": "^3.3.1", "husky": "^4.2.3", + "license-checker": "^25.0.1", "lint-staged": "^10.0.8", "prettier": "^2.2.1", "react-scripts": "4.0.2", diff --git a/airbyte-webapp/scripts/license-check.js b/airbyte-webapp/scripts/license-check.js new file mode 100644 index 0000000000000..96c3e2b1abdf9 --- /dev/null +++ b/airbyte-webapp/scripts/license-check.js @@ -0,0 +1,109 @@ +const checker = require('license-checker'); +const path = require('path'); +const { promisify } = require('util'); +const { version } = require('../package.json'); + +/** + * A list of all the allowed licenses that production dependencies can have. + */ +const ALLOWED_LICENSES = [ + '(Apache-2.0 OR MPL-1.1)', + '(AFL-2.1 OR BSD-3-Clause)', + '(AFLv2.1 OR BSD)', + '(BSD-2-Clause OR MIT OR Apache-2.0)', + '(BSD-3-Clause AND Apache-2.0)', + '(BSD-3-Clause OR GPL-2.0)', + '(CC-BY-4.0 AND MIT)', + '(MIT OR Apache-2.0)', + '(MIT OR CC0-1.0)', + '(MIT OR GPL-3.0)', + '(MIT OR GPL-3.0-or-later)', + '(MIT OR WTFPL)', + '(MIT AND CC-BY-3.0)', + '(MIT AND BSD-3-Clause)', + '(MIT AND Zlib)', + '(WTFPL OR MIT)', + 'BSD-3-Clause OR MIT', + '0BSD', + 'Apache', + 'Apache-2.0', + 'BSD', + 'BSD-2-Clause', + 'BSD-3-Clause', + 'CC0-1.0', + 'CC-BY-3.0', + 'CC-BY-4.0', + 'ISC', + 'MIT', + 'MPL-2.0', + 'Public Domain', + 'Python-2.0', + 'Unlicense', + 'WTFPL', +]; + +/** + * Licenses that should be allowed only for dev dependencies. + */ +const ALLOWED_DEV_LICENSES = [...ALLOWED_LICENSES, 'ODC-By-1.0', 'MPL-2.0']; + +/** + * A list of all packages that should be excluded from license checking. + */ +const IGNORED_PACKAGES = [`airbyte-webapp@${version}`]; + +/** + * Overwrite licenses for specific packages manually, e.g. because they can't be detected properly. 
+ */ +const LICENSE_OVERWRITES = { + 'glob-to-regexp@0.3.0': 'BSD-3-Clause', + 'rework@1.0.1': 'MIT', + 'trim@0.0.1': 'MIT', +}; + +const checkLicenses = promisify(checker.init); +const params = { + start: path.join(__dirname, '..'), + excludePackages: IGNORED_PACKAGES.join(';'), + unknown: true, +}; + +function validateLicenes(licenses, allowedLicenes, usedOverwrites) { + let licensesValid = true; + for (const [pkg, info] of Object.entries(licenses)) { + let license = Array.isArray(info.licenses) ? `(${info.licenses.join(' OR ')})` : info.licenses; + if (LICENSE_OVERWRITES[pkg]) { + license = LICENSE_OVERWRITES[pkg]; + usedOverwrites.add(pkg); + } + if (license.endsWith('*')) { + license = license.substr(0, license.length - 1); + console.log(`Guessed license for package ${pkg}: ${license}`); + } + if (!license || !allowedLicenes.includes(license)) { + licensesValid = false; + console.error(`Package ${pkg} has incompatible license: ${license}`); + } + } + + return licensesValid; +} + +Promise.all([ + checkLicenses({ ...params, production: true }), + checkLicenses({ ...params, development: true }), +]).then(([prod, dev]) => { + const usedOverwrites = new Set(); + const prodLicensesValid = validateLicenes(prod, ALLOWED_LICENSES, usedOverwrites); + const devLicensesValid = validateLicenes(dev, ALLOWED_DEV_LICENSES, usedOverwrites); + + for (const overwrite of Object.keys(LICENSE_OVERWRITES)) { + if (!usedOverwrites.has(overwrite)) { + console.warn(`License overwrite for ${overwrite} is no longer needed and can be deleted.`); + } + } + + if (!prodLicensesValid || !devLicensesValid) { + process.exit(1); + } +}); From 62f2afb16d9c57b4a1c0cf9d7ce3b5c3091d7ca4 Mon Sep 17 00:00:00 2001 From: Baz Date: Mon, 24 Jan 2022 16:14:31 +0200 Subject: [PATCH 202/215] =?UTF-8?q?=F0=9F=90=9B=20=20Publish=20strict-encr?= =?UTF-8?q?ypt=20source-connectors:=20source-clickhouse-strict-encrypt,=20?= =?UTF-8?q?source-mysql-strict-encrypt,=20source-oracle-strict-encrypt=20(?= =?UTF-8?q?#9749)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Dockerfile | 2 +- .../source-mysql-strict-encrypt/Dockerfile | 2 +- .../src/test/resources/expected_spec.json | 20 ++++++++++++------- .../source-oracle-strict-encrypt/Dockerfile | 2 +- .../src/test/resources/expected_spec.json | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/connectors/source-clickhouse-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-clickhouse-strict-encrypt/Dockerfile index 74cd78af0f5af..7b127b5e7a3c0 100644 --- a/airbyte-integrations/connectors/source-clickhouse-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-clickhouse-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-clickhouse-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-clickhouse-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile index 82db18ce6d4af..afaefe8983177 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION source-mysql-strict-encrypt COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.5 +LABEL io.airbyte.version=0.1.7 LABEL 
io.airbyte.name=airbyte/source-mysql-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json index d26dd1d611a6e..9c811ed82bf64 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json @@ -8,12 +8,14 @@ "additionalProperties": false, "properties": { "host": { - "description": "Hostname of the database.", + "description": "The host name of the database.", + "title": "Host", "type": "string", "order": 0 }, "port": { - "description": "Port of the database.", + "description": "The port to connect to.", + "title": "Port", "type": "integer", "minimum": 0, "maximum": 65536, @@ -22,30 +24,34 @@ "order": 1 }, "database": { - "description": "Name of the database.", + "description": "The database name.", + "title": "Database", "type": "string", "order": 2 }, "username": { - "description": "Username to use to access the database.", + "description": "The username which is used to access the database.", + "title": "Username", "type": "string", "order": 3 }, "password": { - "description": "Password associated with the username.", + "description": "The password associated with the username.", + "title": "Password", "type": "string", "airbyte_secret": true, "order": 4 }, "jdbc_url_params": { - "description": "Additional properties to pass to the jdbc url string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3)", + "description": "Additional properties to pass to the jdbc url string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).", + "title": "JDBC URL Params", "type": "string", "order": 5 }, "replication_method": { "type": "string", "title": "Replication Method", - "description": "Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses the Binlog to detect inserts, updates, and deletes. This needs to be configured on the source database itself.", + "description": "Replication method which is used for data extraction from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses the Binlog to detect inserts, updates, and deletes. 
This needs to be configured on the source database itself.", "order": 7, "default": "STANDARD", "enum": ["STANDARD", "CDC"] diff --git a/airbyte-integrations/connectors/source-oracle-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-oracle-strict-encrypt/Dockerfile index 85fd3600626c7..0728fca5be899 100644 --- a/airbyte-integrations/connectors/source-oracle-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-oracle-strict-encrypt/Dockerfile @@ -17,5 +17,5 @@ ENV TZ UTC COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-oracle-strict-encrypt diff --git a/airbyte-integrations/connectors/source-oracle-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-oracle-strict-encrypt/src/test/resources/expected_spec.json index 0a7cdf7cdfdcb..0b4b703cb2910 100644 --- a/airbyte-integrations/connectors/source-oracle-strict-encrypt/src/test/resources/expected_spec.json +++ b/airbyte-integrations/connectors/source-oracle-strict-encrypt/src/test/resources/expected_spec.json @@ -4,7 +4,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Oracle Source Spec", "type": "object", - "required": ["host", "port", "sid", "username", "encryption"], + "required": ["host", "port", "sid", "username"], "additionalProperties": false, "properties": { "host": { From 5a091b12460ec6773c5655e3cae63248ad5cea86 Mon Sep 17 00:00:00 2001 From: VitaliiMaltsev <39538064+VitaliiMaltsev@users.noreply.github.com> Date: Mon, 24 Jan 2022 17:17:25 +0200 Subject: [PATCH 203/215] Destination Snowflake: edit schema name which contains the character - (#9743) * fix for jdk 17 * added heartbeat interval to Apache Mina settings * fixed checkstyle * bump versions Co-authored-by: vmaltsev --- .../424892c4-daac-4491-b35d-c6688ba547ba.json | 2 +- .../main/resources/seed/destination_definitions.yaml | 2 +- .../connectors/destination-snowflake/Dockerfile | 2 +- .../SnowflakeInternalStagingConsumerFactory.java | 12 +++++------- docs/integrations/destinations/snowflake.md | 1 + 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json index acf0a7d031470..bc563225389e8 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "424892c4-daac-4491-b35d-c6688ba547ba", "name": "Snowflake", "dockerRepository": "airbyte/destination-snowflake", - "dockerImageTag": "0.4.3", + "dockerImageTag": "0.4.4", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/snowflake", "icon": "snowflake.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 712996128cd71..dc4f0cf342adf 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -179,7 +179,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: 
airbyte/destination-snowflake - dockerImageTag: 0.4.3 + dockerImageTag: 0.4.4 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg - name: MariaDB ColumnStore diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index b0e293930d9cc..c7d1bac24852c 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -18,5 +18,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.4.3 +LABEL io.airbyte.version=0.4.4 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java index 52844d93c786d..2b4c99cef079a 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingConsumerFactory.java @@ -80,7 +80,7 @@ private static Function toWriteConfig( Preconditions.checkNotNull(stream.getDestinationSyncMode(), "Undefined destination sync mode"); final AirbyteStream abStream = stream.getStream(); - final String outputSchema = getOutputSchema(abStream, namingResolver.getIdentifier(config.get("schema").asText())); + final String outputSchema = getOutputSchema(abStream, config.get("schema").asText(), namingResolver); final String streamName = abStream.getName(); final String tableName = namingResolver.getRawTableName(streamName); @@ -94,12 +94,10 @@ private static Function toWriteConfig( }; } - private static String getOutputSchema(final AirbyteStream stream, final String defaultDestSchema) { - final String sourceSchema = stream.getNamespace(); - if (sourceSchema != null) { - return sourceSchema; - } - return defaultDestSchema; + private static String getOutputSchema(final AirbyteStream stream, final String defaultDestSchema, NamingConventionTransformer namingResolver) { + return stream.getNamespace() != null + ? namingResolver.getIdentifier(stream.getNamespace()) + : namingResolver.getIdentifier(defaultDestSchema); } private static OnStartFunction onStartFunction(final JdbcDatabase database, diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index fff0a73cf611b..f1212c1abefc4 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -217,6 +217,7 @@ Finally, you need to add read/write permissions to your bucket with that email. 
| Version | Date | Pull Request | Subject | |:--------|:-----------| :----- | :------ | +| 0.4.4 | 2022-01-24 | [#9743](https://github.com/airbytehq/airbyte/pull/9743) | Fixed bug with dashes in schema name | | 0.4.3 | 2022-01-20 | [#9531](https://github.com/airbytehq/airbyte/pull/9531) | Start using new S3StreamCopier and expose the purgeStagingData option | | 0.4.2 | 2022-01-10 | [#9141](https://github.com/airbytehq/airbyte/pull/9141) | Fixed duplicate rows on retries | | 0.4.1 | 2021-01-06 | [#9311](https://github.com/airbytehq/airbyte/pull/9311) | Update сreating schema during check | From df6610baae429792fc492731864d6542200b08f4 Mon Sep 17 00:00:00 2001 From: andriikorotkov <88329385+andriikorotkov@users.noreply.github.com> Date: Mon, 24 Jan 2022 19:04:11 +0200 Subject: [PATCH 204/215] :tada: Updated azure blob storage destination (#9682) * updated azure blob storage * updated azure blob storage documentation * fix remarks * fix remarks * fix remarks and format code * updated doc --- .../b4c5d105-31fd-4817-96b6-cb923bfc04cb.json | 2 +- .../resources/seed/destination_specs.yaml | 2 +- .../destination-azure-blob-storage/Dockerfile | 2 +- .../AzureBlobStorageConsumer.java | 66 +++++++++++-------- .../csv/AzureBlobStorageCsvWriter.java | 17 ++--- .../jsonl/AzureBlobStorageJsonlWriter.java | 3 +- .../writer/AzureBlobStorageWriterFactory.java | 3 +- .../writer/ProductionWriterFactory.java | 9 +-- ...obStorageCsvDestinationAcceptanceTest.java | 26 ++++++++ ...eBlobStorageDestinationAcceptanceTest.java | 36 ++++++---- ...StorageJsonlDestinationAcceptanceTest.java | 21 ++++++ .../destinations/azureblobstorage.md | 3 +- 12 files changed, 122 insertions(+), 68 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json index 3c436bbd29d64..ecb42472e00fc 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/b4c5d105-31fd-4817-96b6-cb923bfc04cb.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "b4c5d105-31fd-4817-96b6-cb923bfc04cb", "name": "Azure Blob Storage", "dockerRepository": "airbyte/destination-azure-blob-storage", - "dockerImageTag": "0.1.1", + "dockerImageTag": "0.1.2", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/azureblobstorage", "icon": "azureblobstorage.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index dd34e8ef899b4..3e340bcd59476 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -1,7 +1,7 @@ # This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator. # Do NOT edit this file directly. See generator class for more details. 
--- -- dockerImage: "airbyte/destination-azure-blob-storage:0.1.1" +- dockerImage: "airbyte/destination-azure-blob-storage:0.1.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/azureblobstorage" connectionSpecification: diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile b/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile index 8e644aa025fe7..5b076391c4738 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-azure-blob-storage COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/destination-azure-blob-storage diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageConsumer.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageConsumer.java index 843868ab54a68..fdd0364a21f06 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageConsumer.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageConsumer.java @@ -19,17 +19,24 @@ import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import io.airbyte.protocol.models.SyncMode; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.sql.Timestamp; +import java.text.DateFormat; +import java.text.SimpleDateFormat; import java.util.HashMap; import java.util.Map; +import java.util.TimeZone; import java.util.UUID; import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class AzureBlobStorageConsumer extends FailureTrackingAirbyteMessageConsumer { private static final Logger LOGGER = LoggerFactory.getLogger(AzureBlobStorageConsumer.class); + private static final String YYYY_MM_DD_FORMAT_STRING = "yyyy_MM_dd"; private final AzureBlobStorageDestinationConfig azureBlobStorageDestinationConfig; private final ConfiguredAirbyteCatalog configuredCatalog; @@ -67,15 +74,16 @@ protected void startTracked() throws Exception { for (final ConfiguredAirbyteStream configuredStream : configuredCatalog.getStreams()) { + final String blobName = configuredStream.getStream().getName() + "/" + + getOutputFilename(new Timestamp(System.currentTimeMillis())); final AppendBlobClient appendBlobClient = specializedBlobClientBuilder - .blobName(configuredStream.getStream().getName()) + .blobName(blobName) .buildAppendBlobClient(); - final boolean isNewlyCreatedBlob = createContainers(appendBlobClient, configuredStream); + createContainers(specializedBlobClientBuilder, appendBlobClient, configuredStream); final AzureBlobStorageWriter writer = writerFactory - .create(azureBlobStorageDestinationConfig, appendBlobClient, configuredStream, - isNewlyCreatedBlob); + .create(azureBlobStorageDestinationConfig, appendBlobClient, configuredStream); final AirbyteStream stream = configuredStream.getStream(); final 
AirbyteStreamNameNamespacePair streamNamePair = AirbyteStreamNameNamespacePair @@ -84,39 +92,30 @@ protected void startTracked() throws Exception { } } - private boolean createContainers(final AppendBlobClient appendBlobClient, - final ConfiguredAirbyteStream configuredStream) { + private void createContainers(final SpecializedBlobClientBuilder specializedBlobClientBuilder, + final AppendBlobClient appendBlobClient, + final ConfiguredAirbyteStream configuredStream) { // create container if absent (aka SQl Schema) final BlobContainerClient containerClient = appendBlobClient.getContainerClient(); if (!containerClient.exists()) { containerClient.create(); } - // create a storage container if absent (aka Table is SQL BD) - if (SyncMode.FULL_REFRESH.equals(configuredStream.getSyncMode())) { - // full refresh sync. Create blob and override if any + if (DestinationSyncMode.OVERWRITE.equals(configuredStream.getDestinationSyncMode())) { LOGGER.info("Sync mode is selected to OVERRIDE mode. New container will be automatically" + " created or all data would be overridden (if any) for stream:" + configuredStream .getStream().getName()); - appendBlobClient.create(true); - return true; - } else { - // incremental sync. Create new container only if still absent - if (!appendBlobClient.exists()) { - LOGGER.info("Sync mode is selected to APPEND mode. New container will be automatically" - + " created for stream:" + configuredStream.getStream().getName()); - appendBlobClient.create(false); - LOGGER.info(appendBlobClient.getBlobName() + " blob has been created"); - return true; - } else { - LOGGER.info(String.format( - "Sync mode is selected to APPEND mode. Container %s already exists. Append mode is " - + "only available for \"Append blobs\". For more details please visit" - + " https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction#blobs", - configuredStream.getStream().getName())); - LOGGER.info(appendBlobClient.getBlobName() + " already exists"); - return false; - } + var blobItemList = StreamSupport.stream(containerClient.listBlobs().spliterator(), false) + .collect(Collectors.toList()); + blobItemList.forEach(blob -> { + if (!blob.isDeleted() && blob.getName().contains(configuredStream.getStream().getName() + "/")) { + final AppendBlobClient abc = specializedBlobClientBuilder + .blobName(blob.getName()) + .buildAppendBlobClient(); + abc.delete(); + } + }); } + appendBlobClient.create(true); } @Override @@ -161,4 +160,13 @@ protected void close(final boolean hasFailed) throws Exception { } } + private static String getOutputFilename(final Timestamp timestamp) { + final DateFormat formatter = new SimpleDateFormat(YYYY_MM_DD_FORMAT_STRING); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + return String.format( + "%s_%d_0", + formatter.format(timestamp), + timestamp.getTime()); + } + } diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java index f31aaab64fa02..d267bf4d5325d 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java +++ 
b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/csv/AzureBlobStorageCsvWriter.java @@ -32,8 +32,7 @@ public class AzureBlobStorageCsvWriter extends BaseAzureBlobStorageWriter implem public AzureBlobStorageCsvWriter(final AzureBlobStorageDestinationConfig config, final AppendBlobClient appendBlobClient, - final ConfiguredAirbyteStream configuredStream, - final boolean isNewlyCreatedBlob) + final ConfiguredAirbyteStream configuredStream) throws IOException { super(config, appendBlobClient, configuredStream); @@ -46,17 +45,9 @@ public AzureBlobStorageCsvWriter(final AzureBlobStorageDestinationConfig config, this.blobOutputStream = new BufferedOutputStream(appendBlobClient.getBlobOutputStream(), config.getOutputStreamBufferSize()); - if (isNewlyCreatedBlob) { - this.csvPrinter = new CSVPrinter( - new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8), - CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL) - .withHeader(csvSheetGenerator.getHeaderRow().toArray(new String[0]))); - } else { - // no header required for append - this.csvPrinter = new CSVPrinter( - new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8), - CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL)); - } + final PrintWriter printWriter = new PrintWriter(blobOutputStream, false, StandardCharsets.UTF_8); + this.csvPrinter = new CSVPrinter(printWriter, CSVFormat.DEFAULT.withQuoteMode(QuoteMode.ALL) + .withHeader(csvSheetGenerator.getHeaderRow().toArray(new String[0]))); } @Override diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java index 77ed63b7dee63..874a2c280e90a 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/jsonl/AzureBlobStorageJsonlWriter.java @@ -37,8 +37,7 @@ public class AzureBlobStorageJsonlWriter extends BaseAzureBlobStorageWriter impl public AzureBlobStorageJsonlWriter(final AzureBlobStorageDestinationConfig config, final AppendBlobClient appendBlobClient, - final ConfiguredAirbyteStream configuredStream, - final boolean isNewlyCreatedBlob) { + final ConfiguredAirbyteStream configuredStream) { super(config, appendBlobClient, configuredStream); // at this moment we already receive appendBlobClient initialized this.blobOutputStream = new BufferedOutputStream(appendBlobClient.getBlobOutputStream(), config.getOutputStreamBufferSize()); diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/writer/AzureBlobStorageWriterFactory.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/writer/AzureBlobStorageWriterFactory.java index 0b4ed34ac6229..004506652f4ce 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/writer/AzureBlobStorageWriterFactory.java +++ 
b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/writer/AzureBlobStorageWriterFactory.java @@ -16,8 +16,7 @@ public interface AzureBlobStorageWriterFactory { AzureBlobStorageWriter create(AzureBlobStorageDestinationConfig config, AppendBlobClient appendBlobClient, - ConfiguredAirbyteStream configuredStream, - boolean isNewlyCreatedBlob) + ConfiguredAirbyteStream configuredStream) throws Exception; } diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/writer/ProductionWriterFactory.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/writer/ProductionWriterFactory.java index c4033098c4b2e..a18dddfe98a70 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/writer/ProductionWriterFactory.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/main/java/io/airbyte/integrations/destination/azure_blob_storage/writer/ProductionWriterFactory.java @@ -20,21 +20,18 @@ public class ProductionWriterFactory implements AzureBlobStorageWriterFactory { @Override public AzureBlobStorageWriter create(final AzureBlobStorageDestinationConfig config, final AppendBlobClient appendBlobClient, - final ConfiguredAirbyteStream configuredStream, - final boolean isNewlyCreatedBlob) + final ConfiguredAirbyteStream configuredStream) throws Exception { final AzureBlobStorageFormat format = config.getFormatConfig().getFormat(); if (format == AzureBlobStorageFormat.CSV) { LOGGER.debug("Picked up CSV format writer"); - return new AzureBlobStorageCsvWriter(config, appendBlobClient, configuredStream, - isNewlyCreatedBlob); + return new AzureBlobStorageCsvWriter(config, appendBlobClient, configuredStream); } if (format == AzureBlobStorageFormat.JSONL) { LOGGER.debug("Picked up JSONL format writer"); - return new AzureBlobStorageJsonlWriter(config, appendBlobClient, configuredStream, - isNewlyCreatedBlob); + return new AzureBlobStorageJsonlWriter(config, appendBlobClient, configuredStream); } throw new RuntimeException("Unexpected AzureBlobStorage destination format: " + format); diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageCsvDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageCsvDestinationAcceptanceTest.java index e1802e4547835..904d55ae44dc0 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageCsvDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageCsvDestinationAcceptanceTest.java @@ -4,13 +4,16 @@ package io.airbyte.integrations.destination.azure_blob_storage; +import com.azure.storage.blob.specialized.AppendBlobClient; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.JavaBaseConstants; +import 
java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; @@ -103,4 +106,27 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, return jsonRecords; } + @Override + protected String getAllSyncedObjects(String streamName) { + try { + final List appendBlobClients = getAppendBlobClient(streamName); + StringBuilder result = new StringBuilder(); + for (AppendBlobClient appendBlobClient : appendBlobClients) { + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + appendBlobClient.download(outputStream); + if (result.isEmpty()) { + result.append(outputStream.toString(StandardCharsets.UTF_8)); + } else { + var stringStream = outputStream.toString(StandardCharsets.UTF_8); + result.append(stringStream.substring(stringStream.indexOf('\n') + 1)); + } + } + LOGGER.info("All objects: " + result); + return result.toString(); + } catch (Exception e) { + LOGGER.error("No blobs were found for stream with name {}.", streamName); + return ""; + } + } + } diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationAcceptanceTest.java index e7894b1a49194..9226e1b0a0253 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageDestinationAcceptanceTest.java @@ -17,9 +17,11 @@ import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; -import java.io.ByteArrayOutputStream; -import java.nio.charset.StandardCharsets; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -67,19 +69,29 @@ protected JsonNode getFailCheckConfig() { /** * Helper method to retrieve all synced objects inside the configured bucket path. 
*/ - @Deprecated - protected String getAllSyncedObjects(final String streamName) { - final AppendBlobClient appendBlobClient = specializedBlobClientBuilder + protected abstract String getAllSyncedObjects(final String streamName); + + protected List getAppendBlobClient(final String streamName) throws Exception { + final AppendBlobClient streamAppendBlobClient = specializedBlobClientBuilder .blobName(streamName) .buildAppendBlobClient(); - final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - appendBlobClient.download(outputStream); - final String result = new String(outputStream.toByteArray(), StandardCharsets.UTF_8); - - LOGGER.info("All objects: " + result); - return result; - + final BlobContainerClient containerClient = streamAppendBlobClient.getContainerClient(); + var blobItemList = StreamSupport.stream(containerClient.listBlobs().spliterator(), false) + .collect(Collectors.toList()); + var filteredBlobList = blobItemList.stream() + .filter(blob -> blob.getName().contains(streamName + "/")).collect(Collectors.toList()); + if (!filteredBlobList.isEmpty()) { + List clobClientList = new ArrayList<>(); + filteredBlobList.forEach(blobItem -> { + clobClientList.add(specializedBlobClientBuilder.blobName(blobItem.getName()).buildAppendBlobClient()); + }); + return clobClientList; + } else { + var errorText = String.format("Can not find blob started with: %s/", streamName); + LOGGER.error(errorText); + throw new Exception(errorText); + } } protected abstract JsonNode getFormatConfig(); diff --git a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageJsonlDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageJsonlDestinationAcceptanceTest.java index 300683b7f4126..cf7f89903d41b 100644 --- a/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageJsonlDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-azure-blob-storage/src/test-integration/java/io/airbyte/integrations/destination/azure_blob_storage/AzureBlobStorageJsonlDestinationAcceptanceTest.java @@ -4,10 +4,13 @@ package io.airbyte.integrations.destination.azure_blob_storage; +import com.azure.storage.blob.specialized.AppendBlobClient; import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.JavaBaseConstants; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.LinkedList; import java.util.List; @@ -41,4 +44,22 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, return jsonRecords; } + @Override + protected String getAllSyncedObjects(String streamName) { + try { + final List appendBlobClients = getAppendBlobClient(streamName); + StringBuilder result = new StringBuilder(); + for (AppendBlobClient appendBlobClient : appendBlobClients) { + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + appendBlobClient.download(outputStream); + result.append(outputStream.toString(StandardCharsets.UTF_8)); + } + LOGGER.info("All objects: " + result); + return result.toString(); + } catch (Exception e) { + LOGGER.error("No blobs were found for stream with name {}.", streamName); + return 
""; + } + } + } diff --git a/docs/integrations/destinations/azureblobstorage.md b/docs/integrations/destinations/azureblobstorage.md index 94809438f453e..13615af56619d 100644 --- a/docs/integrations/destinations/azureblobstorage.md +++ b/docs/integrations/destinations/azureblobstorage.md @@ -137,7 +137,8 @@ They will be like this in the output file: | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.1.1 | 2021-12-29 | [\#5332](https://github.com/airbytehq/airbyte/pull/9190) | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. | +| 0.1.2 | 2022-01-20 | [\#9682](https://github.com/airbytehq/airbyte/pull/9682) | Each data synchronization for each stream is written to a new blob to the folder with stream name. | +| 0.1.1 | 2021-12-29 | [\#9190](https://github.com/airbytehq/airbyte/pull/9190) | Added BufferedOutputStream wrapper to blob output stream to improve performance and fix issues with 50,000 block limit. Also disabled autoflush on PrintWriter. | | 0.1.0 | 2021-08-30 | [\#5332](https://github.com/airbytehq/airbyte/pull/5332) | Initial release with JSONL and CSV output. | From c83d3aa21ed35c62c4896f5091f46f1f06341a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9on=20Stefani?= Date: Mon, 24 Jan 2022 18:16:55 +0100 Subject: [PATCH 205/215] =?UTF-8?q?=F0=9F=90=9B=20Source=20Postgres:=20all?= =?UTF-8?q?ow=20handling=20of=20java=20sql=20date=20(#9554)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: allow handling of java sql date * fix: compilation * #9554 PR * format Co-authored-by: andrii.leonets --- .../debezium/internals/DebeziumConverterUtils.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java index a97694727ed3d..7e978adbc8904 100644 --- a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java @@ -6,6 +6,7 @@ import io.airbyte.db.DataTypeUtils; import io.debezium.spi.converter.RelationalColumn; +import java.sql.Date; import java.sql.Timestamp; import java.time.Duration; import java.time.LocalDate; @@ -42,6 +43,8 @@ public static String convertDate(final Object input) { } else if (input instanceof Number) { return DataTypeUtils.toISO8601String( new Timestamp(((Number) input).longValue()).toLocalDateTime()); + } else if (input instanceof Date) { + return DataTypeUtils.toISO8601String((Date) input); } else if (input instanceof String) { try { return LocalDateTime.parse((String) input).toString(); From 5e002ad0e5a175d2a3bcd43c3e9d1b7cbb32766a Mon Sep 17 00:00:00 2001 From: Augustin Date: Mon, 24 Jan 2022 18:50:41 +0100 Subject: [PATCH 206/215] link in summary (#9760) --- docs/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 9ed23036a648f..487ab947afdcb 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -31,6 +31,7 @@ * [Transformations with Airbyte (Part 3/3)](operator-guides/transformation-and-normalization/transformations-with-airbyte.md) * [Configuring Airbyte](operator-guides/configuring-airbyte.md) 
* [Scaling Airbyte](operator-guides/scaling-airbyte.md) + * [Securing Airbyte](operator-guides/securing-airbyte.md) * [Connector Catalog](integrations/README.md) * [Sources](integrations/sources/README.md) * [3PL Central](integrations/sources/tplcentral.md) From 0eb36887339d26602a1aef8b9e6a47fbb093d820 Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Mon, 24 Jan 2022 09:50:57 -0800 Subject: [PATCH 207/215] Reset error (#9761) --- .../temporal/scheduling/ConnectionManagerWorkflowImpl.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java index d07189309050c..653610ba2c0b4 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java @@ -84,7 +84,7 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr // Job and attempt creation maybeJobId = Optional.ofNullable(connectionUpdaterInput.getJobId()).or(() -> { final JobCreationOutput jobCreationOutput = jobCreationAndStatusUpdateActivity.createNewJob(new JobCreationInput( - connectionUpdaterInput.getConnectionId(), workflowState.isResetConnection())); + connectionUpdaterInput.getConnectionId(), connectionUpdaterInput.isResetConnection())); connectionUpdaterInput.setJobId(jobCreationOutput.getJobId()); return Optional.ofNullable(jobCreationOutput.getJobId()); }); @@ -100,7 +100,7 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr final SyncInput getSyncInputActivitySyncInput = new SyncInput( maybeAttemptId.get(), maybeJobId.get(), - workflowState.isResetConnection()); + connectionUpdaterInput.isResetConnection()); jobCreationAndStatusUpdateActivity.reportJobStart(new ReportJobStartInput( maybeJobId.get())); @@ -146,7 +146,7 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr // The naming is very misleading, it is not a failure but the expected behavior... 
} - if (workflowState.isResetConnection()) { + if (connectionUpdaterInput.isResetConnection()) { connectionUpdaterInput.setResetConnection(true); connectionUpdaterInput.setJobId(null); connectionUpdaterInput.setAttemptNumber(1); From 896c1178aa9568a3ea41cb7bda9485b3cd23ee0b Mon Sep 17 00:00:00 2001 From: Octavia Squidington III <90398440+octavia-squidington-iii@users.noreply.github.com> Date: Tue, 25 Jan 2022 02:52:59 +0800 Subject: [PATCH 208/215] Bump Airbyte version from 0.35.7-alpha to 0.35.8-alpha (#9762) Co-authored-by: benmoriceau --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-bootloader/Dockerfile | 4 ++-- .../src/main/resources/seed/destination_specs.yaml | 4 ++-- airbyte-container-orchestrator/Dockerfile | 6 +++--- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 4 ++-- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 10 +++++----- charts/airbyte/values.yaml | 10 +++++----- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 12 ++++++------ kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 12 ++++++------ 18 files changed, 44 insertions(+), 44 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5595158a9dc05..84b285504634c 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.7-alpha +current_version = 0.35.8-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index c372b8e1b765e..b903581c1cd03 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.7-alpha +VERSION=0.35.8-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index 9ebbe745a1878..ad039c93e42ab 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -5,6 +5,6 @@ ENV APPLICATION airbyte-bootloader WORKDIR /app -ADD bin/${APPLICATION}-0.35.7-alpha.tar /app +ADD bin/${APPLICATION}-0.35.8-alpha.tar /app -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 3e340bcd59476..f26e3e7e9fd33 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -1,7 +1,7 @@ # This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator. # Do NOT edit this file directly. See generator class for more details. 
--- -- dockerImage: "airbyte/destination-azure-blob-storage:0.1.2" +- dockerImage: "airbyte/destination-azure-blob-storage:0.1.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/azureblobstorage" connectionSpecification: @@ -3786,7 +3786,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.4.3" +- dockerImage: "airbyte/destination-snowflake:0.4.4" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index b7bf4ac54676f..a394390bf96ee 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -26,12 +26,12 @@ RUN echo "deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] htt RUN apt-get update && apt-get install -y kubectl ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.7-alpha.tar /app +ADD bin/${APPLICATION}-0.35.8-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index cfa671ee239f9..65cdd0b38881c 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.7-alpha.tar /app +ADD bin/${APPLICATION}-0.35.8-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 97653e2aa38f4..9e0c5f54c4f91 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.35.7-alpha.tar /app +ADD bin/${APPLICATION}-0.35.8-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index f1248b4444084..05f89d6a44e38 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.7-alpha", + "version": "0.35.8-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": "0.35.7-alpha", + "version": "0.35.8-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 040acc995a195..a179c3c2dabd7 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.7-alpha", + "version": "0.35.8-alpha", "private": true, "engines": { 
"node": ">=16.0.0" diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index ecda6a937e4e7..15a5819bfe808 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.7-alpha.tar /app +ADD bin/${APPLICATION}-0.35.8-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.7-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index 987d6f3186bf3..cc4d37816f0bb 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.7-alpha" +appVersion: "0.35.8-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 8db97e10a1b7f..653f542ce129f 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -55,7 +55,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -86,7 +86,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.35.8-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -120,7 +120,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -148,7 +148,7 @@ | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.7-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 33518de80fad1..ccaf10623fead 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.7-alpha + tag: 0.35.8-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -140,7 +140,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.7-alpha + tag: 0.35.8-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -245,7 +245,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.7-alpha + tag: 0.35.8-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -357,7 +357,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.7-alpha + tag: 0.35.8-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## @@ -446,7 +446,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.35.7-alpha + tag: 0.35.8-alpha ## @section Temporal parameters ## TODO: Move to consuming temporal from a dedicated helm chart diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 3bf2ff704facf..c6879eb41d84e 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -101,7 +101,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.7-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.8-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index c964b7994200f..8883175c01686 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.7-alpha +AIRBYTE_VERSION=0.35.8-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 87a3aaf8f7b44..ee6c3cb032236 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/bootloader - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/scheduler - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/server - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/webapp - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/worker - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index c964b7994200f..8883175c01686 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.7-alpha +AIRBYTE_VERSION=0.35.8-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 02e8f673497b5..5d3074db1bb53 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/bootloader - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/scheduler - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/server - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/webapp - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: airbyte/worker - newTag: 0.35.7-alpha + newTag: 0.35.8-alpha - name: temporalio/auto-setup newTag: 1.7.0 From d76fe1eb21e607ac14df47130a8a9f7e453943c7 Mon Sep 17 00:00:00 2001 From: Serhii Lazebnyi <53845333+lazebnyi@users.noreply.github.com> Date: Mon, 24 Jan 2022 21:52:52 +0200 Subject: [PATCH 209/215] =?UTF-8?q?=F0=9F=8E=89=20Source=20Shopify:=20Add?= =?UTF-8?q?=20privileges=20validation=20(#9648)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add handling of scroll param when it expired * Updated PR number * Fix typo in docs * Add unittest * Updated scroll or standard switch mechanism * Updated to linters * Updated spec.yaml and defenitions * Added privileges validation * Updated to linter * Updated PR number * Updated to review * Bumped docker version * Bumped seed version --- .../9da77001-af33-4bcd-be46-6252bf9342b9.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-shopify/Dockerfile | 2 +- 
.../source-shopify/source_shopify/source.py | 50 ++++++++++++------- .../source-shopify/source_shopify/utils.py | 15 +++++- .../source-shopify/unit_tests/conftest.py | 35 +++++++++++++ .../source-shopify/unit_tests/unit_test.py | 11 +++- docs/integrations/sources/shopify.md | 1 + 9 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json index 94a52b10f6171..f7c25e91b97fb 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "9da77001-af33-4bcd-be46-6252bf9342b9", "name": "Shopify", "dockerRepository": "airbyte/source-shopify", - "dockerImageTag": "0.1.28", + "dockerImageTag": "0.1.30", "documentationUrl": "https://docs.airbyte.io/integrations/sources/shopify", "icon": "shopify.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 429326d3775f4..129235fd48462 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -655,7 +655,7 @@ - name: Shopify sourceDefinitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 dockerRepository: airbyte/source-shopify - dockerImageTag: 0.1.29 + dockerImageTag: 0.1.30 documentationUrl: https://docs.airbyte.io/integrations/sources/shopify icon: shopify.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 6497c5b22b701..ea94882e55bff 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6931,7 +6931,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-shopify:0.1.29" +- dockerImage: "airbyte/source-shopify:0.1.30" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-shopify/Dockerfile b/airbyte-integrations/connectors/source-shopify/Dockerfile index 88b6c8ef31047..8bc56821e972e 100644 --- a/airbyte-integrations/connectors/source-shopify/Dockerfile +++ b/airbyte-integrations/connectors/source-shopify/Dockerfile @@ -28,5 +28,5 @@ COPY source_shopify ./source_shopify ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.29 +LABEL io.airbyte.version=0.1.30 LABEL io.airbyte.name=airbyte/source-shopify diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py index 2ce5fb0ce9d62..db54e5e234328 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py @@ -15,12 +15,12 @@ from .auth import ShopifyAuthenticator from .transform import DataTypeEnforcer 
+from .utils import SCOPES_MAPPING from .utils import EagerlyCachedStreamState as stream_state_cache from .utils import ShopifyRateLimiter as limiter class ShopifyStream(HttpStream, ABC): - # Latest Stable Release api_version = "2021-07" # Page size @@ -140,7 +140,6 @@ def request_params( class ChildSubstream(IncrementalShopifyStream): - """ ChildSubstream - provides slicing functionality for streams using parts of data from parent stream. For example: @@ -251,7 +250,6 @@ def path(self, **kwargs) -> str: class Collects(IncrementalShopifyStream): - """ Collects stream does not support Incremental Refresh based on datetime fields, only `since_id` is supported: https://shopify.dev/docs/admin-api/rest/reference/products/collect @@ -284,7 +282,6 @@ def request_params( class OrderRefunds(ChildSubstream): - parent_stream_class: object = Orders slice_key = "order_id" @@ -299,7 +296,6 @@ def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: class OrderRisks(ChildSubstream): - parent_stream_class: object = Orders slice_key = "order_id" @@ -315,7 +311,6 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late class Transactions(ChildSubstream): - parent_stream_class: object = Orders slice_key = "order_id" @@ -342,7 +337,6 @@ def path(self, **kwargs) -> str: class DiscountCodes(ChildSubstream): - parent_stream_class: object = PriceRules slice_key = "price_rule_id" @@ -354,7 +348,6 @@ def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: class Locations(ShopifyStream): - """ The location API does not support any form of filtering. https://shopify.dev/api/admin-rest/2021-07/resources/location @@ -390,7 +383,6 @@ def generate_key(record): class InventoryItems(ChildSubstream): - parent_stream_class: object = Products slice_key = "id" nested_record = "variants" @@ -398,13 +390,11 @@ class InventoryItems(ChildSubstream): data_field = "inventory_items" def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: - ids = ",".join(str(x[self.nested_record_field_name]) for x in stream_slice[self.slice_key]) return f"inventory_items.json?ids={ids}" class FulfillmentOrders(ChildSubstream): - parent_stream_class: object = Orders slice_key = "order_id" @@ -421,7 +411,6 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late class Fulfillments(ChildSubstream): - parent_stream_class: object = Orders slice_key = "order_id" @@ -441,29 +430,39 @@ def path(self, **kwargs) -> str: class SourceShopify(AbstractSource): def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, any]: - """ Testing connection availability for the connector. """ config["authenticator"] = ShopifyAuthenticator(config) try: - responce = list(Shop(config).read_records(sync_mode=None)) - # check for the shop_id is present in the responce - shop_id = responce[0].get("id") + response = list(Shop(config).read_records(sync_mode=None)) + # check for the shop_id is present in the response + shop_id = response[0].get("id") if shop_id is not None: return True, None - except requests.exceptions.RequestException as e: + except (requests.exceptions.RequestException, IndexError) as e: return False, e def streams(self, config: Mapping[str, Any]) -> List[Stream]: - """ Mapping a input config of the user input configuration as defined in the connector spec. Defining streams to run. 
""" config["authenticator"] = ShopifyAuthenticator(config) - return [ + user_scopes = self.get_user_scopes(config) + + always_permitted_streams = ["Metafields", "Shop"] + + permitted_streams = [ + stream + for user_scope in user_scopes + if user_scope["handle"] in SCOPES_MAPPING + for stream in SCOPES_MAPPING.get(user_scope["handle"]) + ] + always_permitted_streams + + # before adding stream to stream_instances list, please add it to SCOPES_MAPPING + stream_instances = [ Customers(config), Orders(config), DraftOrders(config), @@ -485,3 +484,16 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: Fulfillments(config), Shop(config), ] + + return [stream_instance for stream_instance in stream_instances if self.format_name(stream_instance.name) in permitted_streams] + + @staticmethod + def get_user_scopes(config): + session = requests.Session() + headers = config["authenticator"].get_auth_header() + response = session.get(f"https://{config['shop']}.myshopify.com/admin/oauth/access_scopes.json", headers=headers).json() + return response["access_scopes"] + + @staticmethod + def format_name(name): + return "".join(x.capitalize() for x in name.split("_")) diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py b/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py index d0c5e7d4b5401..e23813b90924b 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py @@ -9,9 +9,21 @@ import requests +SCOPES_MAPPING = { + "read_customers": ["Customers"], + "read_orders": ["Orders", "AbandonedCheckouts", "Transactions", "Fulfillments", "OrderRefunds", "OrderRisks"], + "read_draft_orders": ["DraftOrders"], + "read_products": ["Products", "CustomCollections", "Collects"], + "read_content": ["Pages"], + "read_price_rules": ["PriceRules"], + "read_discounts": ["DiscountCodes"], + "read_locations": ["Locations"], + "read_inventory": ["InventoryItems", "InventoryLevels"], + "read_merchant_managed_fulfillment_orders": ["FulfillmentOrders"], +} -class ShopifyRateLimiter: +class ShopifyRateLimiter: """ Define timings for RateLimits. Adjust timings if needed. @@ -92,7 +104,6 @@ def wrapper_balance_rate_limit(*args, **kwargs): class EagerlyCachedStreamState: - """ This is the placeholder for the tmp stream state for each incremental stream, It's empty, once the sync has started and is being updated while sync operation takes place, diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py new file mode 100644 index 0000000000000..740807ebc8939 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/conftest.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pytest +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.models import AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode, SyncMode + + +@pytest.fixture +def logger(): + return AirbyteLogger() + + +@pytest.fixture +def basic_config(): + return {"shop": "test_shop", "credentials": {"auth_method": "api_password", "api_password": "api_password"}} + + +@pytest.fixture +def catalog_with_streams(): + def _catalog_with_streams(names): + streams = [] + for name in names: + streams.append( + ConfiguredAirbyteStream( + stream=AirbyteStream(name=name, json_schema={"type": "object"}), + sync_mode=SyncMode.full_refresh, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + ) + return ConfiguredAirbyteCatalog(streams=streams) + + return _catalog_with_streams diff --git a/airbyte-integrations/connectors/source-shopify/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-shopify/unit_tests/unit_test.py index 2dae07ef5c19f..96109cc4bc858 100644 --- a/airbyte-integrations/connectors/source-shopify/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-shopify/unit_tests/unit_test.py @@ -3,7 +3,7 @@ # import requests -from source_shopify.source import ShopifyStream +from source_shopify.source import ShopifyStream, SourceShopify def test_get_next_page_token(requests_mock): @@ -25,3 +25,12 @@ def test_get_next_page_token(requests_mock): test = ShopifyStream.next_page_token(response) assert test == expected_output_token + + +def test_privileges_validation(requests_mock, basic_config): + requests_mock.get("https://test_shop.myshopify.com/admin/oauth/access_scopes.json", json={"access_scopes": [{"handle": "read_orders"}]}) + source = SourceShopify() + + expected = ["orders", "abandoned_checkouts", "metafields", "order_refunds", "order_risks", "transactions", "fulfillments", "shop"] + + assert [stream.name for stream in source.streams(basic_config)] == expected diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md index b9dced7ffb2e1..becfc749a5ea0 100644 --- a/docs/integrations/sources/shopify.md +++ b/docs/integrations/sources/shopify.md @@ -101,6 +101,7 @@ This connector support both: `OAuth 2.0` and `API PASSWORD` (for private applica | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.30 | 2021-01-24 | [9648](https://github.com/airbytehq/airbyte/pull/9648) | Added permission validation before sync | | 0.1.29 | 2022-01-20 | [9049](https://github.com/airbytehq/airbyte/pull/9248) | Added `shop_url` to the record for all streams | | 0.1.28 | 2022-01-19 | [9591](https://github.com/airbytehq/airbyte/pull/9591) | Implemented `OAuth2.0` authentication method for Airbyte Cloud | | 0.1.27 | 2021-12-22 | [9049](https://github.com/airbytehq/airbyte/pull/9049) | Update connector fields title/description | From ded87beb6d9748c287162e54a81d1bf39831cea9 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Mon, 24 Jan 2022 12:02:32 -0800 Subject: [PATCH 210/215] use path for GOOGLE_APPLICATION_CREDENTIALS (#9763) --- .github/workflows/gradle.yml | 12 ++++++------ .../config/storage/DefaultGcsClientFactory.java | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index c5c53c331a1b9..f0b24c126e75d 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -566,6 +566,12 @@ jobs: - name: Build Platform Docker Images run: SUB_BUILD=PLATFORM ./gradlew assemble --scan + - 
name: Run Logging Tests + run: ./tools/bin/cloud_storage_logging_test.sh + env: + AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }} + GOOGLE_CLOUD_STORAGE_TEST_CREDS: ${{ secrets.GOOGLE_CLOUD_STORAGE_TEST_CREDS }} + - name: Run Kubernetes End-to-End Acceptance Tests env: USER: root @@ -582,12 +588,6 @@ jobs: name: Kubernetes Logs path: /tmp/kubernetes_logs/* - - name: Run Logging Tests - run: ./tools/bin/cloud_storage_logging_test.sh - env: - AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }} - GOOGLE_CLOUD_STORAGE_TEST_CREDS: ${{ secrets.GOOGLE_CLOUD_STORAGE_TEST_CREDS }} - - name: Show Disk Usage run: | df -h diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultGcsClientFactory.java b/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultGcsClientFactory.java index dbd4b7b6f3e4e..381b4ee003311 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultGcsClientFactory.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/storage/DefaultGcsClientFactory.java @@ -10,7 +10,8 @@ import com.google.cloud.storage.StorageOptions; import io.airbyte.config.storage.CloudStorageConfigs.GcsConfig; import java.io.ByteArrayInputStream; -import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.function.Supplier; /** @@ -34,7 +35,7 @@ private static void validate(final GcsConfig config) { @Override public Storage get() { try { - final var credentialsByteStream = new ByteArrayInputStream(config.getGoogleApplicationCredentials().getBytes(StandardCharsets.UTF_8)); + final var credentialsByteStream = new ByteArrayInputStream(Files.readAllBytes(Path.of(config.getGoogleApplicationCredentials()))); final var credentials = ServiceAccountCredentials.fromStream(credentialsByteStream); return StorageOptions.newBuilder().setCredentials(credentials).build().getService(); } catch (Exception e) { From f5b5feaae57dd9b7daf80e695cf593132b6c5ba9 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Mon, 24 Jan 2022 12:33:51 -0800 Subject: [PATCH 211/215] fix attempt output persistence for schedulerv2 (#9764) * fix attempt output persistence for schedulerv2 * fix test * add test for failure case pushing output * fix comment --- .../scheduling/ConnectionManagerWorkflowImpl.java | 3 ++- .../activities/JobCreationAndStatusUpdateActivity.java | 1 + .../JobCreationAndStatusUpdateActivityImpl.java | 10 ++++++++-- .../JobCreationAndStatusUpdateActivityTest.java | 8 +++++--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java index 653610ba2c0b4..ae6a25aa7dce0 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java @@ -195,7 +195,8 @@ private void reportSuccess(final ConnectionUpdaterInput connectionUpdaterInput) private void reportFailure(final ConnectionUpdaterInput connectionUpdaterInput) { jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput( connectionUpdaterInput.getJobId(), - connectionUpdaterInput.getAttemptId())); + connectionUpdaterInput.getAttemptId(), + standardSyncOutput.orElse(null))); final int maxAttempt = 
configFetchActivity.getMaxAttempt().getMaxAttempt(); final int attemptNumber = connectionUpdaterInput.getAttemptNumber(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java index 385f952eb56a8..33df7a447919d 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivity.java @@ -111,6 +111,7 @@ class AttemptFailureInput { private long jobId; private int attemptId; + private StandardSyncOutput standardSyncOutput; } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java index bfd122a8d8089..04238bbf7188d 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityImpl.java @@ -112,7 +112,7 @@ public void jobSuccess(final JobSuccessInput input) { final JobOutput jobOutput = new JobOutput().withSync(input.getStandardSyncOutput()); jobPersistence.writeOutput(input.getJobId(), input.getAttemptId(), jobOutput); } else { - log.warn("The job {} doesn't have an input for the attempt {}", input.getJobId(), input.getAttemptId()); + log.warn("The job {} doesn't have any output for the attempt {}", input.getJobId(), input.getAttemptId()); } jobPersistence.succeedAttempt(input.getJobId(), input.getAttemptId()); final Job job = jobPersistence.getJob(input.getJobId()); @@ -139,7 +139,13 @@ public void jobFailure(final JobFailureInput input) { public void attemptFailure(final AttemptFailureInput input) { try { jobPersistence.failAttempt(input.getJobId(), input.getAttemptId()); - final Job job = jobPersistence.getJob(input.getJobId()); + + if (input.getStandardSyncOutput() != null) { + final JobOutput jobOutput = new JobOutput().withSync(input.getStandardSyncOutput()); + jobPersistence.writeOutput(input.getJobId(), input.getAttemptId(), jobOutput); + } else { + log.warn("The job {} doesn't have any output for the attempt {}", input.getJobId(), input.getAttemptId()); + } } catch (final IOException e) { throw new RetryableException(e); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java index 6cb059dee1187..906124a633b08 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/JobCreationAndStatusUpdateActivityTest.java @@ -77,6 +77,8 @@ public class JobCreationAndStatusUpdateActivityTest { new StandardSyncSummary() .withStatus(ReplicationStatus.COMPLETED)); + private static final JobOutput jobOutput = new JobOutput().withSync(standardSyncOutput); + @Nested class Creation { @@ -147,7 +149,6 @@ class Update { @Test public void setJobSuccess() throws 
IOException { jobCreationAndStatusUpdateActivity.jobSuccess(new JobSuccessInput(JOB_ID, ATTEMPT_ID, standardSyncOutput)); - final JobOutput jobOutput = new JobOutput().withSync(standardSyncOutput); Mockito.verify(mJobPersistence).writeOutput(JOB_ID, ATTEMPT_ID, jobOutput); Mockito.verify(mJobPersistence).succeedAttempt(JOB_ID, ATTEMPT_ID); @@ -185,9 +186,10 @@ public void setJobFailureWrapException() throws IOException { @Test public void setAttemptFailure() throws IOException { - jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_ID)); + jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_ID, standardSyncOutput)); Mockito.verify(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_ID); + Mockito.verify(mJobPersistence).writeOutput(JOB_ID, ATTEMPT_ID, jobOutput); } @Test @@ -195,7 +197,7 @@ public void setAttemptFailureWrapException() throws IOException { Mockito.doThrow(new IOException()) .when(mJobPersistence).failAttempt(JOB_ID, ATTEMPT_ID); - Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_ID))) + Assertions.assertThatThrownBy(() -> jobCreationAndStatusUpdateActivity.attemptFailure(new AttemptFailureInput(JOB_ID, ATTEMPT_ID, null))) .isInstanceOf(RetryableException.class) .hasCauseInstanceOf(IOException.class); } From 7e25fddd6824fa1d2c8c216b294bc21a2baa1331 Mon Sep 17 00:00:00 2001 From: Octavia Squidington III <90398440+octavia-squidington-iii@users.noreply.github.com> Date: Tue, 25 Jan 2022 05:19:11 +0800 Subject: [PATCH 212/215] Bump Airbyte version from 0.35.8-alpha to 0.35.9-alpha (#9765) Co-authored-by: benmoriceau --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-bootloader/Dockerfile | 4 ++-- airbyte-container-orchestrator/Dockerfile | 6 +++--- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 4 ++-- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 10 +++++----- charts/airbyte/values.yaml | 10 +++++----- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 12 ++++++------ kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 12 ++++++------ 17 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 84b285504634c..c70e5f242943d 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.8-alpha +current_version = 0.35.9-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? 
diff --git a/.env b/.env index b903581c1cd03..3f72c73c31e1c 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.8-alpha +VERSION=0.35.9-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index ad039c93e42ab..b573edc990369 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -5,6 +5,6 @@ ENV APPLICATION airbyte-bootloader WORKDIR /app -ADD bin/${APPLICATION}-0.35.8-alpha.tar /app +ADD bin/${APPLICATION}-0.35.9-alpha.tar /app -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.9-alpha/bin/${APPLICATION}"] diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index a394390bf96ee..60022b06d8aaa 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -26,12 +26,12 @@ RUN echo "deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] htt RUN apt-get update && apt-get install -y kubectl ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.9-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.8-alpha.tar /app +ADD bin/${APPLICATION}-0.35.9-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.9-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 65cdd0b38881c..1a5638c99153b 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.8-alpha.tar /app +ADD bin/${APPLICATION}-0.35.9-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.9-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 9e0c5f54c4f91..7622a7553eee3 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.35.8-alpha.tar /app +ADD bin/${APPLICATION}-0.35.9-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.9-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 05f89d6a44e38..1377db9267439 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.8-alpha", + "version": "0.35.9-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": "0.35.8-alpha", + "version": "0.35.9-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index a179c3c2dabd7..92e7f7dd7db7f 100644 
--- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.8-alpha", + "version": "0.35.9-alpha", "private": true, "engines": { "node": ">=16.0.0" diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 15a5819bfe808..6b09a6e09e6c1 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.8-alpha.tar /app +ADD bin/${APPLICATION}-0.35.9-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.8-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.9-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index cc4d37816f0bb..183716ebfc765 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.8-alpha" +appVersion: "0.35.9-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 653f542ce129f..907b7ed482da2 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.9-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -55,7 +55,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.9-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -86,7 +86,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.35.9-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -120,7 +120,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.9-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -148,7 +148,7 @@ | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.8-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.9-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index ccaf10623fead..51bd00b5760a3 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.8-alpha + tag: 0.35.9-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -140,7 +140,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.8-alpha + tag: 0.35.9-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -245,7 +245,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.8-alpha + tag: 0.35.9-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -357,7 +357,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.8-alpha + tag: 0.35.9-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## @@ -446,7 +446,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.35.8-alpha + tag: 0.35.9-alpha ## @section Temporal parameters ## TODO: Move to consuming temporal from a dedicated helm chart diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index c6879eb41d84e..67b8514de8b81 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -101,7 +101,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.8-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.9-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 8883175c01686..07a059ad694a6 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.8-alpha +AIRBYTE_VERSION=0.35.9-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index ee6c3cb032236..1c0e8ededf1e0 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/bootloader - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/scheduler - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/server - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/webapp - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/worker - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 8883175c01686..07a059ad694a6 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.8-alpha +AIRBYTE_VERSION=0.35.9-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 5d3074db1bb53..1d18f0b2b5159 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/bootloader - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/scheduler - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/server - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/webapp - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: airbyte/worker - newTag: 0.35.8-alpha + newTag: 0.35.9-alpha - name: temporalio/auto-setup newTag: 1.7.0 From f5beea1c81ab98933d815ee98262284469ffd2ba Mon Sep 17 00:00:00 2001 From: Brian Leonard Date: Mon, 24 Jan 2022 16:07:38 -0800 Subject: [PATCH 213/215] Documentation: postgres version update for creating index (#9766) --- docs/integrations/destinations/postgres.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/destinations/postgres.md b/docs/integrations/destinations/postgres.md index c881c06be334a..6e12c9548c16c 100644 --- a/docs/integrations/destinations/postgres.md +++ b/docs/integrations/destinations/postgres.md @@ -27,7 +27,7 @@ Airbyte Cloud only supports connecting to your Postgres instance with SSL or TLS To use the Postgres destination, you'll need: -* A Postgres server version 9.4 or above +* A Postgres server version 9.5 or above #### Configure Network Access From d616279bea74bdc78eb97955629cd84c138fe5ec Mon Sep 17 00:00:00 2001 From: Vladimir remar Date: Tue, 25 Jan 2022 03:06:27 +0100 
Subject: [PATCH 214/215] Source hubspot: add Feedback Submissions stream and update Ticket Pipelines stream (#9081) * update: on api.py update url for ticket_pipelines * update: on client.py add new entries for feedback_submissions streams and ticket_pipelines * update: add to shchemas feedback_submissions and ticket_pipelines * update: to integration_tests_abnormal_state add new value for feedback_submissions * update: in sample_files add new entries for feedback_submissions and ticket_pipelines * update: bump connector version * fix: in feedback submission replace timestamp to updatedAt * update version in source_definitons and Dockerfile * run format * config seed * bump connector version in config file Co-authored-by: Marcos Marx --- .../36c891d9-4bd9-43ac-bad2-10e12756272c.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-hubspot/Dockerfile | 2 +- .../source-hubspot/acceptance-test-config.yml | 4 +- .../integration_tests/abnormal_state.json | 3 + .../sample_files/configured_catalog.json | 21 +++ .../sample_files/full_refresh_catalog.json | 9 ++ .../source-hubspot/source_hubspot/api.py | 4 +- .../source-hubspot/source_hubspot/client.py | 3 + .../schemas/feedback_submissions.json | 122 ++++++++++++++++++ .../schemas/ticket_pipelines.json | 58 +++++++++ docs/integrations/sources/hubspot.md | 5 +- 13 files changed, 227 insertions(+), 10 deletions(-) create mode 100644 airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/feedback_submissions.json create mode 100644 airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/ticket_pipelines.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json index 28595536e48b4..6b470bcf188cb 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "36c891d9-4bd9-43ac-bad2-10e12756272c", "name": "HubSpot", "dockerRepository": "airbyte/source-hubspot", - "dockerImageTag": "0.1.33", + "dockerImageTag": "0.1.35", "documentationUrl": "https://docs.airbyte.io/integrations/sources/hubspot", "icon": "hubspot.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 129235fd48462..76a477c2a6350 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -307,7 +307,7 @@ - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.34 + dockerImageTag: 0.1.35 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index ea94882e55bff..60787d4cd33cb 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3057,7 +3057,7 @@ supportsNormalization: false supportsDBT: false 
supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-hubspot:0.1.34" +- dockerImage: "airbyte/source-hubspot:0.1.35" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index 572784f1391ba..5684beb2d00b6 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.34 +LABEL io.airbyte.version=0.1.35 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml b/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml index 7a320596a7ca1..b7aef4adb1f50 100644 --- a/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml @@ -18,14 +18,14 @@ tests: basic_read: - config_path: "secrets/config.json" configured_catalog_path: "sample_files/full_refresh_catalog.json" - empty_streams: ["workflows", "form_submissions"] + empty_streams: ["workflows", "form_submissions", "ticket_pipelines"] - config_path: "secrets/config_oauth.json" configured_catalog_path: "sample_files/configured_catalog_for_oauth_config.json" # The `campaigns` stream is empty in this case, because we use a catalog with # incremental streams: subscription_changes and email_events (it takes a long time to read) # and therefore the start date is set at 2021-10-10 for `config_oauth.json`, # but the campaign was created on 2021-01-11 - empty_streams: ["campaigns", "workflows", "contacts_list_memberships", "form_submissions"] + empty_streams: ["campaigns", "workflows", "contacts_list_memberships", "form_submissions", "ticket_pipelines"] incremental: - config_path: "secrets/config.json" configured_catalog_path: "sample_files/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json index 5cf26f89b23dd..5b3c143e8f645 100644 --- a/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json @@ -14,6 +14,9 @@ "email_events": { "timestamp": "2221-10-12T13:37:56.412000+00:00" }, + "feedback_submissions": { + "updatedAt": "2221-10-12T13:37:56.412000+00:00" + }, "engagements": { "lastUpdated": 7945393076412 }, diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json index c3cbea5bfa4e5..8fb486ae13c4e 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json @@ -90,6 +90,18 @@ "cursor_field": ["lastUpdated"], "destination_sync_mode": "append" }, + { + "stream": { + "name": "feedback_submissions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] + }, + "sync_mode": 
"incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" + }, { "stream": { "name": "forms", @@ -189,6 +201,15 @@ "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, + { + "stream": { + "name": "ticket_pipelines", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": "workflows", diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json b/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json index 98d568d1cc022..76b06a1c7dd24 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/full_refresh_catalog.json @@ -135,6 +135,15 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "ticket_pipelines", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": "workflows", diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index 480a9daea261e..0a366001472a6 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -781,10 +781,10 @@ class DealPipelineStream(Stream): class TicketPipelineStream(Stream): """Ticket pipelines, API v1 This endpoint requires the tickets scope. - Docs: https://legacydocs.hubspot.com/docs/methods/pipelines/get_pipelines_for_object_type + Docs: https://developers.hubspot.com/docs/api/crm/pipelines """ - url = "/crm-pipelines/v1/pipelines/tickets" + url = "/crm/v3/pipelines/tickets" updated_at_field = "updatedAt" created_at_field = "createdAt" diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py index 80e97632e26e2..6dcffff072bb7 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py @@ -24,6 +24,7 @@ MarketingEmailStream, OwnerStream, SubscriptionChangeStream, + TicketPipelineStream, WorkflowStream, ) @@ -50,6 +51,7 @@ def __init__(self, start_date, credentials, **kwargs): "deals": DealStream(associations=["contacts"], **common_params), "email_events": EmailEventStream(**common_params), "engagements": EngagementStream(**common_params), + "feedback_submissions": CRMObjectIncrementalStream(entity="feedback_submissions", associations=["contacts"], **common_params), "forms": FormStream(**common_params), "form_submissions": FormSubmissionStream(**common_params), "line_items": CRMObjectIncrementalStream(entity="line_item", **common_params), @@ -58,6 +60,7 @@ def __init__(self, start_date, credentials, **kwargs): "products": CRMObjectIncrementalStream(entity="product", **common_params), "subscription_changes": SubscriptionChangeStream(**common_params), "tickets": CRMObjectIncrementalStream(entity="ticket", **common_params), + "ticket_pipelines": TicketPipelineStream(**common_params), "workflows": WorkflowStream(**common_params), } diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/feedback_submissions.json 
b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/feedback_submissions.json new file mode 100644 index 0000000000000..7606945d0af05 --- /dev/null +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/feedback_submissions.json @@ -0,0 +1,122 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "properties": { + "type": ["null", "object"], + "properties": { + "hs_all_accessible_team_ids": { + "type": ["null", "string"] + }, + "hs_all_assigned_business_unit_ids": { + "type": ["null", "string"] + }, + "hs_contact_email_rollup": { + "type": ["null", "string"] + }, + "hs_contact_id": { + "type": ["null", "string"] + }, + "hs_content": { + "type": ["null", "string"] + }, + "hs_created_by_user_id": { + "type": ["null", "string"] + }, + "hs_createdate": { + "type": ["null", "string"] + }, + "hs_engagement_id": { + "type": ["null", "string"] + }, + "hs_form_guid": { + "type": ["null", "string"] + }, + "hs_ingestion_id": { + "type": ["null", "string"] + }, + "hs_knowledge_article_id": { + "type": ["null", "string"] + }, + "hs_lastmodifieddate": { + "type": ["null", "string"] + }, + "hs_merged_object_ids": { + "type": ["null", "string"] + }, + "hs_object_id": { + "type": ["null", "string"] + }, + "hs_response_group": { + "type": ["null", "string"] + }, + "hs_submission_name": { + "type": ["null", "string"] + }, + "hs_submission_timestamp": { + "type": ["null", "string"] + }, + "hs_submission_url": { + "type": ["null", "string"] + }, + "hs_survey_channel": { + "type": ["null", "string"] + }, + "hs_survey_id": { + "type": ["null", "string"] + }, + "hs_survey_name": { + "type": ["null", "string"] + }, + "hs_survey_type": { + "type": ["null", "string"] + }, + "hs_unique_creation_key": { + "type": ["null", "string"] + }, + "hs_updated_by_user_id": { + "type": ["null", "string"] + }, + "hs_user_ids_of_all_notification_followers": { + "type": ["null", "string"] + }, + "hs_user_ids_of_all_notification_unfollowers": { + "type": ["null", "string"] + }, + "hs_user_ids_of_all_owners": { + "type": ["null", "string"] + }, + "hs_value": { + "type": ["null", "string"] + }, + "hs_visitor_id": { + "type": ["null", "string"] + } + } + }, + "createdAt": { + "type": ["null", "string"], + "format": "date-time" + }, + "updatedAt": { + "type": ["null", "string"], + "format": "date-time" + }, + "archived": { + "type": ["null", "boolean"] + }, + "archivedAt": { + "type": ["null", "string"], + "format": "date-time" + }, + "contacts": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/ticket_pipelines.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/ticket_pipelines.json new file mode 100644 index 0000000000000..b360fb642613e --- /dev/null +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/ticket_pipelines.json @@ -0,0 +1,58 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": ["null", "object"], + "properties": { + "label": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "string"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "stages": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "label": { + "type": ["null", "string"] + }, + "displayOrder": { + "type": ["null", "integer"] + }, + "metadata": { + "type": ["null", 
"object"], + "properties": { + "ticketState": { + "type": ["null", "string"] + }, + "isClosed": { + "type": ["null", "string"] + } + } + }, + "id": { + "type": ["null", "string"] + }, + "createdAt": { + "type": ["null", "integer"] + }, + "updatedAt": { + "type": ["null", "integer"] + }, + "active": { + "type": ["null", "boolean"] + } + } + } + }, + "createdAt": { + "type": ["null", "integer"] + }, + "updatedAt": { + "type": ["null", "integer"] + } + } +} diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index 8b12c7e32e2da..1292fa2aff46b 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -110,8 +110,9 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------| -| 0.1.34 | 2022-01-20 | [9641](https://github.com/airbytehq/airbyte/pull/9641) | Add more fields for `email_events` stream | -| 0.1.33 | 2022-01-14 | [8887](https://github.com/airbytehq/airbyte/pull/8887) | More efficient support for incremental updates on Companies, Contact, Deals and Engagement streams | +| 0.1.35 | 2021-12-24 | [9081](https://github.com/airbytehq/airbyte/pull/9081) | Add Feedback Submissions stream and update Ticket Pipelines stream | +| 0.1.34 | 2022-01-20 | [9641](https://github.com/airbytehq/airbyte/pull/9641) | Add more fields for `email_events` stream | +| 0.1.33 | 2022-01-14 | [8887](https://github.com/airbytehq/airbyte/pull/8887) | More efficient support for incremental updates on Companies, Contact, Deals and Engagement streams | | 0.1.32 | 2022-01-13 | [8011](https://github.com/airbytehq/airbyte/pull/8011) | Add new stream form_submissions | | 0.1.31 | 2022-01-11 | [9385](https://github.com/airbytehq/airbyte/pull/9385) | Remove auto-generated `properties` from `Engagements` stream | | 0.1.30 | 2021-01-10 | [9129](https://github.com/airbytehq/airbyte/pull/9129) | Created Contacts list memberships streams | From 0dfbfdc2e3c37f17f6acb8ca4be6468e04c92799 Mon Sep 17 00:00:00 2001 From: Harshith Mullapudi Date: Tue, 25 Jan 2022 10:30:36 +0530 Subject: [PATCH 215/215] feat: added api to jobs handler to get required debug info (#9703) --- airbyte-api/src/main/openapi/config.yaml | 60 ++++++ .../airbyte/server/apis/ConfigurationApi.java | 9 +- .../server/converters/JobConverter.java | 18 ++ .../server/handlers/JobHistoryHandler.java | 85 ++++++++- .../server/converters/JobConverterTest.java | 33 +++- .../handlers/JobHistoryHandlerTest.java | 94 ++++++++-- .../api/generated-api-html/index.html | 173 ++++++++++++++++++ 7 files changed, 441 insertions(+), 31 deletions(-) diff --git a/airbyte-api/src/main/openapi/config.yaml b/airbyte-api/src/main/openapi/config.yaml index 6086830250f4d..b483fabd537a7 100644 --- a/airbyte-api/src/main/openapi/config.yaml +++ b/airbyte-api/src/main/openapi/config.yaml @@ -1665,6 +1665,29 @@ paths: $ref: "#/components/responses/NotFoundResponse" "422": $ref: "#/components/responses/InvalidInputResponse" + /v1/jobs/get_debug_info: + post: + tags: + - jobs + summary: Gets all information needed to debug this job + operationId: getJobDebugInfo + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/JobIdRequestBody" + required: true + responses: + "200": + description: Successful operation + content: + 
application/json: + schema: + $ref: "#/components/schemas/JobDebugInfoRead" + "404": + $ref: "#/components/responses/NotFoundResponse" + "422": + $ref: "#/components/responses/InvalidInputResponse" /v1/health: get: tags: @@ -3068,6 +3091,31 @@ components: format: int64 status: $ref: "#/components/schemas/JobStatus" + JobDebugRead: + type: object + required: + - id + - configType + - configId + - status + - airbyteVersion + - sourceDefinition + - destinationDefinition + properties: + id: + $ref: "#/components/schemas/JobId" + configType: + $ref: "#/components/schemas/JobConfigType" + configId: + type: string + status: + $ref: "#/components/schemas/JobStatus" + airbyteVersion: + type: string + sourceDefinition: + $ref: "#/components/schemas/SourceDefinitionRead" + destinationDefinition: + $ref: "#/components/schemas/DestinationDefinitionRead" JobWithAttemptsRead: type: object properties: @@ -3172,6 +3220,18 @@ components: type: array items: $ref: "#/components/schemas/AttemptInfoRead" + JobDebugInfoRead: + type: object + required: + - job + - attempts + properties: + job: + $ref: "#/components/schemas/JobDebugRead" + attempts: + type: array + items: + $ref: "#/components/schemas/AttemptInfoRead" AttemptInfoRead: type: object required: diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index c2280bfd7438e..dcf8e1fbb3475 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -36,6 +36,7 @@ import io.airbyte.api.model.HealthCheckRead; import io.airbyte.api.model.ImportRead; import io.airbyte.api.model.ImportRequestBody; +import io.airbyte.api.model.JobDebugInfoRead; import io.airbyte.api.model.JobIdRequestBody; import io.airbyte.api.model.JobInfoRead; import io.airbyte.api.model.JobListRequestBody; @@ -205,7 +206,8 @@ public ConfigurationApi(final ConfigRepository configRepository, destinationHandler = new DestinationHandler(configRepository, schemaValidator, connectionsHandler); destinationDefinitionsHandler = new DestinationDefinitionsHandler(configRepository, synchronousSchedulerClient, destinationHandler); workspacesHandler = new WorkspacesHandler(configRepository, connectionsHandler, destinationHandler, sourceHandler); - jobHistoryHandler = new JobHistoryHandler(jobPersistence, workerEnvironment, logConfigs); + jobHistoryHandler = new JobHistoryHandler(jobPersistence, workerEnvironment, logConfigs, connectionsHandler, sourceHandler, + sourceDefinitionsHandler, destinationHandler, destinationDefinitionsHandler, airbyteVersion); oAuthHandler = new OAuthHandler(configRepository, httpClient, trackingClient); webBackendConnectionsHandler = new WebBackendConnectionsHandler( connectionsHandler, @@ -642,6 +644,11 @@ public JobInfoRead getJobInfo(final JobIdRequestBody jobIdRequestBody) { return execute(() -> jobHistoryHandler.getJobInfo(jobIdRequestBody)); } + @Override + public JobDebugInfoRead getJobDebugInfo(final JobIdRequestBody jobIdRequestBody) { + return execute(() -> jobHistoryHandler.getJobDebugInfo(jobIdRequestBody)); + } + @Override public File getLogs(final LogsRequestBody logsRequestBody) { return execute(() -> logsHandler.getLogs(workspaceRoot, workerEnvironment, logConfigs, logsRequestBody)); diff --git a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java 
index 5c4f74b52e79c..92b9beeab4581 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java @@ -9,14 +9,18 @@ import io.airbyte.api.model.AttemptStats; import io.airbyte.api.model.AttemptStatus; import io.airbyte.api.model.AttemptStreamStats; +import io.airbyte.api.model.DestinationDefinitionRead; import io.airbyte.api.model.JobConfigType; +import io.airbyte.api.model.JobDebugRead; import io.airbyte.api.model.JobInfoRead; import io.airbyte.api.model.JobRead; import io.airbyte.api.model.JobStatus; import io.airbyte.api.model.JobWithAttemptsRead; import io.airbyte.api.model.LogRead; +import io.airbyte.api.model.SourceDefinitionRead; import io.airbyte.api.model.SynchronousJobRead; import io.airbyte.commons.enums.Enums; +import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobOutput; import io.airbyte.config.StandardSyncOutput; @@ -53,6 +57,20 @@ public JobInfoRead getJobInfoRead(final Job job) { .attempts(job.getAttempts().stream().map(attempt -> getAttemptInfoRead(attempt)).collect(Collectors.toList())); } + public JobDebugRead getDebugJobInfoRead(final JobInfoRead jobInfoRead, + final SourceDefinitionRead sourceDefinitionRead, + final DestinationDefinitionRead destinationDefinitionRead, + final AirbyteVersion airbyteVersion) { + return new JobDebugRead() + .id(jobInfoRead.getJob().getId()) + .configId(jobInfoRead.getJob().getConfigId()) + .configType(jobInfoRead.getJob().getConfigType()) + .status(jobInfoRead.getJob().getStatus()) + .airbyteVersion(airbyteVersion.serialize()) + .sourceDefinition(sourceDefinitionRead) + .destinationDefinition(destinationDefinitionRead); + } + public static JobWithAttemptsRead getJobWithAttemptsRead(final Job job) { final String configId = job.getScope(); final JobConfigType configType = Enums.convertTo(job.getConfigType(), JobConfigType.class); diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java index 0282c79490391..0e5315f1a1c98 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java @@ -5,33 +5,68 @@ package io.airbyte.server.handlers; import com.google.common.base.Preconditions; +import io.airbyte.api.model.ConnectionRead; +import io.airbyte.api.model.DestinationDefinitionIdRequestBody; +import io.airbyte.api.model.DestinationDefinitionRead; +import io.airbyte.api.model.DestinationIdRequestBody; +import io.airbyte.api.model.DestinationRead; +import io.airbyte.api.model.JobDebugInfoRead; +import io.airbyte.api.model.JobDebugRead; import io.airbyte.api.model.JobIdRequestBody; import io.airbyte.api.model.JobInfoRead; import io.airbyte.api.model.JobListRequestBody; import io.airbyte.api.model.JobReadList; import io.airbyte.api.model.JobWithAttemptsRead; +import io.airbyte.api.model.SourceDefinitionIdRequestBody; +import io.airbyte.api.model.SourceDefinitionRead; +import io.airbyte.api.model.SourceIdRequestBody; +import io.airbyte.api.model.SourceRead; import io.airbyte.commons.enums.Enums; +import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.helpers.LogConfigs; +import 
io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.scheduler.models.Job; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.converters.JobConverter; +import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; import java.util.List; import java.util.Set; +import java.util.UUID; import java.util.stream.Collectors; public class JobHistoryHandler { + private final ConnectionsHandler connectionsHandler; + private final SourceHandler sourceHandler; + private final DestinationHandler destinationHandler; + private final SourceDefinitionsHandler sourceDefinitionsHandler; + private final DestinationDefinitionsHandler destinationDefinitionsHandler; public static final int DEFAULT_PAGE_SIZE = 200; private final JobPersistence jobPersistence; private final JobConverter jobConverter; + private final AirbyteVersion airbyteVersion; - public JobHistoryHandler(final JobPersistence jobPersistence, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + public JobHistoryHandler(final JobPersistence jobPersistence, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final ConnectionsHandler connectionsHandler, + final SourceHandler sourceHandler, + final SourceDefinitionsHandler sourceDefinitionsHandler, + final DestinationHandler destinationHandler, + final DestinationDefinitionsHandler destinationDefinitionsHandler, + final AirbyteVersion airbyteVersion) { jobConverter = new JobConverter(workerEnvironment, logConfigs); this.jobPersistence = jobPersistence; + this.connectionsHandler = connectionsHandler; + this.sourceHandler = sourceHandler; + this.sourceDefinitionsHandler = sourceDefinitionsHandler; + this.destinationHandler = destinationHandler; + this.destinationDefinitionsHandler = destinationDefinitionsHandler; + this.airbyteVersion = airbyteVersion; } @SuppressWarnings("UnstableApiUsage") @@ -61,4 +96,52 @@ public JobInfoRead getJobInfo(final JobIdRequestBody jobIdRequestBody) throws IO return jobConverter.getJobInfoRead(job); } + public JobDebugInfoRead getJobDebugInfo(final JobIdRequestBody jobIdRequestBody) + throws ConfigNotFoundException, IOException, JsonValidationException { + final Job job = jobPersistence.getJob(jobIdRequestBody.getId()); + final JobInfoRead jobinfoRead = jobConverter.getJobInfoRead(job); + + return buildJobDebugInfoRead(jobinfoRead); + } + + private SourceRead getSourceRead(final ConnectionRead connectionRead) throws JsonValidationException, IOException, ConfigNotFoundException { + final SourceIdRequestBody sourceIdRequestBody = new SourceIdRequestBody().sourceId(connectionRead.getSourceId()); + return sourceHandler.getSource(sourceIdRequestBody); + } + + private DestinationRead getDestinationRead(final ConnectionRead connectionRead) + throws JsonValidationException, IOException, ConfigNotFoundException { + final DestinationIdRequestBody destinationIdRequestBody = new DestinationIdRequestBody().destinationId(connectionRead.getDestinationId()); + return destinationHandler.getDestination(destinationIdRequestBody); + } + + private SourceDefinitionRead getSourceDefinitionRead(final SourceRead sourceRead) + throws JsonValidationException, IOException, ConfigNotFoundException { + final SourceDefinitionIdRequestBody sourceDefinitionIdRequestBody = + new SourceDefinitionIdRequestBody().sourceDefinitionId(sourceRead.getSourceDefinitionId()); + return sourceDefinitionsHandler.getSourceDefinition(sourceDefinitionIdRequestBody); + } + + private 
DestinationDefinitionRead getDestinationDefinitionRead(final DestinationRead destinationRead) + throws JsonValidationException, IOException, ConfigNotFoundException { + final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody = + new DestinationDefinitionIdRequestBody().destinationDefinitionId(destinationRead.getDestinationDefinitionId()); + return destinationDefinitionsHandler.getDestinationDefinition(destinationDefinitionIdRequestBody); + } + + private JobDebugInfoRead buildJobDebugInfoRead(final JobInfoRead jobInfoRead) + throws ConfigNotFoundException, IOException, JsonValidationException { + final String configId = jobInfoRead.getJob().getConfigId(); + final ConnectionRead connection = connectionsHandler.getConnection(UUID.fromString(configId)); + final SourceRead source = getSourceRead(connection); + final DestinationRead destination = getDestinationRead(connection); + final SourceDefinitionRead sourceDefinitionRead = getSourceDefinitionRead(source); + final DestinationDefinitionRead destinationDefinitionRead = getDestinationDefinitionRead(destination); + final JobDebugRead jobDebugRead = jobConverter.getDebugJobInfoRead(jobInfoRead, sourceDefinitionRead, destinationDefinitionRead, airbyteVersion); + + return new JobDebugInfoRead() + .attempts(jobInfoRead.getAttempts()) + .job(jobDebugRead); + } + } diff --git a/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java b/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java index 37a9d8cab50bc..f7ee55f87b486 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java @@ -10,16 +10,9 @@ import static org.mockito.Mockito.when; import com.google.common.collect.Lists; -import io.airbyte.api.model.AttemptInfoRead; -import io.airbyte.api.model.AttemptRead; -import io.airbyte.api.model.AttemptStats; -import io.airbyte.api.model.AttemptStreamStats; -import io.airbyte.api.model.JobConfigType; -import io.airbyte.api.model.JobInfoRead; -import io.airbyte.api.model.JobRead; -import io.airbyte.api.model.JobWithAttemptsRead; -import io.airbyte.api.model.LogRead; +import io.airbyte.api.model.*; import io.airbyte.commons.enums.Enums; +import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobCheckConnectionConfig; import io.airbyte.config.JobConfig; @@ -37,6 +30,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Optional; +import java.util.UUID; import java.util.stream.Collectors; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -114,6 +108,22 @@ class JobConverterTest { .endedAt(CREATED_AT)) .logs(new LogRead().logLines(new ArrayList<>())))); + private static final String version = "0.33.4"; + private static final AirbyteVersion airbyteVersion = new AirbyteVersion(version); + private static final SourceDefinitionRead sourceDefinitionRead = new SourceDefinitionRead().sourceDefinitionId(UUID.randomUUID()); + private static final DestinationDefinitionRead destinationDefinitionRead = + new DestinationDefinitionRead().destinationDefinitionId(UUID.randomUUID()); + + private static final JobDebugRead JOB_DEBUG_INFO = + new JobDebugRead() + .id(JOB_ID) + .configId(JOB_CONFIG_ID) + .status(io.airbyte.api.model.JobStatus.RUNNING) + .configType(JobConfigType.CHECK_CONNECTION_SOURCE) + .airbyteVersion(airbyteVersion.serialize()) + 
.sourceDefinition(sourceDefinitionRead) + .destinationDefinition(destinationDefinitionRead); + private static final JobWithAttemptsRead JOB_WITH_ATTEMPTS_READ = new JobWithAttemptsRead() .job(JOB_INFO.getJob()) .attempts(JOB_INFO.getAttempts().stream().map(AttemptInfoRead::getAttempt).collect(Collectors.toList())); @@ -146,6 +156,11 @@ public void testGetJobInfoRead() { assertEquals(JOB_INFO, jobConverter.getJobInfoRead(job)); } + @Test + public void testGetDebugJobInfoRead() { + assertEquals(JOB_DEBUG_INFO, jobConverter.getDebugJobInfoRead(JOB_INFO, sourceDefinitionRead, destinationDefinitionRead, airbyteVersion)); + } + @Test public void testGetJobWithAttemptsRead() { assertEquals(JOB_WITH_ATTEMPTS_READ, jobConverter.getJobWithAttemptsRead(job)); diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java index 530f7c24cc67e..e985c0b92ac5d 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java @@ -10,34 +10,29 @@ import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableList; -import io.airbyte.api.model.AttemptInfoRead; -import io.airbyte.api.model.AttemptRead; -import io.airbyte.api.model.JobConfigType; -import io.airbyte.api.model.JobIdRequestBody; -import io.airbyte.api.model.JobInfoRead; -import io.airbyte.api.model.JobListRequestBody; -import io.airbyte.api.model.JobRead; -import io.airbyte.api.model.JobReadList; -import io.airbyte.api.model.JobWithAttemptsRead; -import io.airbyte.api.model.LogRead; -import io.airbyte.api.model.Pagination; +import io.airbyte.api.model.*; import io.airbyte.commons.enums.Enums; +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.*; import io.airbyte.config.Configs.WorkerEnvironment; -import io.airbyte.config.JobCheckConnectionConfig; -import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.helpers.LogConfigs; +import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.scheduler.models.Attempt; import io.airbyte.scheduler.models.AttemptStatus; import io.airbyte.scheduler.models.Job; import io.airbyte.scheduler.models.JobStatus; import io.airbyte.scheduler.persistence.JobPersistence; +import io.airbyte.server.helpers.ConnectionHelpers; +import io.airbyte.server.helpers.DestinationDefinitionHelpers; +import io.airbyte.server.helpers.DestinationHelpers; +import io.airbyte.server.helpers.SourceDefinitionHelpers; +import io.airbyte.server.helpers.SourceHelpers; +import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.URISyntaxException; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Set; +import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; import org.junit.jupiter.api.BeforeEach; @@ -50,7 +45,7 @@ public class JobHistoryHandlerTest { private static final long JOB_ID = 100L; private static final long ATTEMPT_ID = 1002L; - private static final String JOB_CONFIG_ID = "123"; + private static final String JOB_CONFIG_ID = "ef296385-6796-413f-ac1b-49c4caba3f2b"; private static final JobStatus JOB_STATUS = JobStatus.SUCCEEDED; private static final JobConfig.ConfigType CONFIG_TYPE = JobConfig.ConfigType.CHECK_CONNECTION_SOURCE; 
private static final JobConfigType CONFIG_TYPE_FOR_API = JobConfigType.CHECK_CONNECTION_SOURCE; @@ -61,6 +56,17 @@ public class JobHistoryHandlerTest { private static final LogRead EMPTY_LOG_READ = new LogRead().logLines(new ArrayList<>()); private static final long CREATED_AT = System.currentTimeMillis() / 1000; + private SourceRead sourceRead; + private ConnectionRead connectionRead; + private DestinationRead destinationRead; + private ConnectionsHandler connectionsHandler; + private SourceHandler sourceHandler; + private DestinationHandler destinationHandler; + private SourceDefinitionsHandler sourceDefinitionsHandler; + private DestinationDefinitionsHandler destinationDefinitionsHandler; + private StandardDestinationDefinition standardDestinationDefinition; + private StandardSourceDefinition standardSourceDefinition; + private AirbyteVersion airbyteVersion; private Job testJob; private Attempt testJobAttempt; private JobPersistence jobPersistence; @@ -76,6 +82,16 @@ private static JobRead toJobInfo(final Job job) { } + private static JobDebugRead toDebugJobInfo(final Job job) { + return new JobDebugRead().id(job.getId()) + .configId(job.getScope()) + .status(Enums.convertTo(job.getStatus(), io.airbyte.api.model.JobStatus.class)) + .configType(Enums.convertTo(job.getConfigType(), io.airbyte.api.model.JobConfigType.class)) + .sourceDefinition(null) + .destinationDefinition(null); + + } + private static List toAttemptInfoList(final List attempts) { final List attemptReads = attempts.stream().map(JobHistoryHandlerTest::toAttemptRead).collect(Collectors.toList()); @@ -97,13 +113,20 @@ private static Attempt createSuccessfulAttempt(final long jobId, final long time } @BeforeEach - public void setUp() { + public void setUp() throws IOException, JsonValidationException, ConfigNotFoundException { testJobAttempt = createSuccessfulAttempt(JOB_ID, CREATED_AT); testJob = new Job(JOB_ID, JOB_CONFIG.getConfigType(), JOB_CONFIG_ID, JOB_CONFIG, ImmutableList.of(testJobAttempt), JOB_STATUS, null, CREATED_AT, CREATED_AT); + connectionsHandler = mock(ConnectionsHandler.class); + sourceHandler = mock(SourceHandler.class); + sourceDefinitionsHandler = mock(SourceDefinitionsHandler.class); + destinationHandler = mock(DestinationHandler.class); + destinationDefinitionsHandler = mock(DestinationDefinitionsHandler.class); + airbyteVersion = mock(AirbyteVersion.class); jobPersistence = mock(JobPersistence.class); - jobHistoryHandler = new JobHistoryHandler(jobPersistence, WorkerEnvironment.DOCKER, LogConfigs.EMPTY); + jobHistoryHandler = new JobHistoryHandler(jobPersistence, WorkerEnvironment.DOCKER, LogConfigs.EMPTY, connectionsHandler, sourceHandler, + sourceDefinitionsHandler, destinationHandler, destinationDefinitionsHandler, airbyteVersion); } @Nested @@ -197,6 +220,37 @@ public void testGetJobInfo() throws IOException { assertEquals(exp, jobInfoActual); } + @Test + @DisplayName("Should return the right info to debug this job") + public void testGetDebugJobInfo() throws IOException, JsonValidationException, ConfigNotFoundException, URISyntaxException { + standardSourceDefinition = SourceDefinitionHelpers.generateSourceDefinition(); + final SourceConnection source = SourceHelpers.generateSource(UUID.randomUUID()); + sourceRead = SourceHelpers.getSourceRead(source, standardSourceDefinition); + + standardDestinationDefinition = DestinationDefinitionHelpers.generateDestination(); + final DestinationConnection destination = DestinationHelpers.generateDestination(UUID.randomUUID()); + destinationRead = 
DestinationHelpers.getDestinationRead(destination, standardDestinationDefinition); + + final StandardSync standardSync = ConnectionHelpers.generateSyncWithSourceId(source.getSourceId()); + connectionRead = ConnectionHelpers.generateExpectedConnectionRead(standardSync); + when(connectionsHandler.getConnection(UUID.fromString(testJob.getScope()))).thenReturn(connectionRead); + + final SourceIdRequestBody sourceIdRequestBody = new SourceIdRequestBody(); + sourceIdRequestBody.setSourceId(connectionRead.getSourceId()); + when(sourceHandler.getSource(sourceIdRequestBody)).thenReturn(sourceRead); + + final DestinationIdRequestBody destinationIdRequestBody = new DestinationIdRequestBody(); + destinationIdRequestBody.setDestinationId(connectionRead.getDestinationId()); + when(destinationHandler.getDestination(destinationIdRequestBody)).thenReturn(destinationRead); + when(jobPersistence.getJob(JOB_ID)).thenReturn(testJob); + + final JobIdRequestBody requestBody = new JobIdRequestBody().id(JOB_ID); + final JobDebugInfoRead jobDebugInfoActual = jobHistoryHandler.getJobDebugInfo(requestBody); + final JobDebugInfoRead exp = new JobDebugInfoRead().job(toDebugJobInfo(testJob)).attempts(toAttemptInfoList(ImmutableList.of(testJobAttempt))); + + assertEquals(exp, jobDebugInfoActual); + } + @Test @DisplayName("Should have compatible config enums") public void testEnumConversion() { diff --git a/docs/reference/api/generated-api-html/index.html b/docs/reference/api/generated-api-html/index.html index 4e9ff250a3027..457677dd409a7 100644 --- a/docs/reference/api/generated-api-html/index.html +++ b/docs/reference/api/generated-api-html/index.html @@ -275,6 +275,7 @@

[Generated HTML API reference, docs/reference/api/generated-api-html/index.html: the raw markup of this diff is not reproduced here. One small hunk (around line 275) adds a single line to the operations index near the existing Health and Jobs entries; the main hunk (roughly 155 added lines around line 3028) documents the new getJobDebugInfo operation: POST /v1/jobs/get_debug_info, "Gets all information needed to debug this job", consuming and producing application/json, taking a JobIdRequestBody and returning a JobDebugInfoRead on 200, with 404 (NotFoundKnownExceptionInfo) and 422 (InvalidInputExceptionInfo) error responses. The example response shows a job object carrying id, configId, configType, status, airbyteVersion, sourceDefinition, and destinationDefinition, plus an attempts array of attempt/logs entries, and the new JobDebugRead model is listed with fields id (Long, int64), configType, configId, status, airbyteVersion, sourceDefinition, and destinationDefinition.]
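
For reviewers who want to exercise the new endpoint by hand once this patch is deployed, a minimal client sketch follows. It simply posts a JobIdRequestBody to /v1/jobs/get_debug_info and prints the returned JobDebugInfoRead. The base URL (http://localhost:8001/api) and the job id (100) are placeholders chosen for illustration, not values taken from this patch, and error handling is reduced to a status-code print; treat it as a sketch, not part of the change.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class JobDebugInfoClient {

  public static void main(String[] args) throws Exception {
    // Assumed address of a locally running Airbyte server; adjust to your deployment.
    String baseUrl = "http://localhost:8001/api";
    long jobId = 100L; // placeholder job id

    // JobIdRequestBody is a single-field JSON object: {"id": <jobId>}
    String requestBody = "{\"id\": " + jobId + "}";

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(baseUrl + "/v1/jobs/get_debug_info"))
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(requestBody))
        .build();

    HttpResponse<String> response = HttpClient.newHttpClient()
        .send(request, HttpResponse.BodyHandlers.ofString());

    // A 200 response carries a JobDebugInfoRead: a "job" object (id, configId, configType,
    // status, airbyteVersion, sourceDefinition, destinationDefinition) and an "attempts" array.
    System.out.println(response.statusCode());
    System.out.println(response.body());
  }
}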