Skip to content

Commit

Permalink
Fix: Resolve issue where PyAirbyte would fail if property names contain the dot character (`'.'`), e.g. with `source-google-ads` (#343)
Browse files Browse the repository at this point in the history

Co-authored-by: Ajit Pratap Singh <[email protected]>
Co-authored-by: uditchaudhary <[email protected]>
  • Loading branch information
3 people authored Sep 7, 2024
1 parent 1775cb4 commit 826d689
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,6 @@ dmypy.json

# Cython debug symbols
cython_debug/

# Pycharm
.idea
24 changes: 18 additions & 6 deletions airbyte/shared/catalog_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
)

from airbyte import exceptions as exc
from airbyte._util.name_normalizers import LowerCaseNormalizer
from airbyte.strategies import WriteMethod, WriteStrategy


Expand Down Expand Up @@ -149,13 +150,24 @@ def get_primary_keys(
if not pks:
return []

joined_pks = [".".join(pk) for pk in pks]
for pk in joined_pks:
if "." in pk:
msg = f"Nested primary keys are not yet supported. Found: {pk}"
raise NotImplementedError(msg)
normalized_pks: list[list[str]] = [
[LowerCaseNormalizer.normalize(c) for c in pk] for pk in pks
]

return joined_pks
for pk_nodes in normalized_pks:
if len(pk_nodes) != 1:
raise exc.AirbyteError(
message=(
"Nested primary keys are not supported. "
"Each PK column should have exactly one node. "
),
context={
"stream_name": stream_name,
"primary_key_nodes": pk_nodes,
},
)

return [pk_nodes[0] for pk_nodes in normalized_pks]

def get_cursor_key(
self,
Expand Down
32 changes: 32 additions & 0 deletions tests/integration_tests/fixtures/source-test/source_test/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,23 @@
},
},
},
{
"name": "primary-key-with-dot",
"description": "This stream has a primary key with dot similar what is there in GAds.",
"source_defined_primary_key": [["table1.Column1"]],
"source_defined_cursor": False,
"supported_sync_modes": ["full_refresh"],
"json_schema": {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"table1.Column1": {"type": "string"},
"table1.Column2": {"type": "number"},
"table1.empty_column": {"type": "string"},
"table1.big_number": {"type": "number"},
},
},
},
]
},
}
Expand Down Expand Up @@ -137,6 +154,19 @@
"emitted_at": 1704067200,
},
}
# Sample RECORD message for the "primary-key-with-dot" stream.
# Every property name in the payload contains a literal '.', mirroring the
# stream's catalog entry, whose primary key is "table1.Column1".
sample_record_primary_key_with_dot = {
    "type": "RECORD",
    "record": {
        "data": {
            # Primary-key column declared by the stream's catalog entry.
            "table1.Column1": "value1",
            "table1.Column2": 1,
            # Deliberately null value.
            "table1.empty_column": None,
            # 16-digit integer value.
            "table1.big_number": 1234567890123456,
        },
        "stream": "primary-key-with-dot",
        "emitted_at": 1704067200,
    },
}


def parse_args():
Expand Down Expand Up @@ -184,3 +214,5 @@ def run():
print(json.dumps(sample_record2_stream1))
elif stream["stream"]["name"] == "stream2":
print(json.dumps(sample_record_stream2))
elif stream["stream"]["name"] == "primary-key-with-dot":
print(json.dumps(sample_record_primary_key_with_dot))
1 change: 1 addition & 0 deletions tests/unit_tests/test_text_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ def test_case_insensitive_w_pretty_keys(
("", "", True),
("*", "", True),
("!@$", "", True),
("some.col", "some_col", False),
],
)
def test_lower_case_normalizer(
Expand Down

0 comments on commit 826d689

Please sign in to comment.