Allow overriding of stringify_dict for JSON export format on BaseSQLToGCSOperator #26277

Merged (1 commit, Sep 18, 2022)
airflow/providers/google/cloud/transfers/sql_to_gcs.py (6 changes: 5 additions & 1 deletion)

@@ -51,6 +51,8 @@ class BaseSQLToGCSOperator(BaseOperator):
         file size of the splits. Check https://cloud.google.com/storage/quotas
         to see the maximum allowed file size for a single object.
     :param export_format: Desired format of files to be exported. (json, csv or parquet)
+    :param stringify_dict: Whether to dump Dictionary type objects
+        (such as JSON columns) as a string. Applies only to JSON export format.
     :param field_delimiter: The delimiter to be used for CSV files.
     :param null_marker: The null marker to be used for CSV files.
     :param gzip: Option to compress file for upload (does not apply to schemas).
@@ -97,6 +99,7 @@ def __init__(
         schema_filename: Optional[str] = None,
         approx_max_file_size_bytes: int = 1900000000,
         export_format: str = 'json',
+        stringify_dict: bool = False,
         field_delimiter: str = ',',
         null_marker: Optional[str] = None,
         gzip: bool = False,
@@ -119,6 +122,7 @@ def __init__(
         self.schema_filename = schema_filename
         self.approx_max_file_size_bytes = approx_max_file_size_bytes
         self.export_format = export_format.lower()
+        self.stringify_dict = stringify_dict
         self.field_delimiter = field_delimiter
         self.null_marker = null_marker
         self.gzip = gzip
@@ -241,7 +245,7 @@ def _write_local_data_files(self, cursor):
                 tbl = pa.Table.from_pydict(row_pydic, parquet_schema)
                 parquet_writer.write_table(tbl)
             else:
-                row = self.convert_types(schema, col_type_dict, row, stringify_dict=False)
+                row = self.convert_types(schema, col_type_dict, row, stringify_dict=self.stringify_dict)
                 row_dict = dict(zip(schema, row))
 
                 tmp_file_handle.write(
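For context, here is a minimal sketch of how the new flag could be used from a DAG once this change ships. It assumes a concrete subclass such as MySQLToGCSOperator, which forwards extra keyword arguments to BaseSQLToGCSOperator; the DAG id, bucket name, table, and connection defaults below are hypothetical, not from this PR.

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.transfers.mysql_to_gcs import MySQLToGCSOperator

with DAG(
    dag_id="example_sql_to_gcs_stringify_dict",  # hypothetical DAG id
    start_date=datetime(2022, 9, 18),
    schedule_interval=None,
) as dag:
    export_events = MySQLToGCSOperator(
        task_id="export_events",
        sql="SELECT id, payload FROM events",  # assumes `payload` is a JSON column
        bucket="my-bucket",  # hypothetical bucket name
        filename="events/export_{}.json",  # {} is replaced by a file counter
        export_format="json",
        # New in this PR: serialize dict-typed values (e.g. JSON columns) as
        # strings in the exported file rather than as nested objects. The
        # default stays False, preserving the previous behavior.
        stringify_dict=True,
    )
```

The observable difference is in the exported newline-delimited JSON: with the default stringify_dict=False a JSON column is written as a nested object, e.g. {"payload": {"a": 1}}, while stringify_dict=True writes the same value as an escaped string, e.g. {"payload": "{\"a\": 1}"}, which is what BigQuery expects when the target column is typed STRING rather than JSON/RECORD.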