Allow overriding of stringify_dict for JSON export format on BaseSQLToGCSOperator #26277

Merged (1 commit, Sep 18, 2022)
airflow/providers/google/cloud/transfers/sql_to_gcs.py (6 changes: 5 additions & 1 deletion)

@@ -51,6 +51,8 @@ class BaseSQLToGCSOperator(BaseOperator):
         file size of the splits. Check https://cloud.google.com/storage/quotas
         to see the maximum allowed file size for a single object.
     :param export_format: Desired format of files to be exported. (json, csv or parquet)
+    :param stringify_dict: Whether to dump Dictionary type objects
+        (such as JSON columns) as a string. Applies only to JSON export format.
     :param field_delimiter: The delimiter to be used for CSV files.
     :param null_marker: The null marker to be used for CSV files.
     :param gzip: Option to compress file for upload (does not apply to schemas).
@@ -97,6 +99,7 @@ def __init__(
         schema_filename: Optional[str] = None,
         approx_max_file_size_bytes: int = 1900000000,
         export_format: str = 'json',
+        stringify_dict: bool = False,
         field_delimiter: str = ',',
         null_marker: Optional[str] = None,
         gzip: bool = False,
@@ -119,6 +122,7 @@ def __init__(
         self.schema_filename = schema_filename
         self.approx_max_file_size_bytes = approx_max_file_size_bytes
         self.export_format = export_format.lower()
+        self.stringify_dict = stringify_dict
         self.field_delimiter = field_delimiter
         self.null_marker = null_marker
         self.gzip = gzip
@@ -241,7 +245,7 @@ def _write_local_data_files(self, cursor):
                 tbl = pa.Table.from_pydict(row_pydic, parquet_schema)
                 parquet_writer.write_table(tbl)
             else:
-                row = self.convert_types(schema, col_type_dict, row, stringify_dict=False)
+                row = self.convert_types(schema, col_type_dict, row, stringify_dict=self.stringify_dict)
                 row_dict = dict(zip(schema, row))
 
                 tmp_file_handle.write(
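For context, here is a minimal sketch of how the new flag could be used from a DAG once this change ships. It assumes a concrete subclass such as MySQLToGCSOperator, which forwards extra keyword arguments to BaseSQLToGCSOperator; the DAG id, bucket name, table, and connection defaults below are hypothetical, not from this PR.

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.transfers.mysql_to_gcs import MySQLToGCSOperator

with DAG(
    dag_id="example_sql_to_gcs_stringify_dict",  # hypothetical DAG id
    start_date=datetime(2022, 9, 18),
    schedule_interval=None,
) as dag:
    export_events = MySQLToGCSOperator(
        task_id="export_events",
        sql="SELECT id, payload FROM events",  # assumes `payload` is a JSON column
        bucket="my-bucket",  # hypothetical bucket name
        filename="events/export_{}.json",  # {} is replaced by a file counter
        export_format="json",
        # New in this PR: serialize dict-typed values (e.g. JSON columns) as
        # strings in the exported file rather than as nested objects. The
        # default stays False, preserving the previous behavior.
        stringify_dict=True,
    )
```

The observable difference is in the exported newline-delimited JSON: with the default stringify_dict=False a JSON column is written as a nested object, e.g. {"payload": {"a": 1}}, while stringify_dict=True writes the same value as an escaped string, e.g. {"payload": "{\"a\": 1}"}, which is what BigQuery expects when the target column is typed STRING rather than JSON/RECORD.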