From 00c4c7ec4b7002264597562b631ebfe4d0a2b04b Mon Sep 17 00:00:00 2001 From: timifasubaa <30888507+timifasubaa@users.noreply.github.com> Date: Thu, 20 Sep 2018 10:34:15 -0500 Subject: [PATCH] fix csv upload bugs (#5940) --- superset/db_engine_specs.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 176fbed04d5e5..6c8e07061f6a3 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -1005,6 +1005,13 @@ def convert_to_hive_type(col_type): } return tableschema_to_hive_types.get(col_type, 'STRING') + bucket_path = config['CSV_TO_HIVE_UPLOAD_S3_BUCKET'] + + if not bucket_path: + logging.info('No upload bucket specified') + raise Exception( + 'No upload bucket specified. You can specify one in the config file.') + table_name = form.name.data schema_name = form.schema.data @@ -1014,37 +1021,29 @@ def convert_to_hive_type(col_type): "You can't specify a namespace. " 'All tables will be uploaded to the `{}` namespace'.format( config.get('HIVE_NAMESPACE'))) - table_name = '{}.{}'.format( + full_table_name = '{}.{}'.format( config.get('UPLOADED_CSV_HIVE_NAMESPACE'), table_name) else: if '.' in table_name and schema_name: raise Exception( "You can't specify a namespace both in the name of the table " 'and in the schema field. Please remove one') - if schema_name: - table_name = '{}.{}'.format(schema_name, table_name) - filename = form.csv_file.data.filename - bucket_path = config['CSV_TO_HIVE_UPLOAD_S3_BUCKET'] + full_table_name = '{}.{}'.format( + schema_name, table_name) if schema_name else table_name - if not bucket_path: - logging.info('No upload bucket specified') - raise Exception( - 'No upload bucket specified. You can specify one in the config file.') - - table_name = form.name.data filename = form.csv_file.data.filename - upload_prefix = config['CSV_TO_HIVE_UPLOAD_DIRECTORY'] + upload_prefix = config['CSV_TO_HIVE_UPLOAD_DIRECTORY'] upload_path = config['UPLOAD_FOLDER'] + \ - secure_filename(form.csv_file.data.filename) + secure_filename(filename) hive_table_schema = Table(upload_path).infer() column_name_and_type = [] for column_info in hive_table_schema['fields']: column_name_and_type.append( - '{} {}'.format( - "'" + column_info['name'] + "'", + '`{}` {}'.format( + column_info['name'], convert_to_hive_type(column_info['type']))) schema_definition = ', '.join(column_name_and_type) @@ -1053,7 +1052,7 @@ def convert_to_hive_type(col_type): s3.upload_file( upload_path, bucket_path, os.path.join(upload_prefix, table_name, filename)) - sql = """CREATE TABLE {table_name} ( {schema_definition} ) + sql = """CREATE TABLE {full_table_name} ( {schema_definition} ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE LOCATION '{location}' tblproperties ('skip.header.line.count'='1')""".format(**locals())