Skip to content

Commit

Permalink
fix csv upload bugs (apache#5940)
Browse files (browse the repository at this point in the history)
  • Loading branch information
timifasubaa authored Sep 20, 2018
1 parent 1e5a02a commit 00c4c7e
Showing 1 changed file with 15 additions and 16 deletions.
31 changes: 15 additions & 16 deletions superset/db_engine_specs.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1005,6 +1005,13 @@ def convert_to_hive_type(col_type):
}
return tableschema_to_hive_types.get(col_type, 'STRING')

bucket_path = config['CSV_TO_HIVE_UPLOAD_S3_BUCKET']

if not bucket_path:
logging.info('No upload bucket specified')
raise Exception(
'No upload bucket specified. You can specify one in the config file.')

table_name = form.name.data
schema_name = form.schema.data

Expand All @@ -1014,37 +1021,29 @@ def convert_to_hive_type(col_type):
"You can't specify a namespace. "
'All tables will be uploaded to the `{}` namespace'.format(
config.get('HIVE_NAMESPACE')))
table_name = '{}.{}'.format(
full_table_name = '{}.{}'.format(
config.get('UPLOADED_CSV_HIVE_NAMESPACE'), table_name)
else:
if '.' in table_name and schema_name:
raise Exception(
"You can't specify a namespace both in the name of the table "
'and in the schema field. Please remove one')
if schema_name:
table_name = '{}.{}'.format(schema_name, table_name)

filename = form.csv_file.data.filename
bucket_path = config['CSV_TO_HIVE_UPLOAD_S3_BUCKET']
full_table_name = '{}.{}'.format(
schema_name, table_name) if schema_name else table_name

if not bucket_path:
logging.info('No upload bucket specified')
raise Exception(
'No upload bucket specified. You can specify one in the config file.')

table_name = form.name.data
filename = form.csv_file.data.filename
upload_prefix = config['CSV_TO_HIVE_UPLOAD_DIRECTORY']

upload_prefix = config['CSV_TO_HIVE_UPLOAD_DIRECTORY']
upload_path = config['UPLOAD_FOLDER'] + \
secure_filename(form.csv_file.data.filename)
secure_filename(filename)

hive_table_schema = Table(upload_path).infer()
column_name_and_type = []
for column_info in hive_table_schema['fields']:
column_name_and_type.append(
'{} {}'.format(
"'" + column_info['name'] + "'",
'`{}` {}'.format(
column_info['name'],
convert_to_hive_type(column_info['type'])))
schema_definition = ', '.join(column_name_and_type)

Expand All @@ -1053,7 +1052,7 @@ def convert_to_hive_type(col_type):
s3.upload_file(
upload_path, bucket_path,
os.path.join(upload_prefix, table_name, filename))
sql = """CREATE TABLE {table_name} ( {schema_definition} )
sql = """CREATE TABLE {full_table_name} ( {schema_definition} )
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
TEXTFILE LOCATION '{location}'
tblproperties ('skip.header.line.count'='1')""".format(**locals())
Expand Down

0 comments on commit 00c4c7e

Please sign in to comment.