Skip to content

Commit

Permalink
[fix] Improve csv upload functionality (apache#8457)
Browse files Browse the repository at this point in the history
* [fix] csv upload when table metadata present

* Remove table from hive spec

* Move upload before table metadata creation

* Refine upload logic, dd unit tests and fix translations

* Use ALLOWED_EXTENSIONS from config

* Address review comments

* Fix error message grammar

* Add return type to hive csv upload and replace first with one_or_none
  • Loading branch information
villebro authored Nov 7, 2019
1 parent 397e1e3 commit 49ea232
Show file tree
Hide file tree
Showing 18 changed files with 156 additions and 69 deletions.
2 changes: 1 addition & 1 deletion superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def _try_json_readfile(filepath):

# Allowed format types for upload on Database view
# TODO: Add processing of other spreadsheet formats (xls, xlsx etc)
ALLOWED_EXTENSIONS = set(["csv"])
ALLOWED_EXTENSIONS = {"csv", "tsv"}

# CSV Options: key/value pairs that will be passed as argument to DataFrame.to_csv
# method.
Expand Down
18 changes: 7 additions & 11 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from sqlalchemy.types import TypeEngine
from werkzeug.utils import secure_filename

from superset import app, db, sql_parse
from superset import app, sql_parse
from superset.utils import core as utils

if TYPE_CHECKING:
Expand Down Expand Up @@ -388,15 +388,17 @@ def df_to_sql(cls, df: pd.DataFrame, **kwargs): # pylint: disable=invalid-name
df.to_sql(**kwargs)

@classmethod
def create_table_from_csv(cls, form, table):
""" Create table (including metadata in backend) from contents of a csv.
def create_table_from_csv(cls, form) -> None:
"""
Create table from contents of a csv. Note: this method does not create
metadata for the table.
:param form: Parameters defining how to process data
:param table: Metadata of new table to be created
"""

def _allowed_file(filename: str) -> bool:
# Only allow specific file extensions as specified in the config
extension = os.path.splitext(filename)[1]
extension = os.path.splitext(filename)[1].lower()
return (
extension is not None and extension[1:] in config["ALLOWED_EXTENSIONS"]
)
Expand Down Expand Up @@ -432,12 +434,6 @@ def _allowed_file(filename: str) -> bool:
}
cls.df_to_sql(**df_to_sql_kwargs)

table.user_id = g.user.id
table.schema = form.schema.data
table.fetch_metadata()
db.session.add(table)
db.session.commit()

@classmethod
def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
"""
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def fetch_data(cls, cursor, limit: int) -> List[Tuple]:
return []

@classmethod
def create_table_from_csv(cls, form, table): # pylint: disable=too-many-locals
def create_table_from_csv(cls, form) -> None: # pylint: disable=too-many-locals
"""Uploads a csv file and creates a superset datasource in Hive."""

def convert_to_hive_type(col_type):
Expand Down
3 changes: 1 addition & 2 deletions superset/translations/en/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
Expand Up @@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration"
msgstr ""

#: superset/views/core.py:375
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr ""

#: superset/views/core.py:401 superset/views/core.py:667
Expand Down Expand Up @@ -7765,4 +7765,3 @@ msgstr ""

#~ msgid "Saved Queries"
#~ msgstr ""

2 changes: 1 addition & 1 deletion superset/translations/fr/LC_MESSAGES/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -3124,7 +3124,7 @@
"CSV to Database configuration": [
""
],
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
""
],
"User": [
Expand Down
4 changes: 2 additions & 2 deletions superset/translations/fr/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
Expand Up @@ -4880,8 +4880,8 @@ msgid "CSV to Database configuration"
msgstr "CSV vers la configuration de la base de données"

#: superset/views/core.py:375
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgstr "Fichier CSV \"{0}\" chargé dans la table \"{1}\" de la base de données \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr "Fichier CSV \"%(csv_filename)s\" chargé dans la table \"%(table_name)s\" de la base de données \"%(db_name)s\""

#: superset/views/core.py:401 superset/views/core.py:667
#: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60
Expand Down
4 changes: 2 additions & 2 deletions superset/translations/it/LC_MESSAGES/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -2923,7 +2923,7 @@
"CSV to Database configuration": [
""
],
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
""
],
"User": [
Expand Down Expand Up @@ -3132,4 +3132,4 @@
]
}
}
}
}
3 changes: 1 addition & 2 deletions superset/translations/it/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
Expand Up @@ -4432,7 +4432,7 @@ msgid "CSV to Database configuration"
msgstr ""

#: superset/views/core.py:363
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr ""

#: superset/views/core.py:389 superset/views/core.py:655
Expand Down Expand Up @@ -6461,4 +6461,3 @@ msgstr "Query salvate"

#~ msgid "Slice"
#~ msgstr "Slice"

2 changes: 1 addition & 1 deletion superset/translations/ko/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
Expand Up @@ -3788,7 +3788,7 @@ msgstr "대시보드 가져오기"
msgid "CSV to Database configuration"
msgstr ""

msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr ""

msgid "User"
Expand Down
3 changes: 1 addition & 2 deletions superset/translations/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration"
msgstr ""

#: superset/views/core.py:375
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr ""

#: superset/views/core.py:401 superset/views/core.py:667
Expand Down Expand Up @@ -4967,4 +4967,3 @@ msgstr ""
#: superset/views/sql_lab.py:86
msgid "Saved Queries"
msgstr ""

6 changes: 3 additions & 3 deletions superset/translations/ru/LC_MESSAGES/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -2923,8 +2923,8 @@
"CSV to Database configuration": [
"Настройка CSV для БД"
],
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
"CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\""
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
"CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" базы данных \"%(db_name)s\""
],
"User": [
"Пользователь"
Expand Down Expand Up @@ -3132,4 +3132,4 @@
]
}
}
}
}
4 changes: 2 additions & 2 deletions superset/translations/ru/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
Expand Up @@ -4544,8 +4544,8 @@ msgid "CSV to Database configuration"
msgstr "Настройка CSV для БД"

#: superset/views/core.py:363
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgstr "CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr "CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" базы данных \"%(db_name)s\""

#: superset/views/core.py:389 superset/views/core.py:655
#: superset/views/sql_lab.py:16 superset/views/sql_lab.py:53
Expand Down
4 changes: 2 additions & 2 deletions superset/translations/zh/LC_MESSAGES/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -3124,8 +3124,8 @@
"CSV to Database configuration": [
"csv 到数据库配置"
],
"CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
"csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\""
"CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [
"csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 \"%(table_name)s\""
],
"User": [
"用户"
Expand Down
4 changes: 2 additions & 2 deletions superset/translations/zh/LC_MESSAGES/messages.po
Original file line number Diff line number Diff line change
Expand Up @@ -4746,8 +4746,8 @@ msgid "CSV to Database configuration"
msgstr "csv 到数据库配置"

#: superset/views/core.py:375
msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
msgstr "csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\""
msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\""
msgstr "csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 \"%(table_name)s\""

#: superset/views/core.py:401 superset/views/core.py:667
#: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60
Expand Down
2 changes: 1 addition & 1 deletion superset/views/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3049,7 +3049,7 @@ def schemas_access_for_csv_upload(self):
except Exception:
return json_error_response(
"Failed to fetch schemas allowed for csv upload in this database! "
"Please contact Superset Admin!",
"Please contact your Superset Admin!",
stacktrace=utils.get_stacktrace(),
)

Expand Down
12 changes: 11 additions & 1 deletion superset/views/database/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,17 @@ def at_least_one_schema_is_allowed(database):
csv_file = FileField(
_("CSV File"),
description=_("Select a CSV file to be uploaded to a database."),
validators=[FileRequired(), FileAllowed(["csv"], _("CSV Files Only!"))],
validators=[
FileRequired(),
FileAllowed(
config["ALLOWED_EXTENSIONS"],
_(
"Only the following file extensions are allowed: "
"%(allowed_extensions)s",
allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
),
),
],
)
con = QuerySelectField(
_("Database"),
Expand Down
64 changes: 45 additions & 19 deletions superset/views/database/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# pylint: disable=C,R,W
import os

from flask import flash, redirect
from flask import flash, g, redirect
from flask_appbuilder import SimpleFormView
from flask_appbuilder.forms import DynamicForm
from flask_appbuilder.models.sqla.interface import SQLAInterface
Expand All @@ -28,7 +28,7 @@
from wtforms.validators import ValidationError

import superset.models.core as models
from superset import app, appbuilder, security_manager
from superset import app, appbuilder, db, security_manager
from superset.connectors.sqla.models import SqlaTable
from superset.utils import core as utils
from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin
Expand Down Expand Up @@ -102,10 +102,10 @@ def form_post(self, form):

if not self.is_schema_allowed(database, schema_name):
message = _(
'Database "{0}" Schema "{1}" is not allowed for csv uploads. '
"Please contact Superset Admin".format(
database.database_name, schema_name
)
'Database "%(database_name)s" schema "%(schema_name)s" '
"is not allowed for csv uploads. Please contact your Superset Admin.",
database_name=database.database_name,
schema_name=schema_name,
)
flash(message, "danger")
return redirect("/csvtodatabaseview/form")
Expand All @@ -117,32 +117,58 @@ def form_post(self, form):
try:
utils.ensure_path_exists(config["UPLOAD_FOLDER"])
csv_file.save(path)
table = SqlaTable(table_name=form.name.data)
table.database = form.data.get("con")
table.database_id = table.database.id
table.database.db_engine_spec.create_table_from_csv(form, table)
table_name = form.name.data
database = form.data.get("con")
database.db_engine_spec.create_table_from_csv(form)

table = (
db.session.query(SqlaTable)
.filter_by(
table_name=table_name,
schema=form.schema.data,
database_id=database.id,
)
.one_or_none()
)
if table:
table.fetch_metadata()
if not table:
table = SqlaTable(table_name=table_name)
table.database = database
table.database_id = database.id
table.user_id = g.user.id
table.schema = form.schema.data
table.fetch_metadata()
db.session.add(table)
db.session.commit()
except Exception as e:
db.session.rollback()
try:
os.remove(path)
except OSError:
pass
message = (
"Table name {} already exists. Please pick another".format(
form.name.data
)
if isinstance(e, IntegrityError)
else str(e)
message = _(
'Unable to upload CSV file "%(filename)s" to table '
'"%(table_name)s" in database "%(db_name)s". '
"Error message: %(error_msg)s",
filename=csv_filename,
table_name=form.name.data,
db_name=database.database_name,
error_msg=str(e),
)

flash(message, "danger")
stats_logger.incr("failed_csv_upload")
return redirect("/csvtodatabaseview/form")

os.remove(path)
# Go back to welcome page / splash screen
db_name = table.database.database_name
message = _(
'CSV file "{0}" uploaded to table "{1}" in '
'database "{2}"'.format(csv_filename, form.name.data, db_name)
'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in '
'database "%(db_name)s"',
csv_filename=csv_filename,
table_name=form.name.data,
db_name=table.database.database_name,
)
flash(message, "info")
stats_logger.incr("successful_csv_upload")
Expand Down
Loading

0 comments on commit 49ea232

Please sign in to comment.