Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(sql): unable to filter text with quotes #17881

Merged
merged 12 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion superset/connectors/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ def handle_single_value(value: Optional[FilterValue]) -> Optional[FilterValue]:
):
return datetime.utcfromtimestamp(value / 1000)
if isinstance(value, str):
value = value.strip("\t\n'\"")
value = value.strip("\t\n")

if target_column_type == utils.GenericDataType.NUMERIC:
# For backwards compatibility and edge cases
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests/druid_func_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def test_get_filters_extracts_values_in_quotes(self):
col = DruidColumn(column_name="A")
column_dict = {"A": col}
res = DruidDatasource.get_filters([filtr], [], column_dict)
self.assertEqual("a", res.filter["filter"]["value"])
self.assertEqual('"a"', res.filter["filter"]["value"])

@unittest.skipUnless(
SupersetTestCase.is_module_installed("pydruid"), "pydruid not installed"
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests/druid_func_tests_sip38.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def test_get_filters_extracts_values_in_quotes(self):
col = DruidColumn(column_name="A")
column_dict = {"A": col}
res = DruidDatasource.get_filters([filtr], [], column_dict)
self.assertEqual("a", res.filter["filter"]["value"])
self.assertEqual('"a"', res.filter["filter"]["value"])

@unittest.skipUnless(
SupersetTestCase.is_module_installed("pydruid"), "pydruid not installed"
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests/query_context_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ def test_query_response_type(self):
assert re.search(r'[`"\[]?num[`"\]]? IS NOT NULL', sql_text)
assert re.search(
r"""NOT \([`"\[]?name[`"\]]? IS NULL[\s\n]* """
r"""OR [`"\[]?name[`"\]]? IN \('abc'\)\)""",
r"""OR [`"\[]?name[`"\]]? IN \('"abc"'\)\)""",
sql_text,
)

Expand Down
170 changes: 112 additions & 58 deletions tests/integration_tests/sqla_models_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
load_birth_names_dashboard_with_slices,
load_birth_names_data,
)
from tests.integration_tests.test_app import app

from .base_tests import SupersetTestCase

Expand Down Expand Up @@ -475,80 +476,133 @@ def test_labels_expected_on_mutated_query(self):
db.session.delete(database)
db.session.commit()

def test_values_for_column(self):

@pytest.fixture
def text_column_table():
with app.app_context():
table = SqlaTable(
table_name="test_null_in_column",
table_name="text_column_table",
sql=(
"SELECT 'foo' as foo "
"UNION SELECT '' "
"UNION SELECT NULL "
"UNION SELECT 'null'"
"UNION SELECT 'null' "
"UNION SELECT '\"text in double quotes\"' "
"UNION SELECT '''text in single quotes''' "
"UNION SELECT 'double quotes \" in text' "
"UNION SELECT 'single quotes '' in text' "
),
database=get_example_database(),
)
TableColumn(column_name="foo", type="VARCHAR(255)", table=table)
SqlMetric(metric_name="count", expression="count(*)", table=table)
yield table
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

table wasn't persisted so we no need delete command.


# null value, empty string and text should be retrieved
with_null = table.values_for_column(column_name="foo", limit=10000)
assert None in with_null
assert len(with_null) == 4

# null value should be replaced
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [NULL_STRING], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
def test_values_for_column_on_text_column(text_column_table):
# null value, empty string and text should be retrieved
with_null = text_column_table.values_for_column(column_name="foo", limit=10000)
assert None in with_null
assert len(with_null) == 8

# also accept None value
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [None], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1

# empty string should be replaced
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [EMPTY_STRING], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
def test_filter_on_text_column(text_column_table):
table = text_column_table
# null value should be replaced
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [NULL_STRING], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1

# also accept "" string
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [""], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1
# also accept None value
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [None], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1

# both replaced
result_object = table.query(
{
"metrics": ["count"],
"filter": [
{
"col": "foo",
"val": [EMPTY_STRING, NULL_STRING, "null", "foo"],
"op": "IN",
}
],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 4
# empty string should be replaced
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [EMPTY_STRING], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1

# also accept "" string
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": [""], "op": "IN"}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1

# both replaced
result_object = table.query(
{
"metrics": ["count"],
"filter": [
{
"col": "foo",
"val": [EMPTY_STRING, NULL_STRING, "null", "foo"],
"op": "IN",
}
],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 4

# should filter text in double quotes
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": ['"text in double quotes"'], "op": "IN",}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1

# should filter text in single quotes
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": ["'text in single quotes'"], "op": "IN",}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1

# should filter text with single quotes
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": ['double quotes " in text'], "op": "IN",}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1

# should filter text with double quotes
result_object = table.query(
{
"metrics": ["count"],
"filter": [{"col": "foo", "val": ["single quotes ' in text"], "op": "IN",}],
"is_timeseries": False,
}
)
assert result_object.df["count"][0] == 1


@pytest.mark.parametrize(
Expand Down