Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(chartdata): disable sqlparse calls for chart data requests to improve querying performance #19572

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions superset/common/query_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ def validate(
try:
self._validate_there_are_no_missing_series()
self._validate_no_have_duplicate_labels()
# problematic line where calling sqlparser.parse() causes quadratic
# performance for WHERE ... IN (...) clauses
# Clauses are anyway checked for their validity in
# e.g., connectors/sqla/models/get_query_str_extended
self._sanitize_filters()
return None
except QueryObjectValidationError as ex:
Expand Down
4 changes: 2 additions & 2 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1761,7 +1761,7 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument
# def DATASET_HEALTH_CHECK(datasource: SqlaTable) -> Optional[str]:
# if (
# datasource.sql and
# len(sql_parse.ParsedQuery(datasource.sql, strip_comments=True).tables) == 1
# len(sql_parse.ParsedQuery(datasource.sql).tables) == 1
# ):
# return (
# "This virtual dataset queries only one table and therefore could be "
Expand Down Expand Up @@ -1900,7 +1900,7 @@ class ExtraDynamicQueryFilters(TypedDict, total=False):
elif importlib.util.find_spec("superset_config") and not is_test():
try:
# pylint: disable=import-error,wildcard-import,unused-wildcard-import
import superset_config
import superset_config as superset_config
from superset_config import * # noqa: F403, F401

click.secho(
Expand Down
7 changes: 4 additions & 3 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

import pandas as pd
import requests
import sqlparse
import sqlglot
from apispec import APISpec
from apispec.ext.marshmallow import MarshmallowPlugin
from deprecation import deprecated
Expand Down Expand Up @@ -1236,7 +1236,7 @@ def get_cte_query(cls, sql: str) -> str | None:

"""
if not cls.allows_cte_in_subquery:
stmt = sqlparse.parse(sql)[0]
stmt = sqlglot.tokenize(sql)

# The first meaningful token for CTE will be with WITH
idx, token = stmt.token_next(-1, skip_ws=True, skip_cm=True)
Expand Down Expand Up @@ -2158,7 +2158,8 @@ def cancel_query( # pylint: disable=unused-argument

@classmethod
def parse_sql(cls, sql: str) -> list[str]:
return [str(s).strip(" ;") for s in sqlparse.parse(sql)]
return sqlglot.transpile(sql)
# return [str(s).strip(" ;") for s in sqlparse.parse(sql)]

@classmethod
def get_impersonation_key(cls, user: User | None) -> Any:
Expand Down
57 changes: 30 additions & 27 deletions superset/sql_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@
from collections.abc import Iterator
from typing import Any, cast, TYPE_CHECKING

import sqlglot
import sqlparse
from flask_babel import gettext as __
from jinja2 import nodes
from sqlalchemy import and_
from sqlglot.dialects.dialect import Dialects
from sqlglot.errors import ParseError
from sqlparse import keywords
from sqlparse.lexer import Lexer
from sqlparse.sql import (
Expand All @@ -42,7 +44,6 @@
Where,
)
from sqlparse.tokens import (
Comment,
CTE,
DDL,
DML,
Expand Down Expand Up @@ -255,12 +256,9 @@ class ParsedQuery:
def __init__(
self,
sql_statement: str,
strip_comments: bool = False,
engine: str = "base",
):
if strip_comments:
sql_statement = sqlparse.format(sql_statement, strip_comments=True)

sql_statement = sqlglot.transpile(sql_statement)
self.sql: str = sql_statement
self._engine = engine
self._dialect = SQLGLOT_DIALECTS.get(engine) if engine else None
Expand Down Expand Up @@ -583,30 +581,35 @@ def set_or_update_query_limit(self, new_limit: int, force: bool = False) -> str:

def sanitize_clause(clause: str) -> str:
# clause = sqlparse.format(clause, strip_comments=True)
statements = sqlparse.parse(clause)
try:
statements = sqlglot.transpile(clause, pretty=True)
except Exception as p_err:
if isinstance(p_err, ParseError):
raise QueryClauseValidationException(str(p_err)) from p_err
raise ValueError(str(p_err)) from None
if len(statements) != 1:
raise QueryClauseValidationException("Clause contains multiple statements")
open_parens = 0

previous_token = None
for token in statements[0]:
if token.value == "/" and previous_token and previous_token.value == "*":
raise QueryClauseValidationException("Closing unopened multiline comment")
if token.value == "*" and previous_token and previous_token.value == "/":
raise QueryClauseValidationException("Unclosed multiline comment")
if token.value in (")", "("):
open_parens += 1 if token.value == "(" else -1
if open_parens < 0:
raise QueryClauseValidationException(
"Closing unclosed parenthesis in filter clause"
)
previous_token = token
if open_parens > 0:
raise QueryClauseValidationException("Unclosed parenthesis in filter clause")

if previous_token and previous_token.ttype in Comment:
if previous_token.value[-1] != "\n":
clause = f"{clause}\n"
# open_parens = 0

# previous_token = None
# for token in statements[0]:
# if token.value == "/" and previous_token and previous_token.value == "*":
# raise QueryClauseValidationException("Closing unopened multiline comment")
# if token.value == "*" and previous_token and previous_token.value == "/":
# raise QueryClauseValidationException("Unclosed multiline comment")
# if token.value in (")", "("):
# open_parens += 1 if token.value == "(" else -1
# if open_parens < 0:
# raise QueryClauseValidationException(
# "Closing unclosed parenthesis in filter clause"
# )
# previous_token = token
# if open_parens > 0:
# raise QueryClauseValidationException("Unclosed parenthesis in filter clause")

# if previous_token and previous_token.ttype in Comment:
# if previous_token.value[-1] != "\n":
# clause = f"{clause}\n"

return clause

Expand Down
1 change: 1 addition & 0 deletions tests/example_data/test-long-where-clause-1k.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Liam,James,Noah,Wyatt,Gabriel,Lucas,Ethan,Alexander,Joseph,Benjamin,William,Logan,Mason,Jack,John,Asher,Elijah,Daniel,Henry,Jacob,Jaxon,Michael,Oliver,Hunter,David,Levi,Matthew,Landon,Aiden,Isaac,Jackson,Caleb,Ryan,Elias,Connor,Evan,Joshua,Samuel,Christian,Jayden,Jeremiah,Cooper,Eli,Robert,Ryder,Christopher,Colton,Josiah,Andrew,Austin,Carson,Jaxson,Jonathan,Luke,Malachi,Nathan,Owen,Blake,Lincoln,Ezra,Gavin,Thomas,Dylan,Grayson,Kai,Ryker,Zachary,Anthony,Isaiah,Jase,Jason,Micah,Sebastian,Silas,Titus,Bentley,Brody,Cameron,Carter,Chase,Gideon,Jace,Sawyer,Tristan,Tyler,Weston,Adam,Charles,Everett,Wesley,Xander,Brandon,Brayden,Nathaniel,Theodore,Xavier,Ashton,Avery,Dominic,Easton,Finn,George,Hudson,Ian,Jasper,Kayden,Marshall,Max,Maxwell,Miles,Orion,Richard,Timothy,Abel,Drake,Garrett,Jameson,Jayce,Joel,Kenneth,Maximus,Nicholas,Parker,Travis,Cody,Dean,Declan,Elliot,Ezekiel,Karter,Nolan,Patrick,Riley,Seth,Solomon,Steven,Victor,Waylon,Aaron,August,Bradley,Braxton,Bryce,Calvin,Camden,Cayden,Charlie,Cole,Damian,Dawson,Eric,Greyson,Jake,Jeffrey,Jesse,Jonah,Julian,Kaiden,Killian,Kingston,Maddox,Matthias,Maverick,Odin,Paul,Peter,Roman,Trevor,Zane,Alex,Archer,Caden,Collin,Colt,Edward,Gage,Gunner,Harrison,Ivan,Jax,Leo,Lukas,Marcus,Paxton,Soren,Sullivan,Tanner,Trenton,Troy,Tucker,Vincent,Walter,Warren,Adrian,Augustus,Axel,Beckett,Cade,Clayton,Dante,Emmett,Felix,Grant,Hatcher,Jordan,Jude,Julius,Kaleb,Kevin,Remington,Rowan,Russell,Simon,Skyler,Stephen,Sterling,Talon,Westin,Aidan,Alden,Angelo,Arthur,Atticus,Barrett,Brantley,Brooks,Bruce,Cash,Cyrus,Derek,Desmond,Erik,Griffin,Hayden,Jeremy,Justice,Kaden,Kane,Kyle,Leif,Mark,Mateo,Miguel,Myles,Nikolai,Paxson,Peyton,Raymond,Reid,River,Rodney,Roland,Ronin,Rylan,Shawn,Spencer,Thaddeus,Tobias,Tyson,Walker,William,James,John,Mason,Elijah,Noah,Jackson,Aiden,Jacob,Joshua,Samuel,Michael,Ethan,Carter,Liam,Grayson,Christopher,Jayden,Logan,Hunter,Andrew,Landon,Caleb,Gabriel,Joseph,Charles,David,Jaxon,Colton,Luke,Ayden,Levi,Cameron,Matthew,Christian,Carson,Wyatt,Alexander,Jordan,Benjamin,Robert,Daniel,Kayden,Lucas,Easton,Bentley,Eli,Brantley,Braxton,Brayden,Jonathan,Henry,Anthony,Thomas,Jeremiah,Jace,Connor,Sawyer,Austin,Bryson,Oliver,Parker,Cooper,Hudson,Dylan,Isaac,Tyler,Isaiah,Josiah,Nathan,Karter,Brandon,Jase,Timothy,Jack,Kingston,Nicholas,Gavin,Tucker,Ashton,Tristan,Asher,Justin,Kaleb,Chase,Brody,Jayceon,Owen,Kaiden,Jaxson,Kevin,Ryan,Aaron,Cayden,Ian,Jason,Jayce,Micah,Ryder,Evan,Greyson,Hayden,Kaden,Harrison,Karson,Camden,Braylen,Rylan,Zachary,Zayden,Adam,Jose,Kameron,Wesley,Barrett,Marcus,Nolan,Blake,Dallas,Gunner,Kenneth,Malachi,Preston,Silas,Bryce,Collin,Antonio,Caden,Jonah,King,Peyton,Adrian,Maddox,Messiah,Brooks,Dawson,Trenton,Avery,Bennett,Lincoln,Steven,Alex,Richard,Sebastian,Weston,Braylon,Brian,Dalton,Ezra,Jaden,Jaiden,Riley,Tanner,Zion,Corbin,Walker,Xavier,Amari,Judah,Lawson,Maxwell,River,Jude,Angel,Carlos,Eric,Kason,Griffin,Juan,Princeton,Ryker,Abel,Bryan,Cason,Derrick,George,Roman,Aidan,Conner,Everett,Garrett,Jaylen,Miles,Stephen,Dominic,Emmett,Jameson,Johnny,Julian,Kyler,Zane,Bradley,Cole,Edward,Elias,Graham,Jesse,Kai,Nathaniel,Paul,Bryant,Caiden,Clayton,Gregory,Jeremy,Judson,Kylan,Patrick,Walter,Alan,Charlie,Colin,Landen,Chandler,Luis,Titus,Waylon,Anderson,Axel,Cody,Ezekiel,Grant,Jax,Kayson,Miguel,Tyson,Chance,Declan,Jett,Kyle,Vincent,Amir,Braydon,Johnathan,Seth,Travis,Brentley,Case,Devin,Jasper,Jeffery,Joel,Keegan,Marshall,Skyler,Cade,Fisher,Jensen,Max,Paxton,Rhett,Spencer,Trevor,Calvin,Cohen,Colt,Kendrick,Major,Taylor,Xander,Zander,Beau,Braden,Brycen,Curtis,Finn,Iker,Ivan,Jamari,Jesus,Mark,Maverick,Myles,Reid,Troy,Victor,Zaiden,Casen,Cash,Colby,Damien,Ellis,Grady,Houston,Kamden,Kolton,Landyn,Malcolm,Phillip,Warren,Brodie,Corey,Cristian,Dustin,Elliott,Jeffrey,Kasen,Raylan,Rowan,Wilson,Aden,Cedric,Coleman,Diego,Edwin,Gage,Jake,Kash,Prince,Ronald,Russell,Ryland,Theodore,Abram,Ace,Damian,Demarcus,Hayes,Holden,Jamarcus,Javier,Mateo,Maurice,Nicolas,Remington,Terry,Tony,Abraham,Brantlee,Carmelo,Demetrius,Dillon,Emmanuel,Israel,Jamichael,Jerry,Kobe,Kylen,Lane,Phoenix,Willie,August,Baylor,Channing,Daxton,Desmond,Donald,Donovan,Eduardo,Emerson,Jimmy,Kamari,Kendall,Porter,Skylar,Alejandro,Asa,Beckett,Bentlee,Bo,Brady,Brendan,Brock,Cannon,Casey,Courtney,Danny,Deacon,Dean,Elliot,Gannon,Jakobe,Jamarion,Jaxton,Jermaine,Jon,Leonardo,Luca,Morgan,Nehemiah,Pedro,Sylas,Tripp,Triston,Turner,Alexis,Andre,Brennan,Bruce,Cayson,Cornelius,Crimson,Davion,Davis,Deandre,Dennis,Franklin,Frederick,Gary,Harley,Jalen,Justice,Kade,Kaison,Kyson,Layton,Legend,Malik,Manuel,Micheal,Miller,Oakley,Omar,Reed,Ricardo,Ricky,Simon,Terrance,Trent,Zechariah,Archer,Dakota,Derek,Dexter,Erick,Felix,Jagger,Jasiah,Jorge,Keaton,Khalil,Kyrie,Ladarius,Leland,Leo,London,Lorenzo,Lukas,Maximus,Omari,Oscar,Quinton,Reese,Reginald,Rodney,Samson,Sean,Shane,Trace,Zachariah,Zaylen,Adan,Atticus,Branson,Braylin,Damon,Darius,Dominick,Dorian,Fletcher,Francisco,Issac,Jamison,Jared,Javion,Jay,Joe,Jordyn,Josue,Kamryn,Knox,Kolten,Kristian,Lance,Lathan,Louis,Lyric,Maison,Mitchell,Neymar,Peter,Randall,Reece,Scott,Shawn,Solomon,Tatum,Terrence,Tylan,Ahmad,Alijah,Andres,Andy,Beckham,Benton,Billy,Blaine,Bowen,Brenton,Byron,Camdyn,Cullen,Deangelo,Devon,Drake,Drew,Emory,Finley,Gael,Gerald,Gideon,Gunnar,Jacoby,Jakaiden,Jaydon,Jayson,Karsyn,Keagan,Keelan,Keith,Kristopher,Landan,Leon,Macon,Mario,Markell,Martin,Marvin,Raymond,Roderick,Sutton,Uriah,Allen,Amarion,Amos,Brayan,Brayson,Brenden,Callen,Camron,Chevy,Colten,Cortez,Dante,Deshawn,Eddie,Emanuel,Fernando,Foster,Giovanni,Hector,Izaiah,Jabari,Jakob,Jamir,Jaxen,Jayveon,Jayvion,Jorden,Kane,Kaysen,Keandre,Keller,Kenny,Kody,Konnor,Lamar,Langston,Larry,Lewis,Malakai,Mayson,Mohammed,Payton,Philip,Pierce,Quentin,Ridge,Ronan,Ronnie,Roy,Rylee,Santiago,Shepherd,Sullivan,Talon,Thaddeus,Toby,Tristen,Tristin,Will,Xzavier,Zackary,Zackery,Zayne,Zyon,Aiyden,Albert,Ali,Alton,Anakin,Angelo,Armani,Arthur,Aston,Barron,Ben,Bishop,Brent,Briar,Brylon,Camren,Canaan,Carl,Cashton,Caysen,Clark,Clinton,Damion,Darren,Daylen,Dayton,Douglas,Elisha,Enoch,Erik,Felipe,Forrest,Frank,Gatlin,Graysen,Isiah,Jacen,Jaceyon,Jaiceon,Jakaden,Jakari,Jakayden,Jamie,Jaquan,Jarvis,Jasen,Jaylon,Jerome,Justus,Kaidyn,Kalel,Kamron,Kashton,Kellan,Kelvin,Kemari,Kendarius,Kharter,Kohen,Kooper,Kymani,Lamarcus,Lawrence,Layne,Leonel,Marquis,Memphis,Nash,Nigel,Omarion,Pablo,Pierson,Randy,Ray,Rayden,Roland,Ruben,Stanley,Stephon,Sterling,Taylon,Tommy,Tristian,Wade,Wallace,Wells,Zamarion,Zayvion,Adrien,Aidyn,Alden,Alec,Alvin,Antwan,Armando,Aubrey,Aydon,Benson,Braylan,Brentlee,Brylan,Carsen,Chris,Christon,Collins,Conor,Corbyn,Coy,Crew,Dakari,Darrell,Darrion,Darryl,Dax,Deanthony,Demari,Deon,Dewayne,Dontavious,Draven,Eden,Emery,Ezequiel,Finnegan,Francis,Gauge,Gentry,Hank,Harlem,Harper,Harris,Hollis,Hosea,Jessie,Johan,Johnathon,Julien,Julio,Julius,Junior,Kace,Kaisen,Karsen,Kaydon,
38 changes: 38 additions & 0 deletions tests/integration_tests/charts/data/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import unittest
import copy
import numpy as np
from datetime import datetime
from io import BytesIO
import time
Expand Down Expand Up @@ -642,6 +643,43 @@ def test_with_invalid_where_parameter_closing_unclosed__400(self):

assert rv.status_code == 400

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_long_where_parameter(self):
long_clause = np.loadtxt(
"tests/example_data/test-long-where-clause-1k.txt", delimiter=",", dtype=str
).tolist()
self.query_context_payload["queries"][0]["filters"][2]["val"] = long_clause
start_a = time.time()
rv = self.post_assert_metric(CHART_DATA_URI, self.query_context_payload, "data")
assert rv.status_code == 200
compute_time_a = time.time() - start_a
table = self.get_table_by_id(1)
del self.query_context_payload["queries"][0]["time_range"]
sqla_query = table.get_sqla_query(
**{
"granularity": None,
"from_dttm": None,
"to_dttm": None,
"groupby": ["gender"],
"metrics": ["count"],
"is_timeseries": False,
"filter": self.query_context_payload["queries"][0]["filters"],
"extras": {},
}
)
start_b = time.time()
sql = table.database.compile_sqla_query(sqla_query.sqla_query)
result = table.database.get_df(sql, schema=table.schema)
assert result
compute_time_b = time.time() - start_b

buffer_time = 2
self.assertLess(
compute_time_a,
compute_time_b + buffer_time,
f"computation of post query payload: {compute_time_a} sec is higher than unparsed sqla querying time: {compute_time_b} sec plus {buffer_time} sec buffer time",
)

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
def test_with_where_parameter_including_comment___200(self):
self.query_context_payload["queries"][0]["filters"] = []
Expand Down
29 changes: 9 additions & 20 deletions tests/unit_tests/sql_parse_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from unittest.mock import Mock

import pytest
import sqlglot
import sqlparse
from pytest_mock import MockerFixture
from sqlalchemy import text
Expand Down Expand Up @@ -818,9 +819,7 @@ def test_is_valid_ctas() -> None:

A valid CTAS has a ``SELECT`` as its last statement.
"""
assert (
ParsedQuery("SELECT * FROM table", strip_comments=True).is_valid_ctas() is True
)
assert ParsedQuery("SELECT * FROM table").is_valid_ctas() is True

assert (
ParsedQuery(
Expand All @@ -829,7 +828,6 @@ def test_is_valid_ctas() -> None:
SELECT * FROM table
-- comment 2
""",
strip_comments=True,
).is_valid_ctas()
is True
)
Expand All @@ -842,7 +840,6 @@ def test_is_valid_ctas() -> None:
SELECT @value as foo;
-- comment 2
""",
strip_comments=True,
).is_valid_ctas()
is True
)
Expand All @@ -854,7 +851,6 @@ def test_is_valid_ctas() -> None:
EXPLAIN SELECT * FROM table
-- comment 2
""",
strip_comments=True,
).is_valid_ctas()
is False
)
Expand All @@ -865,7 +861,6 @@ def test_is_valid_ctas() -> None:
SELECT * FROM table;
INSERT INTO TABLE (foo) VALUES (42);
""",
strip_comments=True,
).is_valid_ctas()
is False
)
Expand All @@ -877,9 +872,7 @@ def test_is_valid_cvas() -> None:

A valid CVAS has a single ``SELECT`` statement.
"""
assert (
ParsedQuery("SELECT * FROM table", strip_comments=True).is_valid_cvas() is True
)
assert ParsedQuery("SELECT * FROM table").is_valid_cvas() is True

assert (
ParsedQuery(
Expand All @@ -888,7 +881,6 @@ def test_is_valid_cvas() -> None:
SELECT * FROM table
-- comment 2
""",
strip_comments=True,
).is_valid_cvas()
is True
)
Expand All @@ -901,7 +893,6 @@ def test_is_valid_cvas() -> None:
SELECT @value as foo;
-- comment 2
""",
strip_comments=True,
).is_valid_cvas()
is False
)
Expand All @@ -913,7 +904,6 @@ def test_is_valid_cvas() -> None:
EXPLAIN SELECT * FROM table
-- comment 2
""",
strip_comments=True,
).is_valid_cvas()
is False
)
Expand All @@ -924,7 +914,6 @@ def test_is_valid_cvas() -> None:
SELECT * FROM table;
INSERT INTO TABLE (foo) VALUES (42);
""",
strip_comments=True,
).is_valid_cvas()
is False
)
Expand Down Expand Up @@ -1180,17 +1169,17 @@ def test_messy_breakdown_statements() -> None:
]


def test_sqlparse_formatting():
def test_sqlglot_formatting():
"""
Test that ``from_unixtime`` is formatted correctly.
"""
assert sqlparse.format(
assert sqlglot.transpile(
"SELECT extract(HOUR from from_unixtime(hour_ts) "
"AT TIME ZONE 'America/Los_Angeles') from table",
reindent=True,
) == (
"SELECT extract(HOUR\n from from_unixtime(hour_ts) "
"AT TIME ZONE 'America/Los_Angeles')\nfrom table"
pretty=True,
)[0] == (
"SELECT\n EXTRACT(HOUR FROM FROM_UNIXTIME(hour_ts) AT TIME ZONE 'America/Los_Angeles')"
"\nFROM table"
)


Expand Down
Loading