diff --git a/eva/binder/statement_binder.py b/eva/binder/statement_binder.py index 6aa6c7f82..2e6014036 100644 --- a/eva/binder/statement_binder.py +++ b/eva/binder/statement_binder.py @@ -31,6 +31,7 @@ from eva.parser.alias import Alias from eva.parser.create_mat_view_statement import CreateMaterializedViewStatement from eva.parser.drop_statement import DropTableStatement +from eva.parser.explain_statement import ExplainStatement from eva.parser.load_statement import LoadDataStatement from eva.parser.select_statement import SelectStatement from eva.parser.statement import AbstractStatement @@ -75,6 +76,10 @@ def _bind_abstract_expr(self, node: AbstractExpression): for child in node.children: self.bind(child) + @bind.register(ExplainStatement) + def _bind_explain_statement(self, node: ExplainStatement): + self.bind(node.explainable_stmt) + @bind.register(SelectStatement) def _bind_select_statement(self, node: SelectStatement): self.bind(node.from_table) diff --git a/eva/executor/explain_executor.py b/eva/executor/explain_executor.py new file mode 100644 index 000000000..19e6ac385 --- /dev/null +++ b/eva/executor/explain_executor.py @@ -0,0 +1,41 @@ +# coding=utf-8 +# Copyright 2018-2022 EVA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pandas as pd + +from eva.executor.abstract_executor import AbstractExecutor +from eva.models.storage.batch import Batch +from eva.planner.abstract_plan import AbstractPlan +from eva.planner.explain_plan import ExplainPlan + + +class ExplainExecutor(AbstractExecutor): + def __init__(self, node: ExplainPlan): + super().__init__(node) + + def validate(self): + pass + + def exec(self): + # Traverse optimized physical plan, which is commonly supported. + # Logical plan can be also printted by passing explainable_opr + # attribute of the node, but is not done for now. + plan_str = self._exec(self._node.children[0], 0) + yield Batch(pd.DataFrame([plan_str])) + + def _exec(self, node: AbstractPlan, depth: int): + cur_str = " " * depth * 4 + "|__ " + str(node.__class__.__name__) + "\n" + for child in node.children: + cur_str += self._exec(child, depth + 1) + return cur_str diff --git a/eva/executor/plan_executor.py b/eva/executor/plan_executor.py index acf2e9150..a40707d62 100644 --- a/eva/executor/plan_executor.py +++ b/eva/executor/plan_executor.py @@ -20,6 +20,7 @@ from eva.executor.create_udf_executor import CreateUDFExecutor from eva.executor.drop_executor import DropExecutor from eva.executor.drop_udf_executor import DropUDFExecutor +from eva.executor.explain_executor import ExplainExecutor from eva.executor.function_scan_executor import FunctionScanExecutor from eva.executor.hash_join_executor import HashJoinExecutor from eva.executor.insert_executor import InsertExecutor @@ -121,9 +122,14 @@ def _build_execution_tree(self, plan: AbstractPlan) -> AbstractExecutor: executor_node = PredicateExecutor(node=plan) elif plan_opr_type == PlanOprType.SHOW_INFO: executor_node = ShowInfoExecutor(node=plan) - # Build Executor Tree for children - for children in plan.children: - executor_node.append_child(self._build_execution_tree(children)) + elif plan_opr_type == PlanOprType.EXPLAIN: + executor_node = ExplainExecutor(node=plan) + + # EXPLAIN does not need to build execution tree for its children + if plan_opr_type != PlanOprType.EXPLAIN: + # Build Executor Tree for children + for children in plan.children: + executor_node.append_child(self._build_execution_tree(children)) return executor_node diff --git a/eva/models/server/response.py b/eva/models/server/response.py index 7a9c9d1cd..43195a28c 100644 --- a/eva/models/server/response.py +++ b/eva/models/server/response.py @@ -48,12 +48,12 @@ def __str__(self): if self.query_time is not None: return ( "@status: %s\n" - "@batch: %s\n" + "@batch: \n %s\n" "@query_time: %s" % (self.status, self.batch, self.query_time) ) else: return ( "@status: %s\n" - "@batch: %s\n" + "@batch: \n %s\n" "@error: %s" % (self.status, self.batch, self.error) ) diff --git a/eva/models/storage/batch.py b/eva/models/storage/batch.py index 36dd5e2cf..b1953c3a7 100644 --- a/eva/models/storage/batch.py +++ b/eva/models/storage/batch.py @@ -126,13 +126,10 @@ def compare_is_contained(cls, batch1: Batch, batch2: Batch) -> None: ) def __str__(self) -> str: - # reduce the nesting depth to accelerate printing ndarray objects - with pd.option_context("display.pprint_nest_depth", 1): - return ( - "Batch Object:\n" - "@dataframe: %s\n" - "@batch_size: %d\n" % (self._frames, len(self)) - ) + with pd.option_context( + "display.pprint_nest_depth", 1, "display.max_colwidth", 100 + ): + return f"{self._frames}" def __eq__(self, other: Batch): return self._frames[sorted(self.columns)].equals( diff --git a/eva/optimizer/operators.py b/eva/optimizer/operators.py index b98abc83e..54258f751 100644 --- a/eva/optimizer/operators.py +++ b/eva/optimizer/operators.py @@ -54,6 +54,7 @@ class OperatorType(IntEnum): LOGICAL_CREATE_MATERIALIZED_VIEW = auto() LOGICAL_SHOW = auto() LOGICALDROPUDF = auto() + LOGICALEXPLAIN = auto() LOGICALDELIMITER = auto() @@ -1057,3 +1058,23 @@ def __eq__(self, other): def __hash__(self) -> int: return super().__hash__() + + +class LogicalExplain(Operator): + def __init__(self, children: List = None): + super().__init__(OperatorType.LOGICALEXPLAIN, children) + assert len(children) == 1, "EXPLAIN command only takes one child" + self._explainable_opr = children[0] + + @property + def explainable_opr(self): + return self._explainable_opr + + def __eq__(self, other): + is_subtree_equal = super().__eq__(other) + if not isinstance(other, LogicalExplain): + return False + return is_subtree_equal and self._explainable_opr == other.explainable_opr + + def __hash__(self) -> int: + return hash((super().__hash__(), self._explainable_opr)) diff --git a/eva/optimizer/rules/rules.py b/eva/optimizer/rules/rules.py index 9eb0c3040..f72b425fd 100644 --- a/eva/optimizer/rules/rules.py +++ b/eva/optimizer/rules/rules.py @@ -26,6 +26,7 @@ from eva.optimizer.rules.rules_base import Promise, Rule, RuleType from eva.parser.types import JoinType from eva.planner.create_mat_view_plan import CreateMaterializedViewPlan +from eva.planner.explain_plan import ExplainPlan from eva.planner.hash_join_build_plan import HashJoinBuildPlan from eva.planner.predicate_plan import PredicatePlan from eva.planner.project_plan import ProjectPlan @@ -42,6 +43,7 @@ LogicalCreateUDF, LogicalDrop, LogicalDropUDF, + LogicalExplain, LogicalFilter, LogicalFunctionScan, LogicalGet, @@ -807,5 +809,24 @@ def apply(self, before: LogicalShow, context: OptimizerContext): return after +class LogicalExplainToPhysical(Rule): + def __init__(self): + pattern = Pattern(OperatorType.LOGICALEXPLAIN) + pattern.append_child(Pattern(OperatorType.DUMMY)) + super().__init__(RuleType.LOGICAL_EXPLAIN_TO_PHYSICAL, pattern) + + def promise(self): + return Promise.LOGICAL_EXPLAIN_TO_PHYSICAL + + def check(self, grp_id: int, context: OptimizerContext): + return True + + def apply(self, before: LogicalExplain, context: OptimizerContext): + after = ExplainPlan(before.explainable_opr) + for child in before.children: + after.append_child(child) + return after + + # IMPLEMENTATION RULES END ############################################## diff --git a/eva/optimizer/rules/rules_base.py b/eva/optimizer/rules/rules_base.py index b8e3fb5b5..1baac8313 100644 --- a/eva/optimizer/rules/rules_base.py +++ b/eva/optimizer/rules/rules_base.py @@ -68,6 +68,7 @@ class RuleType(Flag): LOGICAL_PROJECT_TO_PHYSICAL = auto() LOGICAL_SHOW_TO_PHYSICAL = auto() LOGICAL_DROP_UDF_TO_PHYSICAL = auto() + LOGICAL_EXPLAIN_TO_PHYSICAL = auto() IMPLEMENTATION_DELIMETER = auto() NUM_RULES = auto() @@ -103,6 +104,7 @@ class Promise(IntEnum): LOGICAL_PROJECT_TO_PHYSICAL = auto() LOGICAL_SHOW_TO_PHYSICAL = auto() LOGICAL_DROP_UDF_TO_PHYSICAL = auto() + LOGICAL_EXPLAIN_TO_PHYSICAL = auto() IMPLEMENTATION_DELIMETER = auto() # TRANSFORMATION RULES (LOGICAL -> LOGICAL) diff --git a/eva/optimizer/rules/rules_manager.py b/eva/optimizer/rules/rules_manager.py index 159f7aa45..6679e2d1d 100644 --- a/eva/optimizer/rules/rules_manager.py +++ b/eva/optimizer/rules/rules_manager.py @@ -32,6 +32,7 @@ LogicalDerivedGetToPhysical, LogicalDropToPhysical, LogicalDropUDFToPhysical, + LogicalExplainToPhysical, LogicalFilterToPhysical, LogicalFunctionScanToPhysical, ) @@ -110,6 +111,7 @@ def __init__(self): if ray_enabled else SequentialLogicalProjectToPhysical(), LogicalShowToPhysical(), + LogicalExplainToPhysical(), ] if ray_enabled: diff --git a/eva/optimizer/statement_to_opr_convertor.py b/eva/optimizer/statement_to_opr_convertor.py index 1cd3eaff6..a84eddc84 100644 --- a/eva/optimizer/statement_to_opr_convertor.py +++ b/eva/optimizer/statement_to_opr_convertor.py @@ -19,6 +19,7 @@ LogicalCreateUDF, LogicalDrop, LogicalDropUDF, + LogicalExplain, LogicalFilter, LogicalFunctionScan, LogicalGet, @@ -40,6 +41,7 @@ from eva.parser.create_udf_statement import CreateUDFStatement from eva.parser.drop_statement import DropTableStatement from eva.parser.drop_udf_statement import DropUDFStatement +from eva.parser.explain_statement import ExplainStatement from eva.parser.insert_statement import InsertTableStatement from eva.parser.load_statement import LoadDataStatement from eva.parser.rename_statement import RenameTableStatement @@ -292,6 +294,10 @@ def visit_show(self, statement: ShowStatement): show_opr = LogicalShow(statement.show_type) self._plan = show_opr + def visit_explain(self, statement: ExplainStatement): + explain_opr = LogicalExplain([self.visit(statement.explainable_stmt)]) + self._plan = explain_opr + def visit(self, statement: AbstractStatement): """Based on the instance of the statement the corresponding visit is called. @@ -322,6 +328,8 @@ def visit(self, statement: AbstractStatement): self.visit_materialized_view(statement) elif isinstance(statement, ShowStatement): self.visit_show(statement) + elif isinstance(statement, ExplainStatement): + self.visit_explain(statement) return self._plan @property diff --git a/eva/parser/evaql/evaql_parser.g4 b/eva/parser/evaql/evaql_parser.g4 index 8a6584a09..b472353b7 100644 --- a/eva/parser/evaql/evaql_parser.g4 +++ b/eva/parser/evaql/evaql_parser.g4 @@ -35,7 +35,12 @@ dmlStatement ; utilityStatement - : simpleDescribeStatement | helpStatement | showStatement + : simpleDescribeStatement | helpStatement | showStatement | explainStatement + ; + +explainableStatement + : selectStatement | insertStatement | updateStatement | deleteStatement + | createMaterializedView ; // Data Definition Language @@ -378,6 +383,10 @@ showStatement : SHOW (UDFS | TABLES) ; +explainStatement + : EXPLAIN explainableStatement + ; + // Common Clauses // DB Objects diff --git a/eva/parser/explain_statement.py b/eva/parser/explain_statement.py new file mode 100644 index 000000000..e74f8b18c --- /dev/null +++ b/eva/parser/explain_statement.py @@ -0,0 +1,38 @@ +# coding=utf-8 +# Copyright 2018-2022 EVA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from eva.parser.statement import AbstractStatement +from eva.parser.types import StatementType + + +class ExplainStatement(AbstractStatement): + def __init__(self, explainable_stmt: AbstractStatement): + super().__init__(StatementType.EXPLAIN) + self._explainable_stmt = explainable_stmt + + def __str__(self) -> str: + print_str = "EXPLAIN {}".format(str(self._explainable_stmt)) + return print_str + + @property + def explainable_stmt(self) -> AbstractStatement: + return self._explainable_stmt + + def __eq__(self, other): + if not isinstance(other, ExplainStatement): + return False + return self._explainable_stmt == other.explainable_stmt + + def __hash__(self) -> int: + return hash((super().__hash__(), self.explainable_stmt)) diff --git a/eva/parser/parser_visitor/__init__.py b/eva/parser/parser_visitor/__init__.py index 1f803848c..719ded7e2 100644 --- a/eva/parser/parser_visitor/__init__.py +++ b/eva/parser/parser_visitor/__init__.py @@ -18,6 +18,7 @@ from eva.parser.parser_visitor._common_clauses_ids import CommonClauses from eva.parser.parser_visitor._create_statements import CreateTable from eva.parser.parser_visitor._drop_statement import DropTable +from eva.parser.parser_visitor._explain_statement import Explain from eva.parser.parser_visitor._expressions import Expressions from eva.parser.parser_visitor._functions import Functions from eva.parser.parser_visitor._insert_statements import Insert @@ -48,6 +49,7 @@ class ParserVisitor( RenameTable, DropTable, Show, + Explain, ): def visitRoot(self, ctx: evaql_parser.RootContext): for child in ctx.children: diff --git a/eva/parser/parser_visitor/_explain_statement.py b/eva/parser/parser_visitor/_explain_statement.py new file mode 100644 index 000000000..b0b7e36be --- /dev/null +++ b/eva/parser/parser_visitor/_explain_statement.py @@ -0,0 +1,23 @@ +# coding=utf-8 +# Copyright 2018-2022 EVA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from eva.parser.evaql.evaql_parser import evaql_parser +from eva.parser.evaql.evaql_parserVisitor import evaql_parserVisitor +from eva.parser.explain_statement import ExplainStatement + + +class Explain(evaql_parserVisitor): + def visitExplainStatement(self, ctx: evaql_parser.ExplainStatementContext): + explainable_stmt = self.visit(ctx.explainableStatement()) + return ExplainStatement(explainable_stmt) diff --git a/eva/parser/types.py b/eva/parser/types.py index 1e81058f6..cf12260ba 100644 --- a/eva/parser/types.py +++ b/eva/parser/types.py @@ -39,6 +39,7 @@ class StatementType(Enum): CREATE_MATERIALIZED_VIEW = (auto(),) SHOW = (auto(),) DROP_UDF = auto() + EXPLAIN = (auto(),) # add other types diff --git a/eva/planner/abstract_plan.py b/eva/planner/abstract_plan.py index d1ca92000..eb587f1de 100644 --- a/eva/planner/abstract_plan.py +++ b/eva/planner/abstract_plan.py @@ -76,6 +76,10 @@ def is_logical(self): def __hash__(self) -> int: return hash(self.opr_type) + @abstractmethod + def __str__(self) -> str: + return "AbstractPlan" + def __copy__(self): # deepcopy the children cls = self.__class__ diff --git a/eva/planner/create_mat_view_plan.py b/eva/planner/create_mat_view_plan.py index 56a02a34d..ab913bf52 100644 --- a/eva/planner/create_mat_view_plan.py +++ b/eva/planner/create_mat_view_plan.py @@ -53,6 +53,13 @@ def if_not_exists(self): def columns(self): return self._columns + def __str__(self): + return "CreateMaterializedViewPlan(view={}, \ + columns={}, \ + if_not_exists={})".format( + self._view, self._columns, self._if_not_exists + ) + def __hash__(self) -> int: return hash( (super().__hash__(), self.view, self.if_not_exists, tuple(self.columns)) diff --git a/eva/planner/create_plan.py b/eva/planner/create_plan.py index 5be04a57c..b8dacb83b 100644 --- a/eva/planner/create_plan.py +++ b/eva/planner/create_plan.py @@ -53,6 +53,13 @@ def if_not_exists(self): def column_list(self): return self._column_list + def __str__(self): + return "CreatePlan(table_ref={}, \ + column_list={}, \ + if_not_exists={})".format( + self._table_ref, self._column_list, self._if_not_exists + ) + def __hash__(self) -> int: return hash( ( diff --git a/eva/planner/create_udf_plan.py b/eva/planner/create_udf_plan.py index ead99645c..792413a91 100644 --- a/eva/planner/create_udf_plan.py +++ b/eva/planner/create_udf_plan.py @@ -83,6 +83,21 @@ def impl_path(self): def udf_type(self): return self._udf_type + def __str__(self): + return "CreateUDFPlan(name={}, \ + if_not_exists={}, \ + inputs={}, \ + outputs={}, \ + impl_file_path={}, \ + udf_type={})".format( + self._name, + self._if_not_exists, + self._inputs, + self._outputs, + self._impl_path, + self._udf_type, + ) + def __hash__(self) -> int: return hash( ( diff --git a/eva/planner/drop_plan.py b/eva/planner/drop_plan.py index 2a623579d..5220839ac 100644 --- a/eva/planner/drop_plan.py +++ b/eva/planner/drop_plan.py @@ -41,5 +41,10 @@ def table_refs(self): def if_exists(self): return self._if_exists + def __str__(self): + return "DropPlan(table_refs={}, if_exists={})".format( + self._table_refs, self._if_exists + ) + def __hash__(self) -> int: return hash((super().__hash__(), tuple(self._table_refs), self.if_exists)) diff --git a/eva/planner/drop_udf_plan.py b/eva/planner/drop_udf_plan.py index 6974524db..1a9f8c2ac 100644 --- a/eva/planner/drop_udf_plan.py +++ b/eva/planner/drop_udf_plan.py @@ -41,5 +41,8 @@ def name(self): def if_exists(self): return self._if_exists + def __str__(self): + return "DropUDFPlan(name={}, if_exists={})".format(self._name, self._if_exists) + def __hash__(self) -> int: return hash((super().__hash__(), self.name, self.if_exists)) diff --git a/eva/planner/explain_plan.py b/eva/planner/explain_plan.py new file mode 100644 index 000000000..3a45a5016 --- /dev/null +++ b/eva/planner/explain_plan.py @@ -0,0 +1,32 @@ +# coding=utf-8 +# Copyright 2018-2022 EVA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from eva.planner.abstract_plan import AbstractPlan +from eva.planner.types import PlanOprType + + +class ExplainPlan(AbstractPlan): + def __init__(self, explainable_plan: AbstractPlan): + super().__init__(PlanOprType.EXPLAIN) + self._explainable_plan = explainable_plan + + @property + def explainable_plan(self): + return self._explainable_plan + + def __str__(self) -> str: + return "ExplainPlan()" + + def __hash__(self) -> int: + return hash((super().__hash__(), self._explainable_plan)) diff --git a/eva/planner/function_scan_plan.py b/eva/planner/function_scan_plan.py index e7aec18d6..31fd0fc10 100644 --- a/eva/planner/function_scan_plan.py +++ b/eva/planner/function_scan_plan.py @@ -40,5 +40,9 @@ def func_expr(self): def do_unnest(self): return self._do_unnest + def __str__(self): + plan = "UnnestFunctionScanPlan" if self._do_unnest else "FunctionScanPlan" + return "{}(func_expr={})".format(plan, self._func_expr) + def __hash__(self) -> int: return hash((super().__hash__(), self.func_expr, self.do_unnest)) diff --git a/eva/planner/hash_join_build_plan.py b/eva/planner/hash_join_build_plan.py index 2f3be1632..812161850 100644 --- a/eva/planner/hash_join_build_plan.py +++ b/eva/planner/hash_join_build_plan.py @@ -35,5 +35,11 @@ def __init__(self, join_type: JoinType, build_keys: List[DataFrameColumn]): self.build_keys = build_keys super().__init__(PlanOprType.HASH_BUILD) + def __str__(self): + return "HashJoinBuildPlan(join_type={}, \ + build_keys={})".format( + self.join_type, self.build_keys + ) + def __hash__(self) -> int: return hash((super().__hash__(), self.join_type, tuple(self.build_keys or []))) diff --git a/eva/planner/hash_join_probe_plan.py b/eva/planner/hash_join_probe_plan.py index 757d6dae7..ccd15358a 100644 --- a/eva/planner/hash_join_probe_plan.py +++ b/eva/planner/hash_join_probe_plan.py @@ -44,6 +44,14 @@ def __init__( self.join_project = join_project super().__init__(PlanOprType.HASH_JOIN, join_type, join_predicate) + def __str__(self): + return "HashJoinProbePlan(join_type={}, \ + probe_keys={}, \ + join_predicate={}, \ + join_project={})".format( + self.join_type, self.probe_keys, self.join_predicate, self.join_project + ) + def __hash__(self) -> int: return hash( ( diff --git a/eva/planner/insert_plan.py b/eva/planner/insert_plan.py index 25ce0ebdc..5b6b8c56f 100644 --- a/eva/planner/insert_plan.py +++ b/eva/planner/insert_plan.py @@ -43,6 +43,13 @@ def __init__( self.columns_list = column_list self.value_list = value_list + def __str__(self): + return "InsertPlan(table_metainfo={}, \ + column_list={}, \ + value_list={})".format( + self.table_metainfo, self.columns_list, self.value_list + ) + def __hash__(self) -> int: return hash( ( diff --git a/eva/planner/lateral_join_plan.py b/eva/planner/lateral_join_plan.py index 7d8ce130c..5b8082b96 100644 --- a/eva/planner/lateral_join_plan.py +++ b/eva/planner/lateral_join_plan.py @@ -29,5 +29,11 @@ def __init__(self, join_predicate: AbstractExpression): PlanOprType.LATERAL_JOIN, JoinType.LATERAL_JOIN, join_predicate ) + def __str__(self): + return "LateralJoinPlan(join_project={}, \ + join_predicate={})".format( + self.join_project, self.join_predicate + ) + def __hash__(self) -> int: return hash((super().__hash__(), tuple(self.join_project))) diff --git a/eva/planner/limit_plan.py b/eva/planner/limit_plan.py index bbd71c709..c8496168b 100644 --- a/eva/planner/limit_plan.py +++ b/eva/planner/limit_plan.py @@ -40,5 +40,8 @@ def limit_expression(self): def limit_value(self): return self._limit_count.value + def __str__(self): + return "LimitPlan(limit_count={})".format(self._limit_count) + def __hash__(self) -> int: return hash((super().__hash__(), self._limit_count)) diff --git a/eva/planner/orderby_plan.py b/eva/planner/orderby_plan.py index 6576eb6a8..4d86c6134 100644 --- a/eva/planner/orderby_plan.py +++ b/eva/planner/orderby_plan.py @@ -42,5 +42,8 @@ def sort_types(self): def orderby_list(self): return self._orderby_list + def __str__(self): + return "OrderByPlan(orderby_list={})".format(self._orderby_list) + def __hash__(self) -> int: return hash((super().__hash__(), tuple(self._orderby_list))) diff --git a/eva/planner/pp_plan.py b/eva/planner/pp_plan.py index 476b79633..501171b59 100644 --- a/eva/planner/pp_plan.py +++ b/eva/planner/pp_plan.py @@ -29,3 +29,6 @@ class PPScanPlan(AbstractScan): def __init__(self, predicate: AbstractExpression): super().__init__(PlanOprType.PP_FILTER, predicate) + + def __str__(self): + return "AbstractPlan" diff --git a/eva/planner/predicate_plan.py b/eva/planner/predicate_plan.py index ee8cb907e..60e8edf36 100644 --- a/eva/planner/predicate_plan.py +++ b/eva/planner/predicate_plan.py @@ -28,5 +28,8 @@ def __init__(self, predicate: AbstractExpression): self.predicate = predicate super().__init__(PlanOprType.PREDICATE_FILTER) + def __str__(self): + return "PredicatePlan(predicate={})".format(self.predicate) + def __hash__(self) -> int: return hash((super().__hash__(), self.predicate)) diff --git a/eva/planner/project_plan.py b/eva/planner/project_plan.py index 2e71f7f15..f6d5a06d1 100644 --- a/eva/planner/project_plan.py +++ b/eva/planner/project_plan.py @@ -29,5 +29,8 @@ def __init__(self, target_list: List[AbstractExpression]): self.target_list = target_list super().__init__(PlanOprType.PROJECT) + def __str__(self): + return "ProjectPlan(target_list={})".format(self.target_list) + def __hash__(self) -> int: return hash((super().__hash__(), tuple(self.target_list))) diff --git a/eva/planner/rename_plan.py b/eva/planner/rename_plan.py index 6c0550d21..eaecea436 100644 --- a/eva/planner/rename_plan.py +++ b/eva/planner/rename_plan.py @@ -40,5 +40,11 @@ def old_table(self): def new_name(self): return self._new_name + def __str__(self): + return "RenamePlan(old_table={}, \ + new_name={})".format( + self._old_table, self._new_name + ) + def __hash__(self) -> int: return hash((super().__hash__(), self.old_table, self.new_name)) diff --git a/eva/planner/sample_plan.py b/eva/planner/sample_plan.py index 3818ab29f..ac28fffbe 100644 --- a/eva/planner/sample_plan.py +++ b/eva/planner/sample_plan.py @@ -36,5 +36,8 @@ def __init__(self, sample_freq: ConstantValueExpression): def sample_freq(self): return self._sample_freq + def __str__(self): + return "SamplePlan(sample_freq={})".format(self._sample_freq) + def __hash__(self) -> int: return hash((super().__hash__(), self.sample_freq)) diff --git a/eva/planner/seq_scan_plan.py b/eva/planner/seq_scan_plan.py index c956792e1..e7723adcb 100644 --- a/eva/planner/seq_scan_plan.py +++ b/eva/planner/seq_scan_plan.py @@ -45,5 +45,12 @@ def __init__( def columns(self): return self._columns + def __str__(self): + return "SeqScanPlan(predicate={}, \ + columns={}, \ + alias={})".format( + self._predicate, self._columns, self.alias + ) + def __hash__(self) -> int: return hash((super().__hash__(), tuple(self.columns or []), self.alias)) diff --git a/eva/planner/show_info_plan.py b/eva/planner/show_info_plan.py index 115bd00cb..9a04f6ea4 100644 --- a/eva/planner/show_info_plan.py +++ b/eva/planner/show_info_plan.py @@ -26,5 +26,8 @@ def __init__(self, show_type: ShowType): def show_type(self): return self._show_type + def __str__(self): + return "ShowUDFPlan" if self._show_type == ShowType.UDF else "ShowTablePlan" + def __hash__(self) -> int: return hash((super().__hash__(), self.show_type)) diff --git a/eva/planner/storage_plan.py b/eva/planner/storage_plan.py index 40b53eb77..4a255f9dc 100644 --- a/eva/planner/storage_plan.py +++ b/eva/planner/storage_plan.py @@ -93,6 +93,27 @@ def predicate(self): def sampling_rate(self): return self._sampling_rate + def __str__(self): + return "StoragePlan(video={}, \ + batch_mem_size={}, \ + skip_frames={}, \ + offset={}, \ + limit={}, \ + total_shards={}, \ + curr_shard={}, \ + predicate={}, \ + sampling_rate={})".format( + self._video, + self._batch_mem_size, + self._skip_frames, + self._offset, + self._limit, + self._total_shards, + self._curr_shard, + self._predicate, + self._sampling_rate, + ) + def __hash__(self) -> int: return hash( ( diff --git a/eva/planner/types.py b/eva/planner/types.py index 2222ca163..10725e85c 100644 --- a/eva/planner/types.py +++ b/eva/planner/types.py @@ -42,4 +42,5 @@ class PlanOprType(Enum): PROJECT = auto() SHOW_INFO = auto() DROP_UDF = auto() + EXPLAIN = auto() # add other types diff --git a/eva/planner/union_plan.py b/eva/planner/union_plan.py index 87857835c..6d80ac0a5 100644 --- a/eva/planner/union_plan.py +++ b/eva/planner/union_plan.py @@ -33,5 +33,8 @@ def __init__(self, all: bool): def all(self): return self._all + def __str__(self): + return "UnionAllPlan" if self._all else "UnionPlan" + def __hash__(self) -> int: return hash((super().__hash__(), self._all)) diff --git a/test/binder/test_statement_binder.py b/test/binder/test_statement_binder.py index 6d773efa2..c64ff8b7c 100644 --- a/test/binder/test_statement_binder.py +++ b/test/binder/test_statement_binder.py @@ -117,6 +117,13 @@ def test_bind_create_mat_statement(self): binder._bind_create_mat_statement(mat_statement) mock_binder.assert_called_with(mat_statement.query) + def test_bind_explain_statement(self): + with patch.object(StatementBinder, "bind") as mock_binder: + binder = StatementBinder(StatementBinderContext()) + stmt = MagicMock() + binder._bind_explain_statement(stmt) + mock_binder.assert_called_with(stmt.explainable_stmt) + @patch("eva.binder.statement_binder.CatalogManager") @patch("eva.binder.statement_binder.path_to_class") def test_bind_func_expr(self, mock_path_to_class, mock_catalog): diff --git a/test/integration_tests/test_explain_executor.py b/test/integration_tests/test_explain_executor.py new file mode 100644 index 000000000..c0f31fac4 --- /dev/null +++ b/test/integration_tests/test_explain_executor.py @@ -0,0 +1,55 @@ +# coding=utf-8 +# Copyright 2018-2022 EVA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +from test.util import create_sample_video, create_table, file_remove, load_inbuilt_udfs + +from eva.catalog.catalog_manager import CatalogManager +from eva.server.command_handler import execute_query_fetch_all + +NUM_FRAMES = 10 + + +class ExplainExecutorTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + CatalogManager().reset() + create_sample_video(NUM_FRAMES) + load_query = """LOAD FILE 'dummy.avi' INTO MyVideo;""" + execute_query_fetch_all(load_query) + load_inbuilt_udfs() + cls.table1 = create_table("table1", 100, 3) + cls.table2 = create_table("table2", 500, 3) + cls.table3 = create_table("table3", 1000, 3) + + @classmethod + def tearDownClass(cls): + file_remove("dummy.avi") + drop_query = """DROP TABLE table1;""" + execute_query_fetch_all(drop_query) + drop_query = """DROP TABLE table2;""" + execute_query_fetch_all(drop_query) + drop_query = """DROP TABLE table3;""" + execute_query_fetch_all(drop_query) + + def test_explain_simple_select(self): + select_query = "EXPLAIN SELECT id, data FROM MyVideo" + batch = execute_query_fetch_all(select_query) + expected_output = """|__ SeqScanPlan\n |__ StoragePlan\n""" + self.assertEqual(batch.frames[0][0], expected_output) + + select_query = "EXPLAIN SELECT id, data FROM MyVideo JOIN LATERAL DummyObjectDetector(data) AS T ;" + batch = execute_query_fetch_all(select_query) + expected_output = """|__ ProjectPlan\n |__ LateralJoinPlan\n |__ SeqScanPlan\n |__ StoragePlan\n |__ FunctionScanPlan\n""" + self.assertEqual(batch.frames[0][0], expected_output) diff --git a/test/integration_tests/test_select_executor.py b/test/integration_tests/test_select_executor.py index 8a1f3370c..b28e1aaf7 100644 --- a/test/integration_tests/test_select_executor.py +++ b/test/integration_tests/test_select_executor.py @@ -49,6 +49,12 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): file_remove("dummy.avi") + drop_query = """DROP TABLE table1;""" + execute_query_fetch_all(drop_query) + drop_query = """DROP TABLE table2;""" + execute_query_fetch_all(drop_query) + drop_query = """DROP TABLE table3;""" + execute_query_fetch_all(drop_query) def test_sort_on_nonprojected_column(self): """This tests doing an order by on a column diff --git a/test/optimizer/rules/test_rules.py b/test/optimizer/rules/test_rules.py index e9139f5a0..e8e74f020 100644 --- a/test/optimizer/rules/test_rules.py +++ b/test/optimizer/rules/test_rules.py @@ -44,6 +44,7 @@ LogicalDerivedGetToPhysical, LogicalDropToPhysical, LogicalDropUDFToPhysical, + LogicalExplainToPhysical, LogicalFilterToPhysical, LogicalFunctionScanToPhysical, LogicalGetToSeqScan, @@ -135,6 +136,9 @@ def test_rules_promises_order(self): self.assertTrue( Promise.LOGICAL_DROP_TO_PHYSICAL < Promise.IMPLEMENTATION_DELIMETER ) + self.assertTrue( + Promise.LOGICAL_EXPLAIN_TO_PHYSICAL < Promise.IMPLEMENTATION_DELIMETER + ) def test_supported_rules(self): # adding/removing rules should update this test @@ -187,6 +191,7 @@ def test_supported_rules(self): LogicalFilterToPhysical(), LogicalProjectToPhysical(), LogicalShowToPhysical(), + LogicalExplainToPhysical(), ] ray_enabled = ConfigurationManager().get_value("experimental", "ray") diff --git a/test/optimizer/test_statement_to_opr_convertor.py b/test/optimizer/test_statement_to_opr_convertor.py index fe6e64a14..fb1e586d5 100644 --- a/test/optimizer/test_statement_to_opr_convertor.py +++ b/test/optimizer/test_statement_to_opr_convertor.py @@ -39,6 +39,7 @@ from eva.parser.create_udf_statement import CreateUDFStatement from eva.parser.drop_statement import DropTableStatement from eva.parser.drop_udf_statement import DropUDFStatement +from eva.parser.explain_statement import ExplainStatement from eva.parser.insert_statement import InsertTableStatement from eva.parser.rename_statement import RenameTableStatement from eva.parser.select_statement import SelectStatement @@ -198,6 +199,16 @@ def test_visit_should_call_rename(self): mock.assert_called_once() mock.assert_called_with(stmt) + def test_visit_should_call_explain(self): + stmt = MagicMock(spec=ExplainStatement) + convertor = StatementToPlanConvertor() + mock = MagicMock() + convertor.visit_explain = mock + + convertor.visit(stmt) + mock.assert_called_once() + mock.assert_called_once_with(stmt) + def test_visit_should_call_drop(self): stmt = MagicMock(spec=DropTableStatement) convertor = StatementToPlanConvertor() diff --git a/test/parser/test_parser.py b/test/parser/test_parser.py index 60fc6d067..0b5eea8ed 100644 --- a/test/parser/test_parser.py +++ b/test/parser/test_parser.py @@ -42,6 +42,51 @@ class ParserTests(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + def test_explain_dml_statement(self): + parser = Parser() + + explain_query = "EXPLAIN SELECT CLASS FROM TAIPAI;" + eva_statement_list = parser.parse(explain_query) + + # check explain stmt itself + self.assertIsInstance(eva_statement_list, list) + self.assertEqual(len(eva_statement_list), 1) + self.assertEqual(eva_statement_list[0].stmt_type, StatementType.EXPLAIN) + + # check inner stmt + inner_stmt = eva_statement_list[0].explainable_stmt + self.assertEqual(inner_stmt.stmt_type, StatementType.SELECT) + + # check inner stmt from + self.assertIsNotNone(inner_stmt.from_table) + self.assertIsInstance(inner_stmt.from_table, TableRef) + self.assertEqual(inner_stmt.from_table.table.table_name, "TAIPAI") + + def test_explain_ddl_statement(self): + parser = Parser() + + select_query = """SELECT id, FastRCNNObjectDetector(frame).labels FROM MyVideo + WHERE id<5; """ + explain_query = "EXPLAIN CREATE MATERIALIZED VIEW uadtrac_fastRCNN (id, labels) AS {}".format( + select_query + ) + + eva_statement_list = parser.parse(explain_query) + + # check explain stmt itself + self.assertIsInstance(eva_statement_list, list) + self.assertEqual(len(eva_statement_list), 1) + self.assertEqual(eva_statement_list[0].stmt_type, StatementType.EXPLAIN) + + # check inner stmt + inner_stmt = eva_statement_list[0].explainable_stmt + self.assertEqual(inner_stmt.stmt_type, StatementType.CREATE_MATERIALIZED_VIEW) + + # check inner stmt from + self.assertIsNotNone( + inner_stmt.view_ref, TableRef(TableInfo("uadetrac_fastRCNN")) + ) + def test_create_statement(self): parser = Parser()