From cd6fba2f6cf6e189ba26097c73b9dbe4f1d0edbc Mon Sep 17 00:00:00 2001 From: somu-imply <93540295+somu-imply@users.noreply.github.com> Date: Thu, 14 Apr 2022 21:59:17 -0700 Subject: [PATCH] Handling planning with alias for time for group by and order by (#12418) An outer scan query, that requires ordering on a column, should be considered an invalid query. --- .../sql/calcite/rel/DruidOuterQueryRel.java | 6 +- .../druid/sql/calcite/rel/DruidQuery.java | 27 +++++--- .../sql/calcite/CalciteJoinQueryTest.java | 58 ++++++++++++++++ .../druid/sql/calcite/CalciteQueryTest.java | 69 ++++++++++++++++++- 4 files changed, 148 insertions(+), 12 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java index 6c0160760d2e..d9bd16343ef1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java @@ -32,8 +32,8 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.Druids; import org.apache.druid.query.QueryDataSource; -import org.apache.druid.query.TableDataSource; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -46,7 +46,9 @@ */ public class DruidOuterQueryRel extends DruidRel { - private static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("__subquery__"); + private static final QueryDataSource DUMMY_DATA_SOURCE = new QueryDataSource( + Druids.newScanQueryBuilder().dataSource("__subquery__").eternityInterval().build() + ); private final PartialDruidQuery partialQuery; private RelNode sourceRel; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index 1fa63ff6d609..a0627dfa4f18 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -1199,14 +1199,25 @@ private ScanQuery toScanQuery(final QueryFeatureInspector queryFeatureInspector) orderByColumns = Collections.emptyList(); } - if (!queryFeatureInspector.feature(QueryFeature.SCAN_CAN_ORDER_BY_NON_TIME) - && (orderByColumns.size() > 1 - || orderByColumns.stream() - .anyMatch(orderBy -> !orderBy.getColumnName().equals(ColumnHolder.TIME_COLUMN_NAME)))) { - // Cannot handle this ordering. - // Scan cannot ORDER BY non-time columns. - plannerContext.setPlanningError("SQL query requires order by non-time column %s that is not supported.", orderByColumns); - return null; + if (!queryFeatureInspector.feature(QueryFeature.SCAN_CAN_ORDER_BY_NON_TIME) && !orderByColumns.isEmpty()) { + if (orderByColumns.size() > 1 || !ColumnHolder.TIME_COLUMN_NAME.equals(orderByColumns.get(0).getColumnName())) { + // Cannot handle this ordering. + // Scan cannot ORDER BY non-time columns. + plannerContext.setPlanningError( + "SQL query requires order by non-time column %s that is not supported.", + orderByColumns + ); + return null; + } + if (!dataSource.isConcrete()) { + // Cannot handle this ordering: the only ORDER BY column is the time column, but the datasource is not concrete. + // NOTE(review): the message below says non-time column even though this branch is only reached for time ordering. + plannerContext.setPlanningError( + "SQL query requires order by non-time column on a datasource[%s], which is not supported.", + dataSource + ); + return null; + } } // Compute the list of columns to select, sorted and deduped. 
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index 2f58c6cd79bf..2d5b3b043dc5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -104,6 +104,64 @@ @RunWith(JUnitParamsRunner.class) public class CalciteJoinQueryTest extends BaseCalciteQueryTest { + + @Test + public void testInnerJoinWithLimitAndAlias() throws Exception + { + minTopNThreshold = 1; + Map context = new HashMap<>(QUERY_CONTEXT_DEFAULT); + context.put(PlannerConfig.CTX_KEY_USE_APPROXIMATE_TOPN, false); + testQuery( + "select t1.b1 from (select __time as b1 from numfoo group by 1 order by 1) as t1 inner join (\n" + + " select __time as b2 from foo group by 1 order by 1\n" + + ") as t2 on t1.b1 = t2.b2 ", + context, // turn on exact topN + ImmutableList.of( + newScanQueryBuilder() + .intervals(querySegmentSpec(Filtration.eternity())) + .dataSource( + JoinDataSource.create( + new QueryDataSource( + GroupByQuery.builder() + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDataSource(new TableDataSource("numfoo")) + .setDimensions(new DefaultDimensionSpec("__time", "_d0", ColumnType.LONG)) + .setContext(context) + .build() + ), + new QueryDataSource( + GroupByQuery.builder() + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDataSource(new TableDataSource("foo")) + .setDimensions(new DefaultDimensionSpec("__time", "d0", ColumnType.LONG)) + .setContext(context) + .build() + ), + "j0.", + "(\"_d0\" == \"j0.d0\")", + JoinType.INNER, + null, + ExprMacroTable.nil() + ) + ) + .columns("_d0") + .context(context) + .build() + ), + ImmutableList.of( + new Object[]{946684800000L}, + new Object[]{946771200000L}, + new Object[]{946857600000L}, + new Object[]{978307200000L}, + new 
Object[]{978393600000L}, + new Object[]{978480000000L} + ) + ); + } + + @Test public void testExactTopNOnInnerJoinWithLimit() throws Exception { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index ad68fd8e4531..333591d981ce 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -6719,6 +6719,12 @@ public void testMinMaxAvgDailyCountWithLimit() throws Exception ) ) .setInterval(querySegmentSpec(Filtration.eternity())) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of(), + 1 + ) + ) .setGranularity(Granularities.ALL) .setAggregatorSpecs( useDefault @@ -6752,7 +6758,7 @@ public void testMinMaxAvgDailyCountWithLimit() throws Exception new FieldAccessPostAggregator(null, "_a2:count") ) ), - expressionPostAgg("p0", "timestamp_extract(\"_a3\",'EPOCH','UTC')") + expressionPostAgg("s0", "timestamp_extract(\"_a3\",'EPOCH','UTC')") ) ) .setContext(QUERY_CONTEXT_DEFAULT) @@ -7002,7 +7008,7 @@ public void testExplainExactCountDistinctOfSemiJoinResult() throws Exception + " )\n" + ")"; final String legacyExplanation = - "DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{a0:LONG}])\n" + 
"DruidOuterQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"query\",\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"list\",\"batchSize\":20480,\"filter\":null,\"context\":null,\"descending\":false,\"granularity\":{\"type\":\"all\"}}},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[],\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{a0:LONG}])\n" + " DruidJoinQueryRel(condition=[=(SUBSTRING($3, 1, 1), $8)], joinType=[inner], query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__join__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{d0:STRING}])\n" + " 
DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX}])\n" + " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null,\"extractionFn\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{d0:STRING}])\n"; @@ -11100,6 +11106,65 @@ public void testUnicodeFilterAndGroupBy() throws Exception ); } + + @Test + public void testOrderByAlongWithAliasOrderByTimeGroupByMulti() throws Exception + { + testQuery( + "select __time 
as bug, dim2 from druid.foo group by 1, 2 order by 1 limit 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("__time", "d0", ColumnType.LONG), + new DefaultDimensionSpec("dim2", "d1", ColumnType.STRING) + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + Collections.singletonList( + new OrderByColumnSpec("d0", Direction.ASCENDING, StringComparators.NUMERIC) + ), + 1 + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{946684800000L, "a"} + ) + ); + } + + + @Test + public void testOrderByAlongWithAliasOrderByTimeGroupByOneCol() throws Exception + { + testQuery( + "select __time as bug from druid.foo group by 1 order by 1 limit 1", + ImmutableList.of( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .dimension( + new DefaultDimensionSpec("__time", "d0", ColumnType.LONG) + ) + .threshold(1) + .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{946684800000L} + ) + ); + } + @Test public void testProjectAfterSort() throws Exception {