apache · cryptoe · Jun 15, 2023 · Jun 5, 2023 · Jun 6, 2023 · Jun 9, 2023
diff --git a/...nsions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/...nsions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
@@ -1393,67 +1393,69 @@ private Yielder<Object[]> getFinalResultsYielder(
 
       return Yielders.each(
           Sequences.concat(
-              StreamSupport.stream(queryKernel.getResultPartitionsForStage(finalStageId).spliterator(), false)
-                           .map(
-                               readablePartition -> {
-                                 try {
-                                   return new FrameChannelSequence(
-                                       inputChannels.openChannel(
-                                           new StagePartition(
-                                               queryKernel.getStageDefinition(finalStageId).getId(),
-                                               readablePartition.getPartitionNumber()
-                                           )
-                                       )
-                                   );
-                                 }
-                                 catch (IOException e) {
-                                   throw new RuntimeException(e);
-                                 }
-                               }
-                           ).collect(Collectors.toList())
-          ).flatMap(
-              frame -> {
-                final Cursor cursor = FrameProcessors.makeCursor(
-                    frame,
-                    queryKernel.getStageDefinition(finalStageId).getFrameReader()
-                );
-
-                final ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory();
-                final ColumnMappings columnMappings = task.getQuerySpec().getColumnMappings();
-                @SuppressWarnings("rawtypes")
-                final List<ColumnValueSelector> selectors =
-                    columnMappings.getMappings()
-                                  .stream()
-                                  .map(
-                                      mapping ->
-                                          columnSelectorFactory.makeColumnValueSelector(mapping.getQueryColumn())
-                                  ).collect(Collectors.toList());
-
-                final List<SqlTypeName> sqlTypeNames = task.getSqlTypeNames();
-                final List<Object[]> retVal = new ArrayList<>();
-                while (!cursor.isDone()) {
-                  final Object[] row = new Object[columnMappings.size()];
-                  for (int i = 0; i < row.length; i++) {
-                    final Object value = selectors.get(i).getObject();
-                    if (sqlTypeNames == null || task.getSqlResultsContext() == null) {
-                      // SQL type unknown, or no SQL results context: pass-through as is.
-                      row[i] = value;
-                    } else {
-                      row[i] = SqlResults.coerce(
-                          context.jsonMapper(),
-                          task.getSqlResultsContext(),
-                          value,
-                          sqlTypeNames.get(i)
-                      );
-                    }
-                  }
-                  retVal.add(row);
-                  cursor.advance();
-                }
-
-                return Sequences.simple(retVal);
-              }
-          ).withBaggage(resultReaderExec::shutdownNow)
+                       StreamSupport.stream(queryKernel.getResultPartitionsForStage(finalStageId).spliterator(), false)
+                                    .map(
+                                        readablePartition -> {
+                                          try {
+                                            return new FrameChannelSequence(
+                                                inputChannels.openChannel(
+                                                    new StagePartition(
+                                                        queryKernel.getStageDefinition(finalStageId).getId(),
+                                                        readablePartition.getPartitionNumber()
+                                                    )
+                                                )
+                                            );
+                                          }
+                                          catch (IOException e) {
+                                            throw new RuntimeException(e);
+                                          }
+                                        }
+                                    ).collect(Collectors.toList())
+                   ).flatMap(
+                       frame -> {
+                         final Cursor cursor = FrameProcessors.makeCursor(
+                             frame,
+                             queryKernel.getStageDefinition(finalStageId).getFrameReader()
+                         );
+
+                         final ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory();
+                         final ColumnMappings columnMappings = task.getQuerySpec().getColumnMappings();
+                         @SuppressWarnings("rawtypes")
+                         final List<ColumnValueSelector> selectors =
+                             columnMappings.getMappings()
+                                           .stream()
+                                           .map(
+                                               mapping ->
+                                                   columnSelectorFactory.makeColumnValueSelector(mapping.getQueryColumn())
+                                           ).collect(Collectors.toList());
+
+                         final List<SqlTypeName> sqlTypeNames = task.getSqlTypeNames();
+                         final List<Object[]> retVal = new ArrayList<>();
+                         while (!cursor.isDone()) {
+                           final Object[] row = new Object[columnMappings.size()];
+                           for (int i = 0; i < row.length; i++) {
+                             final Object value = selectors.get(i).getObject();
+                             if (sqlTypeNames == null || task.getSqlResultsContext() == null) {
+                               // SQL type unknown, or no SQL results context: pass-through as is.
+                               row[i] = value;
+                             } else {
+                               row[i] = SqlResults.coerce(
+                                   context.jsonMapper(),
+                                   task.getSqlResultsContext(),
+                                   value,
+                                   sqlTypeNames.get(i)
+                               );
+                             }
+                           }
+                           retVal.add(row);
+                           cursor.advance();
+                         }
+
+                         return Sequences.simple(retVal);
+                       }
+                   )
+                   .limit(Limits.MAX_SELECT_RESULT_ROWS)
+                   .withBaggage(resultReaderExec::shutdownNow)
       );
     } else {
       return null;

diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Limits.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/Limits.java
@@ -90,4 +90,11 @@ public class Limits
    * {@link ClusterStatisticsMergeMode#SEQUENTIAL} mode is chosen.
    */
   public static final long MAX_WORKERS_FOR_PARALLEL_MERGE = 100;
+
+  /**
+   * Max number of rows in the query reports of the SELECT queries run by MSQ. This ensures that the reports do not
+   * blow up for queries operating on larger datasets. The full result of the select query should be available once
+   * the MSQ is able to run async queries.
+   */
+  public static final long MAX_SELECT_RESULT_ROWS = 3_000;
 }
diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java
@@ -22,13 +22,16 @@
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.impl.CsvInputFormat;
 import org.apache.druid.data.input.impl.JsonInputFormat;
 import org.apache.druid.data.input.impl.LocalInputSource;
 import org.apache.druid.frame.util.DurableStorageUtils;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.Intervals;
+import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.math.expr.ExprEval;
 import org.apache.druid.math.expr.ExprMacroTable;
 import org.apache.druid.msq.indexing.MSQSpec;
 import org.apache.druid.msq.indexing.MSQTuningConfig;
@@ -1784,6 +1787,66 @@ public void testGroupByOnFooWithDurableStoragePathAssertions() throws IOExceptio
     }
   }
 
+  @Test
+  public void testSelectRowsGetTruncatedInReports() throws IOException
+  {
+    RowSignature dummyRowSignature = RowSignature.builder().add("timestamp", ColumnType.LONG).build();
+
+    final int numFiles = 200; // input file is repeated numFiles times; presumably exceeds the row limit - TODO confirm
+
+    final File toRead = MSQTestFileUtils.getResourceAsTemporaryFile(temporaryFolder, this, "/wikipedia-sampled.json");
+    final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(toRead.getAbsolutePath());
+
+    String externalFiles = String.join(", ", Collections.nCopies(numFiles, toReadFileNameAsJson));
+
+    List<Object[]> result = new ArrayList<>(); // expected rows: Limits.MAX_SELECT_RESULT_ROWS copies of {1}
+    for (int i = 0; i < Limits.MAX_SELECT_RESULT_ROWS; ++i) {
+      result.add(new Object[]{1});
+    }
+
+    testSelectQuery()
+        .setSql(StringUtils.format(
+            " SELECT 1 as \"timestamp\"\n"
+            + "FROM TABLE(\n"
+            + "  EXTERN(\n"
+            + "    '{ \"files\": [%s],\"type\":\"local\"}',\n"
+            + "    '{\"type\": \"csv\", \"hasHeaderRow\": true}',\n"
+            + "    '[{\"name\": \"timestamp\", \"type\": \"string\"}]'\n"
+            + "  )\n"
+            + ")",
+            externalFiles // substituted for %s: the comma-separated, JSON-quoted file paths
+        ))
+        .setExpectedRowSignature(dummyRowSignature)
+        .setExpectedMSQSpec(
+            MSQSpec
+                .builder()
+                .query(newScanQueryBuilder()
+                           .dataSource(new ExternalDataSource(
+                               new LocalInputSource(null, null, Collections.nCopies(numFiles, toRead)),
+                               new CsvInputFormat(null, null, null, true, 0),
+                               RowSignature.builder().add("timestamp", ColumnType.STRING).build()
+                           ))
+                           .intervals(querySegmentSpec(Filtration.eternity()))
+                           .columns("v0")
+                           .virtualColumns(new ExpressionVirtualColumn("v0", ExprEval.of(1L).toExpr(), ColumnType.LONG))
+                           .context(defaultScanQueryContext(
+                               context,
+                               RowSignature.builder().add("v0", ColumnType.LONG).build()
+                           ))
+                           .build()
+                )
+                .columnMappings(new ColumnMappings(
+                    ImmutableList.of(
+                        new ColumnMapping("v0", "timestamp") // virtual column v0 is exposed as "timestamp"
+                    )
+                ))
+                .tuningConfig(MSQTuningConfig.defaultConfig())
+                .build())
+        .setQueryContext(context)
+        .setExpectedResultRows(result) // exactly the MAX_SELECT_RESULT_ROWS rows built above
+        .verifyResults();
+  }
+
   @Test
   public void testMultiValueStringWithIncorrectType() throws IOException
   {

diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java
@@ -1317,7 +1317,7 @@ public void verifyResults()
     {
       Preconditions.checkArgument(expectedResultRows != null, "Result rows cannot be null");
       Preconditions.checkArgument(expectedRowSignature != null, "Row signature cannot be null");
-      Preconditions.checkArgument(expectedMSQSpec != null, "MultiStageQuery Query spec not ");
+      Preconditions.checkArgument(expectedMSQSpec != null, "MultiStageQuery Query spec cannot be null ");
       Pair<MSQSpec, Pair<List<MSQResultsReport.ColumnAndType>, List<Object[]>>> specAndResults = runQueryWithResult();
 
       if (specAndResults == null) { // A fault was expected and the assertion has been done in the runQueryWithResult