Skip to content

Commit

Permalink
[SPARK-32590][SQL] Remove fullOutput from RowDataSourceScanExec
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Remove `fullOutput` from `RowDataSourceScanExec`

### Why are the changes needed?
`RowDataSourceScanExec` requires the full output instead of the scan output after column pruning. However, in the v2 code path we no longer have the full output, so we just pass the pruned output. `RowDataSourceScanExec.fullOutput` is therefore meaningless and should be removed.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Existing tests.

Closes #29415 from huaxingao/rm_full_output.

Authored-by: Huaxin Gao <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
  • Loading branch information
huaxingao authored and cloud-fan committed Aug 14, 2020
1 parent 339eec5 commit 14003d4
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,15 @@ trait DataSourceScanExec extends LeafExecNode {

/** Physical plan node for scanning data from a relation. */
case class RowDataSourceScanExec(
fullOutput: Seq[Attribute],
requiredColumnsIndex: Seq[Int],
output: Seq[Attribute],
requiredSchema: StructType,
filters: Set[Filter],
handledFilters: Set[Filter],
rdd: RDD[InternalRow],
@transient relation: BaseRelation,
tableIdentifier: Option[TableIdentifier])
extends DataSourceScanExec with InputRDDCodegen {

def output: Seq[Attribute] = requiredColumnsIndex.map(fullOutput)

override lazy val metrics =
Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))

Expand All @@ -136,14 +134,14 @@ case class RowDataSourceScanExec(
if (handledFilters.contains(filter)) s"*$filter" else s"$filter"
}
Map(
"ReadSchema" -> output.toStructType.catalogString,
"ReadSchema" -> requiredSchema.catalogString,
"PushedFilters" -> markedFilters.mkString("[", ", ", "]"))
}

// Don't care about `rdd` and `tableIdentifier` when canonicalizing.
override def doCanonicalize(): SparkPlan =
copy(
fullOutput.map(QueryPlan.normalizeExpressions(_, fullOutput)),
output.map(QueryPlan.normalizeExpressions(_, output)),
rdd = null,
tableIdentifier = None)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with
case l @ LogicalRelation(baseRelation: TableScan, _, _, _) =>
RowDataSourceScanExec(
l.output,
l.output.indices,
l.output.toStructType,
Set.empty,
Set.empty,
toCatalystRDD(l, baseRelation.buildScan()),
Expand Down Expand Up @@ -379,8 +379,8 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with
.map(relation.attributeMap)

val scan = RowDataSourceScanExec(
relation.output,
requestedColumns.map(relation.output.indexOf),
requestedColumns,
requestedColumns.toStructType,
pushedFilters.toSet,
handledFilters,
scanBuilder(requestedColumns, candidatePredicates, pushedFilters),
Expand All @@ -401,8 +401,8 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with
(projectSet ++ filterSet -- handledSet).map(relation.attributeMap).toSeq

val scan = RowDataSourceScanExec(
relation.output,
requestedColumns.map(relation.output.indexOf),
requestedColumns,
requestedColumns.toStructType,
pushedFilters.toSet,
handledFilters,
scanBuilder(requestedColumns, candidatePredicates, pushedFilters),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
}
val rdd = v1Relation.buildScan()
val unsafeRowRDD = DataSourceStrategy.toCatalystRDD(v1Relation, output, rdd)
val originalOutputNames = relation.table.schema().map(_.name)
val requiredColumnsIndex = output.map(_.name).map(originalOutputNames.indexOf)
val dsScan = RowDataSourceScanExec(
output,
requiredColumnsIndex,
output.toStructType,
translated.toSet,
pushed.toSet,
unsafeRowRDD,
Expand Down

0 comments on commit 14003d4

Please sign in to comment.