Skip to content

Commit

Permalink
Update null count in the column stats for UNION stats estimation
Browse files: browse the repository at this point in the history
  • Loading branch information
shahidki31 committed May 11, 2021
1 parent b59d5ab commit 02a4b87
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ case class FilterEstimation(plan: Filter) extends Logging {
attr: Attribute,
isNull: Boolean,
update: Boolean): Option[Double] = {
if (!colStatsMap.contains(attr) || !colStatsMap(attr).hasCountStats) {
if (!colStatsMap.contains(attr) || colStatsMap(attr).nullCount.isEmpty) {
logDebug("[CBO] No statistics for " + attr)
return None
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,20 @@ object UnionEstimation {
val outputAttrStats = new ArrayBuffer[(Attribute, ColumnStat)]()
attrToComputeMinMaxStats.foreach {
case (attrs, outputIndex) =>
var nullCount: Option[BigInt] = None
val dataType = unionOutput(outputIndex).dataType
val statComparator = createStatComparator(dataType)
val minMaxValue = attrs.zipWithIndex.foldLeft[(Option[Any], Option[Any])]((None, None)) {
case ((minVal, maxVal), (attr, childIndex)) =>
val colStat = union.children(childIndex).stats.attributeStats(attr)
// Update null count
nullCount = if (nullCount.isDefined && colStat.nullCount.isDefined) {
Some(nullCount.get + colStat.nullCount.get)
} else if (colStat.nullCount.isDefined) {
colStat.nullCount
} else {
nullCount
}
val min = if (minVal.isEmpty || statComparator(colStat.min.get, minVal.get)) {
colStat.min
} else {
Expand All @@ -103,10 +112,11 @@ object UnionEstimation {
}
(min, max)
}
val newStat = ColumnStat(min = minMaxValue._1, max = minMaxValue._2)
val newStat = ColumnStat(min = minMaxValue._1, max = minMaxValue._2,
nullCount = nullCount)
outputAttrStats += unionOutput(outputIndex) -> newStat
}
AttributeMap(outputAttrStats.toSeq)
AttributeMap(outputAttrStats)
} else {
AttributeMap.empty[ColumnStat]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ class UnionEstimationSuite extends StatsEstimationTestBase {
distinctCount = Some(2),
min = Some(1),
max = Some(4),
nullCount = Some(0),
nullCount = Some(1),
avgLen = Some(4),
maxLen = Some(4)),
attrDouble -> ColumnStat(
distinctCount = Some(2),
min = Some(5.0),
max = Some(4.0),
nullCount = Some(0),
nullCount = Some(2),
avgLen = Some(4),
maxLen = Some(4)),
attrShort -> ColumnStat(min = Some(s1), max = Some(s2)),
Expand All @@ -96,14 +96,14 @@ class UnionEstimationSuite extends StatsEstimationTestBase {
distinctCount = Some(2),
min = Some(3),
max = Some(6),
nullCount = Some(0),
nullCount = Some(1),
avgLen = Some(8),
maxLen = Some(8)),
AttributeReference("cdouble1", DoubleType)() -> ColumnStat(
distinctCount = Some(2),
min = Some(2.0),
max = Some(7.0),
nullCount = Some(0),
nullCount = Some(2),
avgLen = Some(8),
maxLen = Some(8)),
AttributeReference("cshort1", ShortType)() -> ColumnStat(min = Some(s3), max = Some(s4)),
Expand Down Expand Up @@ -139,8 +139,8 @@ class UnionEstimationSuite extends StatsEstimationTestBase {
rowCount = Some(4),
attributeStats = AttributeMap(
Seq(
attrInt -> ColumnStat(min = Some(1), max = Some(6)),
attrDouble -> ColumnStat(min = Some(2.0), max = Some(7.0)),
attrInt -> ColumnStat(min = Some(1), max = Some(6), nullCount = Some(2)),
attrDouble -> ColumnStat(min = Some(2.0), max = Some(7.0), nullCount = Some(4)),
attrShort -> ColumnStat(min = Some(s1), max = Some(s4)),
attrLong -> ColumnStat(min = Some(1L), max = Some(6L)),
attrByte -> ColumnStat(min = Some(b1), max = Some(b4)),
Expand Down

0 comments on commit 02a4b87

Please sign in to comment.