Skip to content

Commit

Permalink
Add processedStrides and processedSplits metrics (facebookincubator#264)
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo authored and zhejiangxiaomai committed May 26, 2023
1 parent 5eac9b8 commit c15991b
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 4 deletions.
1 change: 1 addition & 0 deletions velox/connectors/hive/HiveConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ void HiveDataSource::addSplit(std::shared_ptr<ConnectorSplit> split) {
runtimeStats_.skippedSplitBytes += split_->length;
return;
}
++runtimeStats_.processedSplits;

auto& fileType = reader_->rowType();

Expand Down
10 changes: 9 additions & 1 deletion velox/dwio/common/Statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -517,18 +517,26 @@ struct RuntimeStatistics {
// Number of splits skipped based on statistics.
int64_t skippedSplits{0};

// Number of splits processed based on statistics.
int64_t processedSplits{0};

// Total bytes in splits skipped based on statistics.
int64_t skippedSplitBytes{0};

// Number of strides (row groups) skipped based on statistics.
int64_t skippedStrides{0};

// Number of strides (row groups) processed based on statistics.
int64_t processedStrides{0};

std::unordered_map<std::string, RuntimeCounter> toMap() {
return {
{"skippedSplits", RuntimeCounter(skippedSplits)},
{"processedSplits", RuntimeCounter(processedSplits)},
{"skippedSplitBytes",
RuntimeCounter(skippedSplitBytes, RuntimeCounter::Unit::kBytes)},
{"skippedStrides", RuntimeCounter(skippedStrides)}};
{"skippedStrides", RuntimeCounter(skippedStrides)},
{"processedStrides", RuntimeCounter(processedStrides)}};
}
};

Expand Down
1 change: 1 addition & 0 deletions velox/dwio/parquet/reader/ParquetReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,7 @@ bool ParquetRowReader::advanceToNextRowGroup() {
void ParquetRowReader::updateRuntimeStats(
dwio::common::RuntimeStatistics& stats) const {
stats.skippedStrides += skippedRowGroups_;
stats.processedStrides += rowGroupIds_.size();
}

void ParquetRowReader::resetFilterCaches() {
Expand Down
3 changes: 0 additions & 3 deletions velox/dwio/parquet/reader/TimestampColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ class TimestampColumnReader : public IntegerColumnReader {
common::ScanSpec& scanSpec)
: IntegerColumnReader(nodeType, nodeType, params, scanSpec) {}

static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL;
static constexpr int64_t SECONDS_PER_DAY = 86400LL;

void read(
vector_size_t offset,
RowSet rows,
Expand Down
4 changes: 4 additions & 0 deletions velox/exec/tests/PrintPlanWithStatsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ TEST_F(PrintPlanWithStatsTest, innerJoinWithTableScan) {
{" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
true},
{" processedSplits [ ]* sum: 1, count: 1, min: 1, max: 1"},
{" processedStrides [ ]* sum: 0, count: 1, min: 0, max: 0"},
{" queryThreadIoLatency[ ]* sum: .+, count: .+ min: .+, max: .+"},
{" ramReadBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" readyPreloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
Expand Down Expand Up @@ -284,6 +286,8 @@ TEST_F(PrintPlanWithStatsTest, partialAggregateWithTableScan) {
{" overreadBytes[ ]* sum: 0B, count: 1, min: 0B, max: 0B"},

{" prefetchBytes [ ]* sum: .+, count: 1, min: .+, max: .+"},
{" processedSplits [ ]* sum: 1, count: 1, min: 1, max: 1"},
{" processedStrides [ ]* sum: 0, count: 1, min: 0, max: 0"},
{" preloadedSplits[ ]+sum: .+, count: .+, min: .+, max: .+",
true},
{" queryThreadIoLatency[ ]* sum: .+, count: .+ min: .+, max: .+"},
Expand Down

0 comments on commit c15991b

Please sign in to comment.