Skip to content

Commit

Permalink
[iree-prof-tools] Support zone filtering with substrings. (#231)
Browse files Browse the repository at this point in the history
1) It is much faster than zone_regex for big models with too many zones.
2) zone_regex and zone_substrs can exist at the same time.
3) When zone_regex is empty, regex matching is disabled.
  • Loading branch information
protobird-git authored Apr 3, 2024
1 parent 372c114 commit 74e2d8e
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 22 deletions.
57 changes: 39 additions & 18 deletions iree-prof-tools/iree-prof-output-stdout.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
#include <fstream>
#include <iostream>
#include <map>
#include <optional>
#include <regex>
#include <string>
#include <vector>

#include "iree-prof-tools/iree-prof-output-utils.h"
Expand Down Expand Up @@ -104,6 +106,16 @@ absl::flat_hash_map<int, int64_t> GetThreadDurations(
return filtered_thread_durations;
}

// Returns true if |str| contains at least one element of |substrs| as a
// substring. Returns false if |substrs| is empty or none of them occurs in
// |str|.
bool HasSubstr(const absl::string_view str,
               const std::vector<std::string>& substrs) {
  bool found = false;
  // Stop scanning as soon as the first candidate is located in |str|.
  for (auto it = substrs.begin(); it != substrs.end() && !found; ++it) {
    found = str.find(*it) != absl::string_view::npos;
  }
  return found;
}

template <typename T>
struct Stat {
absl::string_view name;
Expand Down Expand Up @@ -133,14 +145,18 @@ template <typename T>
std::vector<Stat<T>> GetZoneStatsFilteredAndSorted(
const tracy::Worker& worker,
const tracy::unordered_flat_map<int16_t, T>& zones,
const std::regex& zone_regex,
const std::vector<std::string>& zone_substrs,
const std::optional<std::regex>& zone_regex,
const absl::flat_hash_map<int, int64_t>& thread_durations) {
std::vector<Stat<T>> zone_stats_filtered;
absl::flat_hash_map<absl::string_view, int> zone_stats_filtered_index;
for (const auto& z : zones) {
for (const auto& t : z.second.zones) {
const char* zone_name = worker.GetZoneName(*t.Zone());
if (!std::regex_search(zone_name, zone_regex)) {
bool matched =
!zone_substrs.empty() && HasSubstr(zone_name, zone_substrs) ||
zone_regex && std::regex_search(zone_name, *zone_regex);
if (!matched) {
continue;
}

Expand Down Expand Up @@ -313,14 +329,15 @@ void OutputTable(const std::vector<std::vector<std::string>>& output_table,
}
}

// Output tabulated information of tracy zones filtered with |zone_regex| and
// |thread_regex|.
// Output tabulated information of tracy zones filtered with |zone_substrs|,
// |zone_regex| and |thread_regex|.
template <typename T>
void OutputToStream(const tracy::Worker& worker,
const tracy::unordered_flat_map<int16_t, T>& zones,
bool output_zone_stats,
bool output_per_op_stats,
const std::regex& zone_regex,
const std::vector<std::string>& zone_substrs,
const std::optional<std::regex>& zone_regex,
const std::regex& thread_regex,
absl::string_view header,
IreeProfOutputStdout::DurationUnit unit,
Expand All @@ -340,7 +357,7 @@ void OutputToStream(const tracy::Worker& worker,
}

auto zone_stats_filtered = GetZoneStatsFilteredAndSorted(
worker, zones, zone_regex, thread_durations);
worker, zones, zone_substrs, zone_regex, thread_durations);
if (output_zone_stats) {
os << header << " Zone Stats" << ": "
<< zone_stats_filtered.size() << "\n";
Expand Down Expand Up @@ -533,16 +550,20 @@ std::unique_ptr<IreeProfOutputStdout::OutputStream> CreateOutputStream(

} // namespace

IreeProfOutputStdout::IreeProfOutputStdout(bool output_stdout,
absl::string_view csv_file_path,
bool output_zone_stats,
bool output_per_op_stats,
const std::string& zone_regex,
const std::string& thread_regex,
DurationUnit unit)
IreeProfOutputStdout::IreeProfOutputStdout(
bool output_stdout,
absl::string_view csv_file_path,
bool output_zone_stats,
bool output_per_op_stats,
const std::vector<std::string>& zone_substrs,
const std::string& zone_regex,
const std::string& thread_regex,
DurationUnit unit)
: output_zone_stats_(output_zone_stats),
output_per_op_stats_(output_per_op_stats),
zone_regex_(zone_regex),
zone_substrs_(zone_substrs),
zone_regex_(zone_regex.empty() ? std::nullopt
: std::optional<std::regex>(zone_regex)),
thread_regex_(thread_regex),
unit_(unit),
os_(CreateOutputStream(output_stdout, csv_file_path, unit)) {
Expand All @@ -565,8 +586,8 @@ absl::Status IreeProfOutputStdout::Output(tracy::Worker& worker) {
os() << "[TRACY-CPU]" << " CPU Zones" << ": "
<< worker.GetZoneCount() << "\n";
OutputToStream(worker, worker.GetSourceLocationZones(), output_zone_stats_,
output_per_op_stats_, zone_regex_, thread_regex_,
"[TRACY-CPU]", unit_, os());
output_per_op_stats_, zone_substrs_, zone_regex_,
thread_regex_, "[TRACY-CPU]", unit_, os());
}

if (!worker.GetGpuData().empty()) {
Expand All @@ -580,8 +601,8 @@ absl::Status IreeProfOutputStdout::Output(tracy::Worker& worker) {
os() << "[TRACY-GPU]" << " GPU Zones" << ": "
<< worker.GetGpuZoneCount() << "\n";
OutputToStream(worker, worker.GetGpuSourceLocationZones(),
output_zone_stats_, output_per_op_stats_, zone_regex_,
thread_regex_, "[TRACY-GPU]", unit_, os());
output_zone_stats_, output_per_op_stats_, zone_substrs_,
zone_regex_, thread_regex_, "[TRACY-GPU]", unit_, os());
}

if (!worker.GetMemNameMap().empty()) {
Expand Down
5 changes: 4 additions & 1 deletion iree-prof-tools/iree-prof-output-stdout.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define IREE_PROF_OUTPUT_STDOUT_H_

#include <memory>
#include <optional>
#include <regex>
#include <string>
#include <vector>
Expand All @@ -33,6 +34,7 @@ class IreeProfOutputStdout : public IreeProfOutput {
absl::string_view csv_file_path,
bool output_zone_stats,
bool output_per_op_stats,
const std::vector<std::string>& zone_substrs,
const std::string& zone_regex,
const std::string& thread_regex,
DurationUnit unit);
Expand All @@ -52,7 +54,8 @@ class IreeProfOutputStdout : public IreeProfOutput {

const bool output_zone_stats_;
const bool output_per_op_stats_;
const std::regex zone_regex_;
const std::vector<std::string> zone_substrs_;
const std::optional<std::regex> zone_regex_;
const std::regex thread_regex_;
const DurationUnit unit_;
const std::unique_ptr<OutputStream> os_;
Expand Down
14 changes: 11 additions & 3 deletions iree-prof-tools/iree-prof-output.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,16 @@ ABSL_FLAG(bool, output_zones_stdout, true,
"Whether to print Tracy result of individual zones to stdout.");
ABSL_FLAG(bool, output_ops_stdout, true,
"Whether to print Tracy result of ML operation to stdout.");
ABSL_FLAG(std::string, zone_regex,
"iree_hal_buffer_map_(zero|fill|read|write|copy)|_dispatch_[0-9]+",
"ECMAScript regex of tracy zones to output to stdout.");
ABSL_FLAG(std::vector<std::string>, zone_substrs,
(std::vector<std::string>{"iree_hal_buffer_map_", "_dispatch_"}),
"Comma-separated substrings of tracy zones to output to stdout. If "
"empty, no zones will be matched with substrs. Note that zones can "
"still be matched with --zone_regex flag.");
ABSL_FLAG(std::string, zone_regex, "",
"ECMAScript regex of tracy zones to output to stdout. If empty, no "
"zones will be matched with regex. Note that it could be much slow "
"if the model has too many zones. Zones would be matched faster with "
"--zone_substrs flag.");
ABSL_FLAG(std::string, thread_regex, ".",
"ECMAScript regex of threads to output to stdout.");
ABSL_FLAG(std::string, duration_unit, "milliseconds",
Expand Down Expand Up @@ -74,6 +81,7 @@ void Output(tracy::Worker& worker) {
output_csv_file,
absl::GetFlag(FLAGS_output_zones_stdout),
absl::GetFlag(FLAGS_output_ops_stdout),
absl::GetFlag(FLAGS_zone_substrs),
absl::GetFlag(FLAGS_zone_regex),
absl::GetFlag(FLAGS_thread_regex),
ToUnit(absl::GetFlag(FLAGS_duration_unit)))
Expand Down

0 comments on commit 74e2d8e

Please sign in to comment.