Add perf reporting for ccl async mode (#16658)
### Ticket
#16648 

### Problem description
Perf reporting is needed for the async all-gather op.

### What's changed
Adds `perf_report` (tests/ttnn/unit_tests/operations/ccl/perf/async_perf_csv.py), which condenses a profiler ops CSV into a per-configuration min/avg/max report, plus a `run_profile.sh` wrapper that profiles the async all-gather tests and prints the resulting table.

<img width="1292" alt="Screenshot 2025-01-24 at 8 42 29 PM" src="https://github.com/user-attachments/assets/33c64cf1-9f31-4567-89ed-813acc16e49d" />
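
For reference, a minimal sketch of how the report can be consumed directly (the CSV path below is hypothetical; `run_profile.sh` extracts the real one from the `profile_this.py` output, and `async_perf_csv.py` must be on `PYTHONPATH`):

```python
from tabulate import tabulate

from async_perf_csv import perf_report

# Hypothetical profiler-generated ops CSV from an async all-gather test run.
averages_df = perf_report("generated/profiler/reports/example_ops_perf.csv")

# Each metric column is reported as a "min - avg - max" string per test configuration.
print(tabulate(averages_df, headers="keys", tablefmt="pretty"))
```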


### Checklist
- [ ] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] **(For models and ops writers)** Full [new models](https://github.com/tenstorrent/tt-metal/actions/workflows/full-new-models-suite.yaml) tests pass
- [ ] New/Existing tests provide coverage for changes
Aswinmcw authored and williamlyTT committed Jan 30, 2025
1 parent 64f111d commit 826e408
Showing 5 changed files with 493 additions and 16 deletions.
186 changes: 186 additions & 0 deletions tests/ttnn/unit_tests/operations/ccl/perf/async_perf_csv.py
@@ -0,0 +1,186 @@
# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import pandas as pd
import os
import re
import time
import shutil


def perf_report(file_path):
    """Build a min - avg - max CCL perf summary from a profiler ops CSV and save it to a dated CSV."""
    df = pd.read_csv(file_path)

    # Keep only traced ops with a non-zero op-to-op latency.
    df = df[df["OP TO OP LATENCY [ns]"] != 0]
    df = df[df["METAL TRACE ID"].notna() & (df["METAL TRACE ID"] != "")]

    # Normalize the ATTRIBUTES column: derive n_chips from ring_size and drop device-specific keys.
    def remove_keys_from_attributes(attributes):
        attributes = attributes.replace(";", ",").replace("'", '"')

        keys_to_remove = ["receiver_device_id", "ring_index", "sender_device_id"]

        try:
            attributes_dict = eval(attributes)

            attributes_dict["topology"] = attributes_dict.get("topology", "").split("::")[-1]

            if "ring_size" not in attributes_dict:
                raise KeyError("Missing 'ring_size' attribute")

            attributes_dict["n_chips"] = int(attributes_dict["ring_size"])

            for key in keys_to_remove:
                if key in attributes_dict:
                    del attributes_dict[key]

            modified_attributes = str(attributes_dict).replace(",", ";").replace('"', "'")
            return modified_attributes
        except Exception as e:
            print(f"Error processing attributes: {e}")
            return attributes

    df["ATTRIBUTES"] = df["ATTRIBUTES"].apply(remove_keys_from_attributes)

    def safe_parse_attributes(attributes):
        attributes = attributes.replace(";", ",")

        try:
            attr_dict = eval(attributes)
            return attr_dict
        except Exception as e:
            print(f"Error processing attributes: {e}")
            return {}

    df["topology"] = df["ATTRIBUTES"].apply(
        lambda x: safe_parse_attributes(x).get("topology", "") if isinstance(safe_parse_attributes(x), dict) else ""
    )

    df["dim"] = df["ATTRIBUTES"].apply(
        lambda x: safe_parse_attributes(x).get("dim", safe_parse_attributes(x).get("scatter_dim", ""))
        if isinstance(safe_parse_attributes(x), dict)
        else ""
    )

    df["num_links"] = df["ATTRIBUTES"].apply(
        lambda x: safe_parse_attributes(x).get("num_links", "") if isinstance(safe_parse_attributes(x), dict) else ""
    )

    df["output_mem_config"] = df["ATTRIBUTES"].apply(
        lambda x: ", ".join(
            [
                match.split("::")[1]
                for match in re.findall(
                    r"(BufferType::\w+|TensorMemoryLayout::\w+)",
                    str(safe_parse_attributes(x).get("output_mem_config", "")),
                )
            ]
        )
        if isinstance(safe_parse_attributes(x), dict)
        else ""
    )

    df["n_chips"] = df["ATTRIBUTES"].apply(
        lambda x: int(safe_parse_attributes(x).get("ring_size", ""))
        if isinstance(safe_parse_attributes(x), dict)
        else 0
    )

    group_columns = [
        "ATTRIBUTES",
        "INPUT_0_W",
        "INPUT_0_Z",
        "INPUT_0_Y",
        "INPUT_0_X",
        "INPUT_0_LAYOUT",
        "INPUT_0_DATATYPE",
        "OUTPUT_0_W",
        "OUTPUT_0_Z",
        "OUTPUT_0_Y",
        "OUTPUT_0_X",
        "OUTPUT_0_LAYOUT",
        "OUTPUT_0_DATATYPE",
    ]

    grouped = df.groupby(group_columns)

    numeric_columns = [
        "HOST DURATION [ns]",
        "Cycles Count",
        "OP TO OP LATENCY [ns]",
        "DEVICE FW DURATION [ns]",
        "DEVICE KERNEL DURATION [ns]",
    ]

    averages_data = []

    for i, (group, group_df) in enumerate(grouped, start=1):
        # Drop the first 2 * n_chips rows of each group.
        group_df = group_df.iloc[2 * group_df["n_chips"].iloc[0] :]

        # For each device, drop the row with the highest op-to-op latency.
        group_df = group_df.sort_values(by=["DEVICE ID", "OP TO OP LATENCY [ns]"]).reset_index(drop=True)
        group_df = group_df.groupby("DEVICE ID").apply(lambda x: x.iloc[0:-1]).reset_index(drop=True)

        group_df.rename(columns={"INPUT_0_LAYOUT": "Layout", "INPUT_0_DATATYPE": "Data Type"}, inplace=True)

        group_df["Input Shape"] = group_df.apply(
            lambda row: f"[{int(row['INPUT_0_W'])}, {int(row['INPUT_0_Z'])}, {int(row['INPUT_0_Y'])}, {int(row['INPUT_0_X'])}]",
            axis=1,
        )
        group_df["Output Shape"] = group_df.apply(
            lambda row: f"[{int(row['OUTPUT_0_W'])}, {int(row['OUTPUT_0_Z'])}, {int(row['OUTPUT_0_Y'])}, {int(row['OUTPUT_0_X'])}]",
            axis=1,
        )
        group_df["Cycles Count"] = group_df["DEVICE FW END CYCLE"] - group_df["DEVICE FW START CYCLE"]

        group_file_path = file_path.replace(".csv", f"_group_{i}.csv")

        group_df.to_csv(group_file_path, index=False)

        group_data = {
            "Input Shape": group_df["Input Shape"].iloc[0],
            "OP CODE": group_df["OP CODE"].iloc[0],
            "dim": group_df["dim"].iloc[0] if "dim" in group_df else "",
            "num_links": group_df["num_links"].iloc[0] if "num_links" in group_df else "",
            "output_mem_config": group_df["output_mem_config"].iloc[0] if "output_mem_config" in group_df else "",
            "topology": group_df["topology"].iloc[0],
            "Layout": group_df["Layout"].iloc[0] if "Layout" in group_df else "",
            "Data Type": group_df["Data Type"].iloc[0] if "Data Type" in group_df else "",
        }

        # Report each metric as "min - avg - max"; the two largest samples are treated as outliers,
        # so the max shown is the third-largest value and the average excludes the top two.
        for column in numeric_columns:
            min_val = round(group_df[column].min(), 2)
            largest_vals = group_df[column].nlargest(3)
            max_val = round(largest_vals.iloc[-1], 2)
            if min_val == max_val:
                avg_val = min_val
            else:
                avg_val = round(group_df[column][~group_df[column].isin(largest_vals.head(2))].mean(), 2)

            group_data[column] = f"{min_val} - {avg_val} - {max_val}"

        averages_data.append(group_data)

    averages_df = pd.DataFrame(averages_data)
    op_code = averages_df.iloc[0]["OP CODE"]

    today = time.strftime("%Y_%m_%d")
    if op_code == "AllGather":
        ccl_perf_file_path = f"CCL_all_gather_Perf_{today}.csv"
    elif op_code == "AllGatherAsync":
        ccl_perf_file_path = f"CCL_all_gather_async_Perf_{today}.csv"
    elif op_code == "ReduceScatter":
        ccl_perf_file_path = f"CCL_reduce_scatter_Perf_{today}.csv"
    else:
        ccl_perf_file_path = f"CCL_Perf_{today}.csv"

    shutil.copy(file_path, ccl_perf_file_path)

    logs_dir = "generated/profiler/.logs"
    os.makedirs(logs_dir, exist_ok=True)
    shutil.copy(ccl_perf_file_path, logs_dir)

    averages_df.to_csv(ccl_perf_file_path, index=False)

    print(f"CCL Perf report CSV saved to: {ccl_perf_file_path}")

    return averages_df
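
For context, a small standalone sketch of the aggregation used for each metric column above: the two largest samples in a group are treated as outliers, so the reported max is the third-largest value and the average is taken over the remaining samples (the numbers below are made up):

```python
import pandas as pd

# Illustrative latency samples [ns] for one test configuration.
samples = pd.Series([1000, 1010, 1020, 1500, 2000])

min_val = round(samples.min(), 2)          # 1000
largest_vals = samples.nlargest(3)         # 2000, 1500, 1020
max_val = round(largest_vals.iloc[-1], 2)  # 1020: third-largest sample
# Average over everything except the two largest samples.
avg_val = round(samples[~samples.isin(largest_vals.head(2))].mean(), 2)

print(f"{min_val} - {avg_val} - {max_val}")  # 1000 - 1010.0 - 1020
```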
107 changes: 107 additions & 0 deletions tests/ttnn/unit_tests/operations/ccl/perf/run_profile.sh
@@ -0,0 +1,107 @@
#!/bin/sh
MODULE_DIR="tests/ttnn/unit_tests/operations/ccl/perf"

# Defaults
DEBUG=false
TARGET="n300"

# Function to display help
show_help() {
    echo "Usage: ./tests/ttnn/unit_tests/operations/ccl/perf/run_profile.sh [OPTIONS]"
    echo
    echo "Options:"
    echo "  -d, --debug    Enable debug mode to show real-time output."
    echo "  -t, --target   Specify the target configuration (t3000 or n300 or tg). Default is n300."
    echo "  -h, --help     Display this help message."
    echo
    echo "Example:"
    echo "  ./tests/ttnn/unit_tests/operations/ccl/perf/run_profile.sh --debug --target n300"
    echo "  ./tests/ttnn/unit_tests/operations/ccl/perf/run_profile.sh -h"
}

# Parse command-line arguments
while [ $# -gt 0 ]; do
    case "$1" in
        --debug|-d)
            DEBUG=true
            shift
            ;;
        --help|-h)
            show_help
            exit 0
            ;;
        --target|-t)
            # Ensure there is an argument following the target flag
            if [ -z "$2" ]; then
                echo "Error: No target specified after $1."
                show_help
                exit 1
            fi

            TARGET="$2"  # Set the target configuration
            shift 2

            # Validate the target value
            if [ "$TARGET" != "t3000" ] && [ "$TARGET" != "tg" ] && [ "$TARGET" != "n300" ]; then
                echo "Error: Invalid target configuration: $TARGET. Must be 't3000' or 'n300' or 'tg'."
                exit 1
            fi
            ;;
        *)
            echo "Unknown option: $1"
            show_help
            exit 1
            ;;
    esac
done

# Function to run the profiling command and extract the CSV path
run_profile_and_extract_csv() {
    command="./tt_metal/tools/profiler/profile_this.py -n all_gather_async_$TARGET -c 'pytest tests/ttnn/unit_tests/operations/ccl/perf/test_ccl_async_perf.py::test_all_gather_async_$TARGET'"

    if [ "$DEBUG" = true ]; then
        echo "Running profiling command for target $TARGET in debug mode..."
        full_output=$(eval $command 2>&1 | tee /dev/tty)
    else
        echo "Running profiling command for target $TARGET..."
        full_output=$(eval $command 2>&1)
    fi

    # Extract the CSV path
    csv_path=$(echo "$full_output" | grep -oE 'OPs csv generated at: (.+\.csv)' | sed -E 's/OPs csv generated at: //')

    if [ -n "$csv_path" ]; then
        echo "CSV path found: $csv_path"
        echo "Generating performance report..."

        tmp_file="/tmp/perf_report_output.log"
        PYTHONPATH="$MODULE_DIR" python3 -c "
import sys
import pandas as pd
from async_perf_csv import perf_report
from tabulate import tabulate
try:
    # Generate the report and convert it to a DataFrame
    average_df = perf_report('$csv_path')
    # Print the DataFrame in a pretty table format
    print('Min - Avg - Max by Common Runs:')
    print(tabulate(average_df, headers='keys', tablefmt='pretty'))
except Exception as e:
    print(f'Error in performance report generation: {e}', file=sys.stderr)
    sys.exit(1)
" 2>&1 | tee "$tmp_file"

        if grep -q "Error in performance report generation" "$tmp_file"; then
            echo "Error: Performance report generation failed."
            exit 1
        fi

    else
        echo "CSV path not found in the command output."
        exit 1
    fi
}

# Run the function
run_profile_and_extract_csv