Skip to content

Commit

Permalink
Print summary results in page for E2E (#1353)
Browse files Browse the repository at this point in the history
  • Loading branch information
mengfei25 authored Feb 13, 2025
1 parent 80c3755 commit 3510f91
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 28 deletions.
8 changes: 0 additions & 8 deletions .github/actions/inductor-xpu-e2e-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,6 @@ runs:
LOG_NAME=inductor_${suite}_${dt}_${mode}_xpu_${scenario}_all.log
rm -f ${LOG_DIR}/${LOG_NAME}
find ${LOG_DIR}/ -name "inductor_${suite}_${dt}_${mode}_xpu_${scenario}_card*.log" |xargs cat >> ${LOG_DIR}/${LOG_NAME} 2>&1
if [ "${scenario}" == "accuracy" ];then
python ../torch-xpu-ops/.github/ci_expected_accuracy/check_expected.py \
--suite $suite \
--mode $mode \
--dtype $dt \
--csv_file ${LOG_DIR}/inductor_${suite}_${dt}_${mode}_xpu_${scenario}.csv \
2>&1 |tee -a inductor_log/summary_accuracy.log
fi
done
done
done
Expand Down
8 changes: 3 additions & 5 deletions .github/ci_expected_accuracy/check_expected.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@
print("Not run/in models:", len(lost_models), lost_models)
print(f"Pass rate: {len(passed_models) / len(model_names) * 100:.2f}%")

if len(new_pass_models + new_models) > 0:
print("NOTE: New models result, please update the reference", new_pass_models, new_models)
if args.update:
refer_data.to_csv(refer_file, sep=',', encoding='utf-8', index=False)
print("Updated. Now, confirm the changes to .csvs and `git add` them if satisfied.")
# update reference csv
if len(new_pass_models + new_models) > 0 and args.update:
refer_data.to_csv(refer_file, sep=',', encoding='utf-8', index=False)
108 changes: 108 additions & 0 deletions .github/scripts/e2e_summary.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/bin/bash

results_dir="$1"
check_file="$(dirname "$0")/../ci_expected_accuracy/check_expected.py"

# Accuracy
#
# For every CSV under ${results_dir} whose path matches "_xpu_accuracy.csv",
# run check_expected.py on it and print to stdout:
#   1. a markdown table with one row of counts per category, then
#   2. one HTML table per category built from the checker's report lines
#      (rows whose value cell is exactly " 0 []" are dropped).

#######################################
# Print the count columns of one summary row from a checker report.
# The field positions below ($3/$4/$5) are the ones this script has always
# read; they assume each report line has the form "<label words>: <count> ...".
# Arguments: $1 - file holding the stdout of check_expected.py
# Outputs:   " total | passed | pass_rate | failed | xfail | timeout |
#            new_passed | new_enabled | not_run" (one line) on stdout.
#            Counts that never appear in the report are printed as 0.
#######################################
accuracy_summary_row() {
  awk '{
    # Collapse ", " so a printed list cannot shift the field positions.
    gsub(/, /, ",")
    if ($0 ~ /Total/)            { total = $3 }
    if ($0 ~ /Passed/)           { passed = $3 }
    if ($0 ~ /Pass rate/)        { pass_rate = $3 }
    if ($0 ~ /Real failed/)      { failed = $4 }
    if ($0 ~ /Expected failed/)  { xfail = $4 }
    if ($0 ~ /timeout/)          { timeout = $4 }
    if ($0 ~ /Failed to passed/) { new_passed = $5 }
    if ($0 ~ /Not run/)          { not_run = $4 }
    if ($0 ~ /New models/)       { new_enabled = $3 }
  } END {
    printf(" %d | %d | %s | %d | %d | %d | %d | %d | %d\n",
      total, passed, pass_rate, failed, xfail, timeout, new_passed, new_enabled, not_run)
  }' "$1"
}

#######################################
# Render one checker report as an HTML table followed by a blank line.
# The first report line (with every "=" removed) becomes the table header;
# each remaining line is split on ":" into a label cell and a value cell.
# Four labels are wrapped in LaTeX colour markup before splitting.
# NOTE(review): because of that relabelling and the split into <td> cells,
# the plain text "Real failed models: N" / "timeout models: N" never appears
# in this script's output, so callers that grep the output for those plain
# labels will not match anything - confirm the callers' failure checks.
# Arguments: $1 - file holding the stdout of check_expected.py
# Outputs:   HTML on stdout; the input file is not modified.
#######################################
accuracy_details_table() {
  local colorize='
    s/Real failed models:/$${\\color{red}Real \\space failed \\space models}$$:/g;
    s/Expected failed models:/$${\\color{blue}Expected \\space failed \\space models}$$:/g;
    s/Warning timeout models:/$${\\color{orange}Warning \\space timeout \\space models}$$:/g;
    s/Failed to passed models:/$${\\color{green}Failed \\space to \\space passed \\space models}$$:/g;
  '
  local header
  header="$(head -n 1 "$1" | sed -e "${colorize}" -e 's/=//g')"
  printf '%s\n' "<table><thead><tr><th colspan=2>${header}</th></tr></thead><tbody>"
  sed -e "${colorize}" -e '1d' "$1" |
    awk -F: '{printf("<tr><td>%s</td><td>%s</td></tr>\n", $1, $2)}'
  printf '%s\n\n' "</tbody></table>"
}

# Collect the CSV list once; an array keeps paths containing blanks intact.
mapfile -t accuracy_csvs < <(
  find "${results_dir}" -name "*.csv" | grep -E "_xpu_accuracy.csv" | sort
)
if [ "${#accuracy_csvs[@]}" -gt 0 ]; then
  # Private scratch directory: nothing is created in, or globbed out of,
  # the caller's working directory.
  accuracy_tmp="$(mktemp -d)" || exit 1
  echo "### Accuracy"
  # The markdown is passed as data, not as the printf format, so its
  # backslashes are never interpreted as escape sequences.
  printf '%s' '| Category | Total | $${\color{green}Passed}$$ | Pass Rate | $${\color{red}Failed}$$ | '
  printf '%s\n' '$${\color{blue}Xfailed}$$ | $${\color{orange}Timeout}$$ | New Passed | New Enabled | Not Run |'
  printf '%s\n' '| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |'
  : > "${accuracy_tmp}/details.txt"
  for csv in "${accuracy_csvs[@]}"; do
    # Checker arguments are derived from the CSV path
    # (inductor_<suite>_<dtype>_<mode>_xpu_accuracy.csv).
    category="$(echo "${csv}" | sed 's/.*inductor_//;s/_xpu_accuracy.*//')"
    suite="$(echo "${csv}" | sed 's/.*inductor_//;s/_.*//;s/timm/timm_models/')"
    mode="$(echo "${csv}" | sed 's/_xpu_accuracy.*//;s/.*_//')"
    dt="$(echo "${csv}" | sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')"
    python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dt}" \
      --csv_file "${csv}" > "${accuracy_tmp}/result.txt"
    # Summary rows stream straight to stdout; the detail tables are buffered
    # so that they all follow the complete summary table.
    printf '%s\n' "| ${category} | $(accuracy_summary_row "${accuracy_tmp}/result.txt") |"
    accuracy_details_table "${accuracy_tmp}/result.txt" >> "${accuracy_tmp}/details.txt"
  done
  # Blank line ends the markdown table before the HTML starts.
  echo
  # Hide detail rows that report zero models.
  grep -v '<td> 0 \[\]</td>' "${accuracy_tmp}/details.txt"
  rm -rf -- "${accuracy_tmp}"
fi

# Performance
#
# For every CSV under ${results_dir} whose path matches
# "_xpu_performance.csv", print one markdown table row with the number of
# xpu rows, how many of them have a positive speedup, the pass rate and the
# geometric mean of the positive speedups.

#######################################
# Print the count columns of one performance summary row.
# Only rows whose first column is exactly "xpu" are counted; the speedup is
# read from the fourth column and a row passes when that value is > 0.
# The geometric mean is computed as exp(mean(log(speedup))) so it cannot
# overflow and needs no arbitrary-precision awk extension.
# Arguments: $1 - comma-separated performance CSV
# Outputs:   "total | pass | pass_rate% | geomean" (one line) on stdout.
#            Pass rate is 0.00% when there are no xpu rows and the geomean
#            is 0.000 when no row passed (instead of dividing by zero).
#######################################
performance_summary_row() {
  awk -F ',' '
    $1 == "xpu" {
      total++
      # "+ 0" forces a numeric comparison even for a non-numeric cell.
      speedup = $4 + 0
      if (speedup > 0) {
        pass++
        log_sum += log(speedup)
      }
    }
    END {
      rate = (total > 0) ? pass / total * 100 : 0
      geomean = (pass > 0) ? exp(log_sum / pass) : 0
      printf("%d | %d | %.2f%% | %.3f\n", total, pass, rate, geomean)
    }' "$1"
}

# Collect the CSV list once; an array keeps paths containing blanks intact.
mapfile -t performance_csvs < <(
  find "${results_dir}" -name "*.csv" | grep -E "_xpu_performance.csv" | sort
)
if [ "${#performance_csvs[@]}" -gt 0 ]; then
  echo "### Performance"
  printf '%s\n' '| Category | Total | $${\color{green}Passed}$$ | Pass Rate | Speedup |'
  printf '%s\n' '| --- | --- | --- | --- | --- |'
  for csv in "${performance_csvs[@]}"; do
    category="$(echo "${csv}" | sed 's/.*inductor_//;s/_xpu_performance.*//')"
    printf '%s\n' "| ${category} | $(performance_summary_row "${csv}") |"
  done
  echo
fi
10 changes: 6 additions & 4 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -285,16 +285,18 @@ jobs:
mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
# Print summary
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
fi
- name: Upload Inductor XPU E2E Data
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/nightly_ondemand_rolling.yml
Original file line number Diff line number Diff line change
Expand Up @@ -296,16 +296,18 @@ jobs:
mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
# Print summary
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
fi
- name: Upload Inductor XPU E2E Data
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/nightly_ondemand_whl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -241,16 +241,18 @@ jobs:
mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
# Print summary
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
)"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
fi
- name: Upload Inductor XPU E2E Data
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,11 @@ jobs:
run: |
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
failed_case=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
if [ ${failed_case} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log
# Print summary
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
exit 1
fi
- name: Upload Inductor XPU E2E Data
Expand Down

0 comments on commit 3510f91

Please sign in to comment.