Print summary results in page for E2E (#1353)

intel · Feb 13, 2025 · 3510f91 · 3510f91
1 parent 80c3755
commit 3510f91
Show file tree

Hide file tree

Showing 7 changed files with 134 additions and 28 deletions.
diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml
@@ -140,14 +140,6 @@ runs:
                 LOG_NAME=inductor_${suite}_${dt}_${mode}_xpu_${scenario}_all.log
                 rm -f ${LOG_DIR}/${LOG_NAME}
                 find ${LOG_DIR}/ -name "inductor_${suite}_${dt}_${mode}_xpu_${scenario}_card*.log" |xargs cat >> ${LOG_DIR}/${LOG_NAME} 2>&1
-                if [ "${scenario}" == "accuracy" ];then
-                  python ../torch-xpu-ops/.github/ci_expected_accuracy/check_expected.py \
-                        --suite $suite \
-                        --mode $mode \
-                        --dtype $dt \
-                        --csv_file ${LOG_DIR}/inductor_${suite}_${dt}_${mode}_xpu_${scenario}.csv \
-                        2>&1 |tee -a inductor_log/summary_accuracy.log
-                fi
               done
             done
           done

diff --git a/.github/ci_expected_accuracy/check_expected.py b/.github/ci_expected_accuracy/check_expected.py
@@ -92,8 +92,6 @@
 print("Not run/in models:", len(lost_models), lost_models)
 print(f"Pass rate: {len(passed_models) / len(model_names) * 100:.2f}%")
 
-if len(new_pass_models + new_models) > 0:
-    print("NOTE: New models result, please update the reference", new_pass_models, new_models)
-    if args.update:
-        refer_data.to_csv(refer_file, sep=',', encoding='utf-8', index=False)
-        print("Updated. Now, confirm the changes to .csvs and `git add` them if satisfied.")
+# update reference csv
+if len(new_pass_models + new_models) > 0 and args.update:
+    refer_data.to_csv(refer_file, sep=',', encoding='utf-8', index=False)
diff --git a/.github/scripts/e2e_summary.sh b/.github/scripts/e2e_summary.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+results_dir="$1"
+check_file="$(dirname "$0")/../ci_expected_accuracy/check_expected.py"
+
+# Accuracy
+accuracy=$(find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" -c)
+if [ "${accuracy}" -gt 0 ];then
+    echo "### Accuracy"
+    printf "| Category | Total | \$\${\\color{green}Passed}\$\$ | Pass Rate | \$\${\\color{red}Failed}\$\$ | "
+    printf "\$\${\\color{blue}Xfailed}\$\$ | \$\${\\color{orange}Timeout}\$\$ | New Passed | New Enabled | Not Run |\n"
+    printf "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"
+    echo > tmp-summary.txt
+    echo > tmp-details.txt
+    for csv in $(find "${results_dir}" -name "*.csv" |grep -E "_xpu_accuracy.csv" |sort)
+    do
+        category="$(echo "${csv}" |sed 's/.*inductor_//;s/_xpu_accuracy.*//')"
+        suite="$(echo "${csv}" |sed 's/.*inductor_//;s/_.*//;s/timm/timm_models/')"
+        mode="$(echo "${csv}" |sed 's/_xpu_accuracy.*//;s/.*_//')"
+        dt="$(echo "${csv}" |sed -E 's/.*inductor_[a-z]*_//;s/models_//;s/_infer.*|_train.*//')"
+        python "${check_file}" --suite "${suite}" --mode "${mode}" --dtype "${dt}" --csv_file "${csv}" > tmp-result.txt
+        test_result="$(sed 's/, /,/g' tmp-result.txt |awk '{
+            if($0 ~/Total/){
+                total = $3;
+            }
+            if($0 ~/Passed/){
+                passed = $3;
+            }
+            if($0 ~/Pass rate/){
+                pass_rate = $3;
+            }
+            if($0 ~/Real failed/){
+                failed = $4;
+                failed_models = $5;
+            }
+            if($0 ~/Expected failed/){
+                xfail = $4;
+                xfail_models = $5;
+            }
+            if($0 ~/timeout/){
+                timeout = $4;
+                timeout_models = $5;
+            }
+            if($0 ~/Failed to passed/){
+                new_passed = $5;
+                new_passed_models = $6;
+            }
+            if($0 ~/Not run/){
+                not_run = $4;
+                not_run_models = $5;
+            }
+            if($0 ~/New models/){
+                new_enabled = $3;
+                new_enabled_models = $4;
+            }
+        }END {
+            printf(" %d | %d | %s | %d | %d | %d | %d | %d | %d\n",
+                total, passed, pass_rate, failed, xfail, timeout, new_passed, new_enabled, not_run);
+        }')"
+        echo "| ${category} | ${test_result} |" >> tmp-summary.txt
+        sed -i '
+            s/Real failed models:/$${\\color{red}Real \\space failed \\space models}$$:/g;
+            s/Expected failed models:/$${\\color{blue}Expected \\space failed \\space models}$$:/g;
+            s/Warning timeout models:/$${\\color{orange}Warning \\space timeout \\space models}$$:/g;
+            s/Failed to passed models:/$${\\color{green}Failed \\space to \\space passed \\space models}$$:/g;
+        ' tmp-result.txt
+        {
+            echo "<table><thead><tr><th colspan=2>$(sed 's/=//g' tmp-result.txt |head -n 1)</th></tr></thead><tbody>"
+            sed "1d" tmp-result.txt |awk -F: '{printf("<tr><td>%s</td><td>%s</td></tr>\n", $1, $2)}'
+            echo -e "</tbody></table>\n"
+        } >> tmp-details.txt
+    done
+    cat tmp-summary.txt
+    grep -v "<td> 0 \[\]</td>" tmp-details.txt
+    rm -rf tmp-*.txt
+fi
+
+# Performance
+performance=$(find "${results_dir}" -name "*.csv" |grep -E "_xpu_performance.csv" -c)
+if [ "${performance}" -gt 0 ];then
+    echo "### Performance"
+    echo "| Category | Total | \$\${\\color{green}Passed}\$\$ | Pass Rate | Speedup |"
+    echo "| --- | --- | --- | --- | --- |"
+    for csv in $(find "${results_dir}" -name "*.csv" |grep -E "_xpu_performance.csv" |sort)
+    do
+        category="$(echo "${csv}" |sed 's/.*inductor_//;s/_xpu_performance.*//')"
+        test_result="$(awk -M -v PREC=1024 -F ',' 'BEGIN{
+            total = 0;
+            pass = 0;
+            fail = 0;
+            speedup = 1;
+        }{
+            if ($1 == "xpu") {
+                total++;
+                if ($4 > 0) {
+                    pass++;
+                    speedup *= $4;
+                }else {
+                    fail++;
+                }
+            }
+        }END{
+            printf("%d | %d | %.2f% | %.3f\n", total, pass, pass/total*100, speedup^(1/pass))
+        }' "${csv}")"
+        echo "| ${category} | ${test_result} |"
+    done
+    echo
+fi
diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml
@@ -285,16 +285,18 @@ jobs:
           mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
           find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
           tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
-          failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
-          timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
+          # Print summary
+          bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
+          failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
+          timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
           if [ ${timeout_models} -ne 0 ];then
             TIMEOUT_MODELS="$(
-              grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
+              grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
             )"
             echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
           fi
           if [ ${failed_models} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
+            grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
             exit 1
           fi
       - name: Upload Inductor XPU E2E Data

diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml
@@ -296,16 +296,18 @@ jobs:
           mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
           find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
           tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
-          failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
-          timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
+          # Print summary
+          bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
+          failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
+          timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
           if [ ${timeout_models} -ne 0 ];then
             TIMEOUT_MODELS="$(
-              grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
+              grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
             )"
             echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
           fi
           if [ ${failed_models} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
+            grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
             exit 1
           fi
       - name: Upload Inductor XPU E2E Data

diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml
@@ -241,16 +241,18 @@ jobs:
           mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
           find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days
           tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs
-          failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
-          timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
+          # Print summary
+          bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
+          failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
+          timeout_models=$(grep "timeout models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
           if [ ${timeout_models} -ne 0 ];then
             TIMEOUT_MODELS="$(
-              grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
+              grep -B 1 "timeout models: [1-9]" ${GITHUB_STEP_SUMMARY}
             )"
             echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
           fi
           if [ ${failed_models} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
+            grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
             exit 1
           fi
       - name: Upload Inductor XPU E2E Data

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -184,9 +184,11 @@ jobs:
         run: |
           rm -rf ${{ github.workspace }}/upload_files
           cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
-          failed_case=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
-          if [ ${failed_case} -ne 0 ];then
-            grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log
+          # Print summary
+          bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh ${{ github.workspace }}/upload_files >> ${GITHUB_STEP_SUMMARY}
+          failed_models=$(grep "Real failed models: *[1-9]" ${GITHUB_STEP_SUMMARY} |wc -l || true)
+          if [ ${failed_models} -ne 0 ];then
+            grep -E "Real failed models: [1-9]|Summary for" ${GITHUB_STEP_SUMMARY} |grep "failed" -B 1
             exit 1
           fi
       - name: Upload Inductor XPU E2E Data