Skip to content

Commit

Permalink
Check dynamo graph-breaks in CI (#96346)
Browse files Browse the repository at this point in the history
- add graph-breaks baselines
- add check_graph_breaks script (message users on regression or improvement)
- hook up test.sh for existing accuracy job

Refactor graph-break CI check

Take steps toward merging checker with existing check flow,
consider merging it all the way inside the bench runner.

csvs
Pull Request resolved: pytorch/pytorch#96346
Approved by: https://github.com/ezyang
  • Loading branch information
wconstab authored and cyyever committed Mar 23, 2023
1 parent 193bf71 commit 8fe782c
Show file tree
Hide file tree
Showing 10 changed files with 377 additions and 1 deletion.
7 changes: 6 additions & 1 deletion .ci/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,12 @@ test_single_dynamo_benchmark() {
--output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
python benchmarks/dynamo/check_csv.py \
-f "$TEST_REPORTS_DIR/${name}_${suite}.csv"
if [[ "${TEST_CONFIG}" != *cpu_accuracy* ]] && [[ "${TEST_CONFIG}" != *dynamic* ]]; then
# Skip the graph-break check for cpu_accuracy and dynamic configs: their expected baselines are not tracked yet, and their filenames would need to be differentiated first
python benchmarks/dynamo/check_graph_breaks.py \
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/${name}_${suite}${shard_id}.csv"
fi
fi
}

Expand All @@ -341,7 +347,6 @@ test_dynamo_benchmark() {
else
# Check inference with --float32
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --float32 "$@"

if [[ "${TEST_CONFIG}" != *cpu_accuracy* ]]; then
# Check training with --amp
test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
Expand Down
83 changes: 83 additions & 0 deletions benchmarks/dynamo/check_graph_breaks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import argparse
import sys
import textwrap

import pandas as pd


def get_field(csv, model_name: str, field: str, typ=float):
    """Return one cell of a benchmark results table, converted with ``typ``.

    Args:
        csv: pandas DataFrame with a ``name`` column identifying each model.
        model_name: Value of the ``name`` column that selects the row.
        field: Column whose value is returned.
        typ: Callable applied to the scalar cell value (defaults to ``float``).

    Returns:
        ``typ(value)`` for the single row whose ``name`` equals ``model_name``.

    Raises:
        KeyError: If no row is named ``model_name`` (pandas also raises
            ``KeyError`` when ``name`` or ``field`` is not a column).
        ValueError: If more than one row is named ``model_name``.
    """
    matches = csv.loc[csv["name"] == model_name, field]
    if matches.empty:
        raise KeyError(f"model {model_name!r} not found in the 'name' column")
    if len(matches) > 1:
        raise ValueError(
            f"model {model_name!r} appears {len(matches)} times in the 'name' column"
        )
    # Convert the scalar, not the Series: calling int()/float() on a
    # one-element Series is deprecated in recent pandas releases.
    return typ(matches.iloc[0])


def check_graph_breaks(actual_csv, expected_csv, expected_filename):
    """Compare per-model graph-break counts against a checked-in baseline.

    Every model listed in ``actual_csv`` is compared with the row of the same
    name in ``expected_csv`` and a per-model status is printed:

    * ``PASS``     - the counts are equal.
    * ``FAIL``     - the model now has more graph breaks than the baseline.
    * ``IMPROVED`` - the model now has fewer graph breaks than the baseline.
    * ``MISSING``  - the baseline has no row for the model.

    Models that appear only in ``expected_csv`` are not examined.

    Args:
        actual_csv: pandas DataFrame with ``name`` and ``graph_breaks``
            columns holding the results of the current run.
        expected_csv: pandas DataFrame with the same two columns holding the
            baseline. If a name is repeated, the last row wins.
        expected_filename: Path of the baseline file; only used in the
            message telling the user which file to update.

    Returns:
        A ``(mismatched, msg)`` tuple. ``mismatched`` is a list that is
        non-empty (truthy) when any model regressed, improved, or is missing
        from the baseline; ``msg`` is the human-readable report, or ``""``
        when everything passed.
    """
    # Build the baseline lookup once rather than re-filtering the frame for
    # every model.
    expected_by_model = dict(zip(expected_csv["name"], expected_csv["graph_breaks"]))

    failed = []
    improved = []
    missing = []

    for model, raw_graph_breaks in zip(actual_csv["name"], actual_csv["graph_breaks"]):
        graph_breaks = int(raw_graph_breaks)

        if model not in expected_by_model:
            # A model without a baseline row is reported instead of crashing
            # the whole check on the lookup.
            expected_graph_breaks = None
            status = "MISSING"
            missing.append(model)
        else:
            expected_graph_breaks = int(expected_by_model[model])
            if graph_breaks == expected_graph_breaks:
                status = "PASS"
            elif graph_breaks > expected_graph_breaks:
                status = "FAIL"
                failed.append(model)
            else:
                status = "IMPROVED"
                improved.append(model)

        print(
            f"""
            {model:34}:
                graph_breaks={graph_breaks},
                expected_graph_breaks={expected_graph_breaks},
                {status}
            """
        )

    msg = ""
    if failed or improved or missing:
        if failed:
            msg += textwrap.dedent(
                f"""
                Error: {len(failed)} models have new dynamo graph breaks:
                    {' '.join(failed)}
                """
            )
        if improved:
            msg += textwrap.dedent(
                f"""
                Improvement: {len(improved)} models have fixed dynamo graph breaks:
                    {' '.join(improved)}
                """
            )
        if missing:
            msg += textwrap.dedent(
                f"""
                Missing: {len(missing)} models have no entry in the expected baseline:
                    {' '.join(missing)}
                """
            )
        msg += textwrap.dedent(
            f"""
            If this change is expected, you can update `{expected_filename}` to reflect the new baseline.
            This can either be done manually, or by downloading artifacts from your PR CI job.
            (Search artifacts files for test-reports-test-inductor_torchbench, _timm, _huggingface)
            """
        )
    return failed or improved or missing, msg


def main():
    """Command-line entry point for the graph-break baseline check.

    Reads the CSV files given by the required ``--actual`` and ``--expected``
    options, compares them with ``check_graph_breaks`` and, when that reports
    a mismatch, prints the report and exits with status 1.
    """
    cli = argparse.ArgumentParser()
    for flag in ("--actual", "--expected"):
        cli.add_argument(flag, type=str, required=True)
    opts = cli.parse_args()

    actual_frame = pd.read_csv(opts.actual)
    expected_frame = pd.read_csv(opts.expected)

    mismatched, report = check_graph_breaks(actual_frame, expected_frame, opts.expected)
    if not mismatched:
        return
    print(report)
    sys.exit(1)


# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
43 changes: 43 additions & 0 deletions benchmarks/dynamo/ci_expected_accuracy/inference_huggingface.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
name,graph_breaks
AlbertForMaskedLM,0
AlbertForQuestionAnswering,0
BartForCausalLM,7
BertForMaskedLM,0
BertForQuestionAnswering,0
BlenderbotForCausalLM,0
BlenderbotSmallForCausalLM,7
BlenderbotSmallForConditionalGeneration,0
CamemBert,0
DebertaForMaskedLM,47
DebertaForQuestionAnswering,47
DebertaV2ForMaskedLM,0
DistilBertForMaskedLM,0
DistilBertForQuestionAnswering,0
DistillGPT2,0
ElectraForCausalLM,3
ElectraForQuestionAnswering,0
GPT2ForSequenceClassification,1
GoogleFnet,41
LayoutLMForMaskedLM,0
LayoutLMForSequenceClassification,1
M2M100ForConditionalGeneration,13
MBartForCausalLM,7
MBartForConditionalGeneration,0
MT5ForConditionalGeneration,0
MegatronBertForCausalLM,0
MegatronBertForQuestionAnswering,0
MobileBertForMaskedLM,0
MobileBertForQuestionAnswering,0
PLBartForCausalLM,7
PLBartForConditionalGeneration,10
PegasusForCausalLM,8
PegasusForConditionalGeneration,11
RobertaForCausalLM,0
RobertaForQuestionAnswering,0
Speech2Text2ForCausalLM,8
T5ForConditionalGeneration,0
T5Small,0
TrOCRForCausalLM,7
XGLMForCausalLM,8
XLNetLMHeadModel,0
YituTechConvBert,3
29 changes: 29 additions & 0 deletions benchmarks/dynamo/ci_expected_accuracy/inference_timm_models0.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name,graph_breaks
adv_inception_v3,0
beit_base_patch16_224,0
coat_lite_mini,0
convit_base,15
convmixer_768_32,0
convnext_base,0
crossvit_9_240,0
cspdarknet53,0
deit_base_distilled_patch16_224,0
dla102,0
dm_nfnet_f0,0
dpn107,0
eca_botnext26ts_256,0
eca_halonext26ts,0
ese_vovnet19b_dw,0
fbnetc_100,0
fbnetv3_b,0
gernet_l,0
ghostnet_100,0
gluon_inception_v3,0
gmixer_24_224,0
gmlp_s16_224,0
hrnet_w18,0
inception_v3,0
jx_nest_base,0
lcnet_050,0
levit_128,0
mixer_b16_224,0
31 changes: 31 additions & 0 deletions benchmarks/dynamo/ci_expected_accuracy/inference_timm_models1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name,graph_breaks
mixnet_l,0
mnasnet_100,0
mobilenetv2_100,0
mobilenetv3_large_100,0
mobilevit_s,0
nfnet_l0,0
pit_b_224,0
pnasnet5large,0
poolformer_m36,0
regnety_002,0
repvgg_a2,0
res2net101_26w_4s,0
res2net50_14w_8s,0
res2next50,0
resmlp_12_224,0
resnest101e,0
rexnet_100,0
sebotnet33ts_256,0
selecsls42b,0
spnasnet_100,0
swin_base_patch4_window7_224,0
swsl_resnext101_32x16d,0
tf_efficientnet_b0,0
tf_mixnet_l,0
tinynet_a,0
tnt_s_patch16_224,0
twins_pcpvt_base,0
visformer_small,0
vit_base_patch16_224,0
volo_d1_224,0
49 changes: 49 additions & 0 deletions benchmarks/dynamo/ci_expected_accuracy/inference_torchbench.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
name,graph_breaks
BERT_pytorch,0
Background_Matting,0
LearningToPaint,0
Super_SloMo,0
alexnet,0
attention_is_all_you_need_pytorch,0
dcgan,0
densenet121,0
dlrm,0
drq,0
fastNLP_Bert,4
functorch_dp_cifar10,0
functorch_maml_omniglot,0
hf_Albert,0
hf_Bart,11
hf_Bert,0
hf_DistilBert,0
hf_GPT2,0
hf_Reformer,5
hf_T5_large,0
lennard_jones,0
maml_omniglot,0
mnasnet1_0,0
mobilenet_v2,0
mobilenet_v3_large,0
nvidia_deeprecommender,0
opacus_cifar10,0
pyhpc_isoneutral_mixing,0
pytorch_CycleGAN_and_pix2pix,0
pytorch_stargan,0
pytorch_unet,0
resnet152,0
resnet18,0
resnet50,0
resnext50_32x4d,0
shufflenet_v2_x1_0,0
soft_actor_critic,0
speech_transformer,9
squeezenet1_1,0
timm_efficientnet,0
timm_regnet,0
timm_resnest,0
timm_vision_transformer,0
timm_vision_transformer_large,0
timm_vovnet,0
tts_angular,1
vgg16,0
yolov3,1
37 changes: 37 additions & 0 deletions benchmarks/dynamo/ci_expected_accuracy/training_huggingface.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name,graph_breaks
AlbertForMaskedLM,7
AlbertForQuestionAnswering,7
BartForCausalLM,15
BertForMaskedLM,7
BertForQuestionAnswering,7
BlenderbotSmallForCausalLM,15
BlenderbotSmallForConditionalGeneration,7
CamemBert,7
DebertaForMaskedLM,55
DebertaForQuestionAnswering,55
DebertaV2ForMaskedLM,0
DistilBertForMaskedLM,7
DistilBertForQuestionAnswering,7
DistillGPT2,7
ElectraForCausalLM,11
ElectraForQuestionAnswering,7
GPT2ForSequenceClassification,9
LayoutLMForMaskedLM,7
LayoutLMForSequenceClassification,9
MBartForCausalLM,15
MegatronBertForCausalLM,7
MegatronBertForQuestionAnswering,7
MobileBertForMaskedLM,4
MobileBertForQuestionAnswering,4
PLBartForCausalLM,15
PLBartForConditionalGeneration,18
PegasusForCausalLM,16
PegasusForConditionalGeneration,16
RobertaForCausalLM,7
RobertaForQuestionAnswering,7
Speech2Text2ForCausalLM,16
T5ForConditionalGeneration,7
T5Small,7
TrOCRForCausalLM,15
XLNetLMHeadModel,7
YituTechConvBert,11
25 changes: 25 additions & 0 deletions benchmarks/dynamo/ci_expected_accuracy/training_timm_models0.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name,graph_breaks
adv_inception_v3,7
beit_base_patch16_224,7
coat_lite_mini,7
convmixer_768_32,4
convnext_base,7
crossvit_9_240,7
cspdarknet53,9
deit_base_distilled_patch16_224,7
dla102,7
dm_nfnet_f0,7
dpn107,9
eca_botnext26ts_256,9
ese_vovnet19b_dw,9
fbnetc_100,9
gernet_l,9
ghostnet_100,9
gluon_inception_v3,7
gmixer_24_224,7
gmlp_s16_224,7
hrnet_w18,4
inception_v3,7
jx_nest_base,7
lcnet_050,9
mixer_b16_224,7
30 changes: 30 additions & 0 deletions benchmarks/dynamo/ci_expected_accuracy/training_timm_models1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name,graph_breaks
mixnet_l,9
mnasnet_100,9
mobilenetv2_100,9
mobilenetv3_large_100,9
mobilevit_s,9
nfnet_l0,7
pit_b_224,7
pnasnet5large,6
poolformer_m36,7
regnety_002,9
repvgg_a2,9
res2net101_26w_4s,7
res2net50_14w_8s,7
res2next50,7
resmlp_12_224,7
resnest101e,7
rexnet_100,9
selecsls42b,7
spnasnet_100,9
swin_base_patch4_window7_224,7
swsl_resnext101_32x16d,7
tf_efficientnet_b0,9
tf_mixnet_l,9
tinynet_a,9
tnt_s_patch16_224,7
twins_pcpvt_base,7
visformer_small,7
vit_base_patch16_224,7
volo_d1_224,7
Loading

0 comments on commit 8fe782c

Please sign in to comment.