[CI] Enable amp accuracy check for inductor cpu (#127758)
This enables the Inductor AMP accuracy check on CPU in the CI workflow so issues are caught early. Three suites are covered: timm, huggingface, and torchbench.
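In CI, test.sh now picks --amp instead of --float32 whenever TEST_CONFIG contains "amp" (see the test.sh hunk below). For a rough local reproduction of one of the new jobs, a sketch along these lines should work (flags taken from benchmarks/dynamo/common.py; the model name and output path are illustrative only, not part of this PR):

    # Inductor AMP accuracy check for a single HuggingFace model on CPU,
    # mirroring the --inference --amp --freezing flags passed by test.sh.
    # "AlbertForMaskedLM" and the output file name are example choices only.
    python benchmarks/dynamo/huggingface.py --accuracy --inference \
        --amp --freezing --inductor --device cpu \
        --only AlbertForMaskedLM --output inductor_amp_freezing_accuracy.csv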
Pull Request resolved: https://github.com/pytorch/pytorch/pull/127758
Approved by: https://github.com/jgong5, https://github.com/desertfire
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index dec2c17..50fce82 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -587,10 +587,14 @@
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
else
if [[ "${TEST_CONFIG}" == *cpu_inductor* ]]; then
+ local dt="float32"
+ if [[ "${TEST_CONFIG}" == *amp* ]]; then
+ dt="amp"
+ fi
if [[ "${TEST_CONFIG}" == *freezing* ]]; then
- test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --float32 --freezing "$@"
+ test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --"$dt" --freezing "$@"
else
- test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --float32 "$@"
+ test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --"$dt" "$@"
fi
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml
index 345814a..baa1685 100644
--- a/.github/workflows/inductor.yml
+++ b/.github/workflows/inductor.yml
@@ -171,6 +171,11 @@
{ config: "cpu_inductor_timm_freezing", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_torchbench_freezing", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_torchbench_freezing", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
+ { config: "cpu_inductor_huggingface_amp_freezing", shard: 1, num_shards: 1, runner: "linux.16xlarge.spr" },
+ { config: "cpu_inductor_timm_amp_freezing", shard: 1, num_shards: 2, runner: "linux.16xlarge.spr" },
+ { config: "cpu_inductor_timm_amp_freezing", shard: 2, num_shards: 2, runner: "linux.16xlarge.spr" },
+ { config: "cpu_inductor_torchbench_amp_freezing", shard: 1, num_shards: 2, runner: "linux.16xlarge.spr" },
+ { config: "cpu_inductor_torchbench_amp_freezing", shard: 2, num_shards: 2, runner: "linux.16xlarge.spr" },
{ config: "dynamic_cpu_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
{ config: "dynamic_cpu_inductor_timm", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "dynamic_cpu_inductor_timm", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_amp_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_amp_freezing_inference.csv
new file mode 100644
index 0000000..349239b
--- /dev/null
+++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_amp_freezing_inference.csv
@@ -0,0 +1,185 @@
+name,accuracy,graph_breaks
+
+
+
+AlbertForMaskedLM,pass,0
+
+
+
+AlbertForQuestionAnswering,pass,0
+
+
+
+AllenaiLongformerBase,pass,4
+
+
+
+BartForCausalLM,pass,0
+
+
+
+BartForConditionalGeneration,pass,0
+
+
+
+BertForMaskedLM,pass,0
+
+
+
+BertForQuestionAnswering,pass,0
+
+
+
+BlenderbotForCausalLM,pass_due_to_skip,0
+
+
+
+BlenderbotSmallForCausalLM,pass,0
+
+
+
+BlenderbotSmallForConditionalGeneration,pass,0
+
+
+
+CamemBert,pass,0
+
+
+
+DebertaForMaskedLM,pass,0
+
+
+
+DebertaForQuestionAnswering,pass,0
+
+
+
+DebertaV2ForMaskedLM,pass_due_to_skip,0
+
+
+
+DebertaV2ForQuestionAnswering,pass,0
+
+
+
+DistilBertForMaskedLM,pass,0
+
+
+
+DistilBertForQuestionAnswering,pass,0
+
+
+
+DistillGPT2,pass,0
+
+
+
+ElectraForCausalLM,pass,0
+
+
+
+ElectraForQuestionAnswering,pass,0
+
+
+
+GPT2ForSequenceClassification,pass,2
+
+
+
+GoogleFnet,pass,0
+
+
+
+LayoutLMForMaskedLM,pass,0
+
+
+
+LayoutLMForSequenceClassification,pass,2
+
+
+
+M2M100ForConditionalGeneration,pass,0
+
+
+
+MBartForCausalLM,pass,0
+
+
+
+MBartForConditionalGeneration,pass,0
+
+
+
+MT5ForConditionalGeneration,pass,0
+
+
+
+MegatronBertForCausalLM,pass,0
+
+
+
+MegatronBertForQuestionAnswering,pass,0
+
+
+
+MobileBertForMaskedLM,pass,0
+
+
+
+MobileBertForQuestionAnswering,pass,0
+
+
+
+OPTForCausalLM,pass,0
+
+
+
+PLBartForCausalLM,pass,0
+
+
+
+PLBartForConditionalGeneration,pass,0
+
+
+
+PegasusForCausalLM,pass,0
+
+
+
+PegasusForConditionalGeneration,pass,0
+
+
+
+RobertaForCausalLM,pass,0
+
+
+
+RobertaForQuestionAnswering,pass,0
+
+
+
+Speech2Text2ForCausalLM,pass,0
+
+
+
+T5ForConditionalGeneration,pass,0
+
+
+
+T5Small,pass,0
+
+
+
+TrOCRForCausalLM,pass,0
+
+
+
+XGLMForCausalLM,pass,0
+
+
+
+XLNetLMHeadModel,pass,0
+
+
+
+YituTechConvBert,pass,0
diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_timm_amp_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_timm_amp_freezing_inference.csv
new file mode 100644
index 0000000..c137fc1
--- /dev/null
+++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_timm_amp_freezing_inference.csv
@@ -0,0 +1,245 @@
+name,accuracy,graph_breaks
+
+
+
+adv_inception_v3,pass,0
+
+
+
+beit_base_patch16_224,pass,0
+
+
+
+botnet26t_256,pass,0
+
+
+
+cait_m36_384,pass,0
+
+
+
+coat_lite_mini,pass,0
+
+
+
+convit_base,pass,0
+
+
+
+convmixer_768_32,pass,0
+
+
+
+convnext_base,pass,0
+
+
+
+crossvit_9_240,pass,0
+
+
+
+cspdarknet53,pass,0
+
+
+
+deit_base_distilled_patch16_224,pass,0
+
+
+
+dla102,pass,0
+
+
+
+dm_nfnet_f0,pass,0
+
+
+
+dpn107,pass,0
+
+
+
+eca_botnext26ts_256,pass,0
+
+
+
+eca_halonext26ts,pass,0
+
+
+
+ese_vovnet19b_dw,pass,0
+
+
+
+fbnetc_100,pass,0
+
+
+
+fbnetv3_b,pass,0
+
+
+
+gernet_l,pass,0
+
+
+
+ghostnet_100,pass,0
+
+
+
+gluon_inception_v3,pass,0
+
+
+
+gmixer_24_224,pass,0
+
+
+
+gmlp_s16_224,pass,0
+
+
+
+hrnet_w18,pass,0
+
+
+
+inception_v3,pass,0
+
+
+
+jx_nest_base,pass,0
+
+
+
+lcnet_050,fail_accuracy,0
+
+
+
+levit_128,pass,0
+
+
+
+mixer_b16_224,pass,0
+
+
+
+mixnet_l,pass,0
+
+
+
+mnasnet_100,pass,0
+
+
+
+mobilenetv2_100,pass,0
+
+
+
+mobilenetv3_large_100,pass,0
+
+
+
+mobilevit_s,pass,0
+
+
+
+nfnet_l0,pass,0
+
+
+
+pit_b_224,pass,0
+
+
+
+pnasnet5large,pass,0
+
+
+
+poolformer_m36,pass,0
+
+
+
+regnety_002,pass,0
+
+
+
+repvgg_a2,pass,0
+
+
+
+res2net101_26w_4s,pass,0
+
+
+
+res2net50_14w_8s,pass,0
+
+
+
+res2next50,pass,0
+
+
+
+resmlp_12_224,pass,0
+
+
+
+resnest101e,pass,0
+
+
+
+rexnet_100,pass,0
+
+
+
+sebotnet33ts_256,pass,0
+
+
+
+selecsls42b,pass,0
+
+
+
+spnasnet_100,pass,0
+
+
+
+swin_base_patch4_window7_224,pass,0
+
+
+
+swsl_resnext101_32x16d,pass,0
+
+
+
+tf_efficientnet_b0,pass,0
+
+
+
+tf_mixnet_l,pass,0
+
+
+
+tinynet_a,pass,0
+
+
+
+tnt_s_patch16_224,pass,0
+
+
+
+twins_pcpvt_base,pass,0
+
+
+
+visformer_small,pass,0
+
+
+
+vit_base_patch16_224,pass,0
+
+
+
+volo_d1_224,pass,0
+
+
+
+xcit_large_24_p8_224,pass,0
diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_amp_freezing_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_amp_freezing_inference.csv
new file mode 100644
index 0000000..179f383
--- /dev/null
+++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_amp_freezing_inference.csv
@@ -0,0 +1,353 @@
+name,accuracy,graph_breaks
+
+
+
+BERT_pytorch,pass,0
+
+
+
+Background_Matting,pass_due_to_skip,0
+
+
+
+DALLE2_pytorch,model_fail_to_load,0
+
+
+
+LearningToPaint,pass,0
+
+
+
+Super_SloMo,pass,0
+
+
+
+alexnet,pass,0
+
+
+
+basic_gnn_edgecnn,pass,0
+
+
+
+basic_gnn_gcn,pass,6
+
+
+
+basic_gnn_gin,pass,0
+
+
+
+basic_gnn_sage,pass,0
+
+
+
+dcgan,pass,0
+
+
+
+demucs,pass,3
+
+
+
+densenet121,pass,0
+
+
+
+detectron2_fasterrcnn_r_101_c4,fail_accuracy,42
+
+
+
+detectron2_fasterrcnn_r_101_dc5,fail_accuracy,42
+
+
+
+detectron2_fasterrcnn_r_101_fpn,fail_accuracy,46
+
+
+
+detectron2_fasterrcnn_r_50_c4,fail_accuracy,42
+
+
+
+detectron2_fasterrcnn_r_50_dc5,fail_accuracy,42
+
+
+
+detectron2_fasterrcnn_r_50_fpn,fail_accuracy,46
+
+
+
+detectron2_fcos_r_50_fpn,fail_accuracy,23
+
+
+
+detectron2_maskrcnn_r_101_c4,fail_accuracy,57
+
+
+
+detectron2_maskrcnn_r_101_fpn,fail_accuracy,63
+
+
+
+detectron2_maskrcnn_r_50_c4,fail_accuracy,57
+
+
+
+detectron2_maskrcnn_r_50_fpn,fail_accuracy,63
+
+
+
+dlrm,pass,0
+
+
+
+doctr_det_predictor,pass,5
+
+
+
+doctr_reco_predictor,fail_accuracy,4
+
+
+
+drq,pass,0
+
+
+
+fastNLP_Bert,pass,4
+
+
+
+functorch_dp_cifar10,pass,0
+
+
+
+functorch_maml_omniglot,pass,0
+
+
+
+hf_Albert,pass,0
+
+
+
+hf_Bart,pass,0
+
+
+
+hf_Bert,pass,0
+
+
+
+hf_Bert_large,pass,0
+
+
+
+hf_BigBird,pass,61
+
+
+
+hf_DistilBert,pass,0
+
+
+
+hf_GPT2,pass,0
+
+
+
+hf_GPT2_large,pass_due_to_skip,0
+
+
+
+hf_Longformer,pass,4
+
+
+
+hf_Reformer,pass,5
+
+
+
+hf_T5,pass,0
+
+
+
+hf_T5_base,pass,0
+
+
+
+hf_T5_large,pass_due_to_skip,0
+
+
+
+hf_distil_whisper,pass,0
+
+
+
+lennard_jones,pass,0
+
+
+
+llama,pass,0
+
+
+
+maml,pass_due_to_skip,0
+
+
+
+maml_omniglot,pass,0
+
+
+
+mnasnet1_0,pass,0
+
+
+
+mobilenet_v2,pass,0
+
+
+
+mobilenet_v2_quantized_qat,pass,2
+
+
+
+mobilenet_v3_large,pass,0
+
+
+
+moco,model_fail_to_load,0
+
+
+
+moondream,pass,0
+
+
+
+nvidia_deeprecommender,pass,0
+
+
+
+opacus_cifar10,pass,0
+
+
+
+phlippe_densenet,pass,0
+
+
+
+phlippe_resnet,pass,0
+
+
+
+pyhpc_equation_of_state,pass,0
+
+
+
+pyhpc_isoneutral_mixing,pass,0
+
+
+
+pyhpc_turbulent_kinetic_energy,pass,0
+
+
+
+pytorch_CycleGAN_and_pix2pix,pass,0
+
+
+
+pytorch_stargan,pass,0
+
+
+
+pytorch_unet,pass,0
+
+
+
+resnet152,pass,0
+
+
+
+resnet18,pass,0
+
+
+
+resnet50,pass,0
+
+
+
+resnet50_quantized_qat,pass,2
+
+
+
+resnext50_32x4d,pass,0
+
+
+
+shufflenet_v2_x1_0,pass,0
+
+
+
+soft_actor_critic,pass,0
+
+
+
+speech_transformer,pass,10
+
+
+
+squeezenet1_1,pass,0
+
+
+
+stable_diffusion_unet,pass_due_to_skip,0
+
+
+
+timm_efficientdet,model_fail_to_load,0
+
+
+
+timm_efficientnet,pass,0
+
+
+
+timm_nfnet,pass,0
+
+
+
+timm_regnet,pass,0
+
+
+
+timm_resnest,pass,0
+
+
+
+timm_vision_transformer,pass,0
+
+
+
+timm_vision_transformer_large,pass_due_to_skip,0
+
+
+
+timm_vovnet,pass,0
+
+
+
+torch_multimodal_clip,pass,0
+
+
+
+tts_angular,pass,2
+
+
+
+vgg16,pass,0
+
+
+
+vision_maskrcnn,fail_accuracy,28
+
+
+
+yolov3,pass,0