[CI] Remove inductor skip list for timm_models (#98840)
Summary: check benchmark results against the expected-accuracy CSV files instead of skipping the tests outright. Known failures are recorded in the CSVs as explicit statuses (fail_accuracy, fail_to_run, OOM), so both regressions and fixes surface as CSV diffs.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/98840
Approved by: https://github.com/ezyang
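
The CSV-based check is the key mechanism here: instead of a hard-coded skip list, every model's status is compared against a committed baseline. A minimal sketch of such a comparison, using only the stdlib and hypothetical helper names (`load_expected`, `check_model`) rather than the repo's actual CI entry point:

```python
import csv

def load_expected(path):
    # Map model name -> (expected accuracy status, expected graph-break count),
    # matching the "name,accuracy,graph_breaks" header of the CSVs below.
    with open(path) as f:
        return {
            row["name"]: (row["accuracy"], int(row["graph_breaks"]))
            for row in csv.DictReader(f)
        }

def check_model(expected, name, status, graph_breaks):
    # A model fails CI only if its result diverges from the recorded baseline,
    # so known failures ("fail_accuracy", "fail_to_run", "OOM") need no skip list.
    want = expected.get(name)
    if want is None:
        raise AssertionError(f"{name} is missing from the expected CSV")
    if (status, graph_breaks) != want:
        raise AssertionError(f"{name}: got {(status, graph_breaks)}, expected {want}")
```

One advantage of this scheme over skipping is that an unexpected *pass* also trips the check, prompting the baseline CSV to be updated.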
diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_dynamic_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_dynamic_inference.csv
index f91af65..cef9d08 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_dynamic_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_dynamic_inference.csv
@@ -1,7 +1,10 @@
name,accuracy,graph_breaks
adv_inception_v3,pass,0
beit_base_patch16_224,pass,0
+botnet26t_256,pass,0
+cait_m36_384,fail_accuracy,0
coat_lite_mini,pass,0
+convit_base,fail_to_run,4
convmixer_768_32,pass,0
convnext_base,pass,0
crossvit_9_240,pass,0
@@ -18,6 +21,7 @@
gernet_l,pass,0
ghostnet_100,pass,0
gluon_inception_v3,pass,0
+gluon_xception65,pass,0
gmixer_24_224,pass,0
gmlp_s16_224,pass,0
hrnet_w18,pass,0
@@ -56,3 +60,4 @@
visformer_small,pass,0
vit_base_patch16_224,pass,0
volo_d1_224,pass,0
+xcit_large_24_p8_224,pass,0
diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_dynamic_training.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_dynamic_training.csv
index afd8a2b..2e89b59 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_dynamic_training.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_dynamic_training.csv
@@ -1,7 +1,10 @@
name,accuracy,graph_breaks
adv_inception_v3,pass,9
beit_base_patch16_224,pass,9
+botnet26t_256,pass,11
+cait_m36_384,pass,9
coat_lite_mini,pass,9
+convit_base,fail_to_run,8
convmixer_768_32,pass,6
convnext_base,pass,9
crossvit_9_240,pass,9
@@ -11,17 +14,21 @@
dm_nfnet_f0,pass,9
dpn107,pass,11
eca_botnext26ts_256,pass,11
+eca_halonext26ts,pass,11
ese_vovnet19b_dw,pass,11
fbnetc_100,pass,11
+fbnetv3_b,pass,11
gernet_l,pass,11
ghostnet_100,pass,11
gluon_inception_v3,pass,9
+gluon_xception65,pass,9
gmixer_24_224,pass,9
gmlp_s16_224,pass,9
hrnet_w18,pass,6
inception_v3,pass,9
jx_nest_base,pass,9
lcnet_050,pass,11
+levit_128,pass,9
mixer_b16_224,pass,9
mixnet_l,pass,11
mnasnet_100,pass,11
@@ -39,6 +46,8 @@
res2next50,pass,9
resmlp_12_224,pass,9
resnest101e,pass,9
+rexnet_100,pass,11
+sebotnet33ts_256,pass,11
selecsls42b,pass,9
spnasnet_100,pass,11
swin_base_patch4_window7_224,pass,9
@@ -51,3 +60,4 @@
visformer_small,pass,9
vit_base_patch16_224,pass,9
volo_d1_224,pass,9
+xcit_large_24_p8_224,pass,9
diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_inference.csv
index efb7e20..e7a27ae 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_inference.csv
@@ -1,6 +1,8 @@
name,accuracy,graph_breaks
adv_inception_v3,pass,0
beit_base_patch16_224,pass,0
+botnet26t_256,pass,0
+cait_m36_384,fail_accuracy,0
coat_lite_mini,pass,0
convit_base,pass,15
convmixer_768_32,pass,0
@@ -19,6 +21,7 @@
gernet_l,pass,0
ghostnet_100,pass,0
gluon_inception_v3,pass,0
+gluon_xception65,pass,0
gmixer_24_224,pass,0
gmlp_s16_224,pass,0
hrnet_w18,pass,0
@@ -57,3 +60,4 @@
visformer_small,pass,0
vit_base_patch16_224,pass,0
volo_d1_224,pass,0
+xcit_large_24_p8_224,pass,0
diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv
index afd8a2b..a45bb9d 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv
@@ -1,7 +1,10 @@
name,accuracy,graph_breaks
adv_inception_v3,pass,9
beit_base_patch16_224,pass,9
+botnet26t_256,pass,11
+cait_m36_384,OOM,4
coat_lite_mini,pass,9
+convit_base,pass,25
convmixer_768_32,pass,6
convnext_base,pass,9
crossvit_9_240,pass,9
@@ -11,17 +14,21 @@
dm_nfnet_f0,pass,9
dpn107,pass,11
eca_botnext26ts_256,pass,11
+eca_halonext26ts,pass,11
ese_vovnet19b_dw,pass,11
fbnetc_100,pass,11
+fbnetv3_b,pass,11
gernet_l,pass,11
ghostnet_100,pass,11
gluon_inception_v3,pass,9
+gluon_xception65,pass,9
gmixer_24_224,pass,9
gmlp_s16_224,pass,9
hrnet_w18,pass,6
inception_v3,pass,9
jx_nest_base,pass,9
lcnet_050,pass,11
+levit_128,pass,9
mixer_b16_224,pass,9
mixnet_l,pass,11
mnasnet_100,pass,11
@@ -39,6 +46,8 @@
res2next50,pass,9
resmlp_12_224,pass,9
resnest101e,pass,9
+rexnet_100,pass,11
+sebotnet33ts_256,pass,11
selecsls42b,pass,9
spnasnet_100,pass,11
swin_base_patch4_window7_224,pass,9
@@ -51,3 +60,4 @@
visformer_small,pass,9
vit_base_patch16_224,pass,9
volo_d1_224,pass,9
+xcit_large_24_p8_224,pass,9
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index 0ea1114..2b81190 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -170,11 +170,6 @@
"AllenaiLongformerBase",
"DebertaV2ForQuestionAnswering", # OOM
"OPTForCausalLM", # OOM
- # TIMM
- "cait_m36_384", # Accuracy
- "botnet26t_256", # accuracy https://github.com/pytorch/pytorch/issues/93847
- "gluon_xception65", # accuracy https://github.com/pytorch/pytorch/issues/93847
- "xcit_large_24_p8_224", # TIMEOUT
]
CI_SKIP[CI("inductor", training=False, device="cpu")] = [
@@ -233,15 +228,6 @@
"M2M100ForConditionalGeneration", # OOM
"XGLMForCausalLM", # OOM
"MT5ForConditionalGeneration", # fails accuracy
- # TIMM
- "convit_base", # fp64_OOM
- "eca_halonext26ts", # accuracy
- "fbnetv3_b", # accuracy
- "levit_128", # fp64_OOM
- # https://github.com/pytorch/pytorch/issues/94066
- "rexnet_100", # Accuracy failed for key name stem.bn.weight.grad
- "sebotnet33ts_256", # Accuracy failed for key name stem.conv1.conv.weight.grad
- "xcit_large_24_p8_224", # fp64_OOM
]
# Skips for dynamic=True
@@ -258,7 +244,6 @@
CI_SKIP[CI("inductor", training=False, dynamic=True)] = [
*CI_SKIP[CI("aot_eager", training=False, dynamic=True)],
*CI_SKIP[CI("inductor", training=False)],
- "convit_base", # _print_Pow: assert exp.is_integer
]
CI_SKIP[CI("inductor", training=True, dynamic=True)] = [
@@ -1283,8 +1268,6 @@
)
self.args.cosine = True
fp64_outputs = None
- if self.args.ci and self.args.training:
- return record_status("fp64_OOM")
tolerance, cos_similarity = self.get_tolerance_and_cosine_flag(
self.args.training, current_device, name
@@ -1351,7 +1334,11 @@
print(
"TorchDynamo optimized model failed to run because of following error"
)
- accuracy_status = "fail_to_run"
+ accuracy_status = (
+ "OOM"
+ if isinstance(e, torch.cuda.OutOfMemoryError)
+ else "fail_to_run"
+ )
return record_status(
accuracy_status, dynamo_start_stats=start_stats
)
@@ -2105,14 +2092,6 @@
"hf_Longformer",
"timm_nfnet",
"timm_efficientdet",
- # timm
- "beit_base_patch16_224",
- "cait_m36_384",
- "convmixer_768_32",
- "deit_base_distilled_patch16_224",
- "dm_nfnet_f0",
- "dpn107",
- "dm_nfnet_f0",
}
)
if args.training:
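
The common.py hunk above also changes how a failed optimized run is recorded: CUDA out-of-memory errors now get a dedicated "OOM" status instead of the generic "fail_to_run", which is what lets cait_m36_384 be tracked as OOM in the training CSV. A minimal sketch of that classification, assuming a CUDA build (torch.cuda.OutOfMemoryError is the exception PyTorch raises when the caching allocator cannot serve a request):

```python
import torch

def classify_failure(e: Exception) -> str:
    # OOMs get their own status so the expected CSVs can distinguish
    # capacity problems from genuine compile/runtime crashes.
    if isinstance(e, torch.cuda.OutOfMemoryError):
        return "OOM"
    return "fail_to_run"
```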
diff --git a/benchmarks/dynamo/timm_models.py b/benchmarks/dynamo/timm_models.py
index 34f6330..87d8251 100755
--- a/benchmarks/dynamo/timm_models.py
+++ b/benchmarks/dynamo/timm_models.py
@@ -68,26 +68,7 @@
"xcit_large_24_p8_224": 4,
}
-REQUIRE_HIGHER_TOLERANCE = set("botnet26t_256")
-
-SKIP = {
- # Unusual training setup
- "levit_128",
-}
-
-SKIP_TRAIN = {
- # segfault: Internal Triton PTX codegen error
- "eca_halonext26ts",
-}
-
-NONDETERMINISTIC = {
- # https://github.com/pytorch/pytorch/issues/94066
- "sebotnet33ts_256",
-}
-
-MAX_BATCH_SIZE_FOR_ACCURACY_CHECK = {
- "cait_m36_384": 4,
-}
+REQUIRE_HIGHER_TOLERANCE = {"sebotnet33ts_256"}
SCALED_COMPUTE_LOSS = {
"ese_vovnet19b_dw",
@@ -256,13 +237,6 @@
)
batch_size = batch_size or recorded_batch_size
- # Control the memory footprint for few models
- if self.args.accuracy and model_name in MAX_BATCH_SIZE_FOR_ACCURACY_CHECK:
- batch_size = min(batch_size, MAX_BATCH_SIZE_FOR_ACCURACY_CHECK[model_name])
-
- # example_inputs = torch.randn(
- # (batch_size,) + input_size, device=device, dtype=data_dtype
- # )
torch.manual_seed(1337)
input_tensor = torch.randint(
256, size=(batch_size,) + input_size, device=device
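
One detail worth noting in the timm_models.py hunk: REQUIRE_HIGHER_TOLERANCE should be a one-element set literal rather than set("sebotnet33ts_256"), because set() over a string iterates its characters and would make every membership test against a full model name fail. A quick illustration of the pitfall:

```python
# set() over a string builds a set of characters, silently breaking
# "name in REQUIRE_HIGHER_TOLERANCE" checks:
assert set("ab") == {"a", "b"}
assert "ab" not in set("ab")

# A set literal keeps the whole model name as a single element:
assert "ab" in {"ab"}
```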