Make CI_SKIPS into a consolidated dict (#92769)
This makes it easier to add more configurations without causing a
thicket of if statements selecting the correct variable.
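As a condensed, standalone sketch of the keying scheme this enables (the real
skip lists live in benchmarks/dynamo/common.py below; the entries here are just
examples taken from those lists):

    from collections import defaultdict
    from typing import NamedTuple

    class CI(NamedTuple):
        backend: str  # "aot_eager" or "inductor"
        training: bool
        dynamic: bool = False

    CI_SKIP = defaultdict(list)
    CI_SKIP[CI("aot_eager", training=False)] = ["demucs"]
    CI_SKIP[CI("aot_eager", training=True)] = [
        # training inherits every inference skip
        *CI_SKIP[CI("aot_eager", training=False)],
        "Background_Matting",
    ]

    # One dict lookup replaces the old if/elif chain over backend/training/dynamic:
    skips = CI_SKIP[CI("aot_eager", training=True, dynamic=False)]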
Signed-off-by: Edward Z. Yang <ezyang@meta.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/92769
Approved by: https://github.com/voznesenskym, https://github.com/desertfire
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index 8424650..6fde717 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -15,6 +15,7 @@
import time
import warnings
from contextlib import contextmanager
+from typing import NamedTuple
import numpy as np
import pandas as pd
@@ -60,7 +61,16 @@
current_batch_size = None
output_filename = None
-CI_SKIP_AOT_EAGER_INFERENCE = [
+
+class CI(NamedTuple):
+    backend: str  # aot_eager or inductor
+    training: bool
+    dynamic: bool = False
+
+
+CI_SKIP = collections.defaultdict(list)
+
+CI_SKIP[CI("aot_eager", training=False)] = [
    # TorchBench
    "DALLE2_pytorch",  # AttributeError: text_encodings
    "demucs",  # OOM
@@ -84,8 +94,8 @@
"DebertaV2ForQuestionAnswering", # OOM
]
-CI_SKIP_AOT_EAGER_TRAINING = [
-    *CI_SKIP_AOT_EAGER_INFERENCE,
+CI_SKIP[CI("aot_eager", training=True)] = [
+    *CI_SKIP[CI("aot_eager", training=False)],
    # TorchBench
    "Background_Matting",  # fp64_OOM
    "hf_T5_base",  # fp64_OOM
@@ -107,14 +117,14 @@
"xcit_large_24_p8_224", # fp64_OOM
]
-CI_SKIP_AOT_EAGER_DYNAMIC_TRAINING = [
-    *CI_SKIP_AOT_EAGER_TRAINING,
+CI_SKIP[CI("aot_eager", training=True, dynamic=True)] = [
+    *CI_SKIP[CI("aot_eager", training=True)],
    "crossvit_9_240",  # torch._C._nn.upsample_bicubic2d
    "twins_pcpvt_base",  # timeout
]
-CI_SKIP_INDUCTOR_INFERENCE = [
-    *CI_SKIP_AOT_EAGER_INFERENCE,
+CI_SKIP[CI("inductor", training=False)] = [
+    *CI_SKIP[CI("aot_eager", training=False)],
    # TorchBench
    "detectron2",
    "hf_T5",  # accuracy
@@ -136,8 +146,8 @@
"ghostnet_100", # Accuracy
]
-CI_SKIP_INDUCTOR_TRAINING = [
-    *CI_SKIP_INDUCTOR_INFERENCE,
+CI_SKIP[CI("inductor", training=True)] = [
+    *CI_SKIP[CI("inductor", training=False)],
    # TorchBench
    "Background_Matting",  # fp64_OOM
    "dlrm",  # Fails on CI - unable to repro locally
@@ -1844,27 +1854,18 @@
        # Only dump error on CI
        args.quiet = True
        args.repeat = 2
-        if args.backend == "aot_eager":
-            if args.dynamic_ci_skips_only:
-                assert args.training and args.dynamic_shapes
-                args.filter = list(
-                    set(CI_SKIP_AOT_EAGER_DYNAMIC_TRAINING)
-                    - set(CI_SKIP_AOT_EAGER_TRAINING)
-                )
-            else:
-                args.exclude_exact = (
-                    CI_SKIP_AOT_EAGER_DYNAMIC_TRAINING
-                    if args.training and args.dynamic_shapes
-                    else CI_SKIP_AOT_EAGER_TRAINING
-                    if args.training
-                    else CI_SKIP_AOT_EAGER_INFERENCE
-                )
-        elif args.inductor:
-            args.exclude_exact = (
-                CI_SKIP_INDUCTOR_TRAINING
-                if args.training
-                else CI_SKIP_INDUCTOR_INFERENCE
+        if args.dynamic_ci_skips_only:
+            # Test only the incremental set of jobs whose skips were
+            # caused solely by turning on dynamic shapes
+            assert args.dynamic_shapes
+            ci = functools.partial(CI, args.backend, training=args.training)
+            args.filter = list(
+                set(CI_SKIP[ci(dynamic=True)]) - set(CI_SKIP[ci(dynamic=False)])
            )
+        else:
+            args.exclude_exact = CI_SKIP[
+                CI(args.backend, training=args.training, dynamic=args.dynamic_shapes)
+            ]
        if args.ddp:
            # TODO: we could also hook DDP bench up to --speedup bench, _not_ for mgpu e2e perf,
            # but just to measure impact on singlenode of performing graph-breaks.