Enable inductor CI for TorchBench (#87465)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/87465
Approved by: https://github.com/malfet
diff --git a/.github/ci_commit_pins/text.txt b/.github/ci_commit_pins/text.txt
new file mode 100644
index 0000000..c0e01da
--- /dev/null
+++ b/.github/ci_commit_pins/text.txt
@@ -0,0 +1 @@
+5b78d074bd303eb230d30567646fcf0358ee2dd4
diff --git a/.github/scripts/filter_test_configs.py b/.github/scripts/filter_test_configs.py
index bb53144..f5c438c 100755
--- a/.github/scripts/filter_test_configs.py
+++ b/.github/scripts/filter_test_configs.py
@@ -24,7 +24,9 @@
     "functorch",
     "inductor",
     "inductor_distributed",
+    "inductor_huggingface",
     "inductor_timm",
+    "inductor_torchbench",
     "jit_legacy",
     "multigpu",
     "nogpu_AVX512",
diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml
index e839068..eb953ff 100644
--- a/.github/workflows/inductor.yml
+++ b/.github/workflows/inductor.yml
@@ -23,10 +23,11 @@
       cuda-arch-list: 8.6
       test-matrix: |
         { include: [
-          { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
-          { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+          { config: "inductor_torchbench", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
           { config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" },
         ]}
 
diff --git a/.jenkins/pytorch/common_utils.sh b/.jenkins/pytorch/common_utils.sh
index 8af2c93..7fc1dd6 100644
--- a/.jenkins/pytorch/common_utils.sh
+++ b/.jenkins/pytorch/common_utils.sh
@@ -101,22 +101,18 @@
   cat .github/ci_commit_pins/"${1}".txt
 }
 
+function install_torchtext() {  # Install torchtext from GitHub at the commit recorded in .github/ci_commit_pins/text.txt.
+  local commit  # declared on its own line so `local` does not mask the exit status of the substitution below
+  commit=$(get_pinned_commit text)  # get_pinned_commit cats .github/ci_commit_pins/<name>.txt
+  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/text.git@${commit}"  # same pip flags as install_torchvision
+}
+
 function install_torchvision() {
   local commit
   commit=$(get_pinned_commit vision)
   pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${commit}"
 }
 
-function checkout_install_torchvision() {
-  local commit
-  commit=$(get_pinned_commit vision)
-  git clone https://github.com/pytorch/vision
-  pushd vision
-  git checkout "${commit}"
-  time python setup.py install
-  popd
-}
-
 function clone_pytorch_xla() {
   if [[ ! -d ./xla ]]; then
     git clone --recursive --quiet https://github.com/pytorch/xla.git
@@ -194,13 +190,10 @@
 }
 
 function checkout_install_torchbench() {
-  local commit
-  commit=$(get_pinned_commit torchbench)
   git clone https://github.com/pytorch/benchmark torchbench
   pushd torchbench
-  git checkout "${commit}"
+  git checkout no_torchaudio  # NOTE(review): fixed ref name (presumably a branch) replaces the pinned torchbench commit; confirm whether the pin should come back
   python install.py
-  pip_install gym==0.25.2  # workaround issue in 0.26.0
   popd
 }
 
diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 135fb50..17437a5 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -256,20 +256,15 @@
   # pytest test/test_ops_gradients.py --verbose -k "not _complex and not test_inplace_grad_acos_cuda_float64"
 }
 
-test_inductor_huggingface_shard() {
-  if [[ -z "$NUM_TEST_SHARDS" ]]; then
-    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
-    exit 1
-  fi
+test_inductor_huggingface() {  # Run the HuggingFace accuracy benchmark in one pass; takes no arguments now that the "$1" partition id is gone.
   # Use test-reports directory under test folder will allow the CI to automatically pick up
   # the test reports and upload them to S3. Need to use full path here otherwise the script
   # will bark about file not found later on
   TEST_REPORTS_DIR=$(pwd)/test/test-reports
   mkdir -p "$TEST_REPORTS_DIR"
   python benchmarks/dynamo/huggingface.py --ci --training --accuracy \
-    --device cuda --inductor --float32 --total-partitions 1 --partition-id "$1" \
-    --output "$TEST_REPORTS_DIR"/inductor_huggingface_"$1".csv
-  python benchmarks/dynamo/check_csv.py -f "$TEST_REPORTS_DIR"/inductor_huggingface_"$1".csv
+    --device cuda --inductor --float32 --output "$TEST_REPORTS_DIR"/inductor_huggingface.csv  # single CSV; the partition flags are no longer passed
+  python benchmarks/dynamo/check_csv.py -f "$TEST_REPORTS_DIR"/inductor_huggingface.csv  # NOTE(review): presumably fails the job on non-passing rows; check_csv.py is not shown here
 }
 
 test_inductor_timm_shard() {
@@ -288,6 +283,14 @@
   python benchmarks/dynamo/check_csv.py -f "$TEST_REPORTS_DIR"/inductor_timm_"$1".csv
 }
 
+test_inductor_torchbench() {  # Run the TorchBench accuracy benchmark under inductor, then pass its CSV to check_csv.py.
+  TEST_REPORTS_DIR=$(pwd)/test/test-reports  # absolute path under test/, the same report directory test_inductor_huggingface uses
+  mkdir -p "$TEST_REPORTS_DIR"
+  PYTHONPATH=$(pwd)/torchbench python benchmarks/dynamo/torchbench.py --ci --training --accuracy \
+    --device cuda --inductor --float32 --output "$TEST_REPORTS_DIR"/inductor_torchbench.csv  # PYTHONPATH above assumes ./torchbench is the clone made by checkout_install_torchbench in this same working directory
+  python benchmarks/dynamo/check_csv.py -f "$TEST_REPORTS_DIR"/inductor_torchbench.csv  # NOTE(review): presumably fails the job on non-passing rows; confirm in check_csv.py
+}
+
 test_python_gloo_with_tls() {
   source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
   assert_git_not_dirty
@@ -742,25 +745,32 @@
   install_filelock
   install_triton
   test_dynamo_shard 2
-elif [[ "${TEST_CONFIG}" == *inductor_timm* && $SHARD_NUMBER -lt 3 && $NUM_TEST_SHARDS -gt 1 ]]; then
+elif [[ "${TEST_CONFIG}" == *inductor_huggingface* ]]; then
+  install_torchvision
+  install_filelock
+  install_triton
+  install_huggingface
+  test_inductor_huggingface
+elif [[ "${TEST_CONFIG}" == *inductor_timm* && $NUM_TEST_SHARDS -gt 1 ]]; then
   install_torchvision
   install_filelock
   install_triton
   install_timm
   id=$((SHARD_NUMBER-1))
   test_inductor_timm_shard $id
-elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
+elif [[ "${TEST_CONFIG}" == *inductor_torchbench* ]]; then
+  install_torchtext
+  install_torchvision
+  install_filelock
+  install_triton
+  checkout_install_torchbench
+  test_inductor_torchbench
+elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 ]]; then
   install_torchvision
   install_filelock
   install_triton
   test_inductor
   test_inductor_distributed
-elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
-  install_torchvision
-  install_filelock
-  install_triton
-  install_huggingface
-  test_inductor_huggingface_shard 0
 elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
   test_without_numpy
   install_torchvision
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index 789ebc3..cad954f 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -110,27 +110,16 @@
     # *CI_SKIP_AOT_EAGER_TRAINING,
     # *CI_SKIP_INDCUTOR_INFERENCE,
     # TorchBench
-    "attention_is_all_you_need_pytorch",
-    "drq",
-    "hf_Albert",
-    "hf_Bart",
-    "hf_GPT2",
-    "hf_Reformer",
+    "detectron2",
     "mobilenet_v3_large",
     "moco",
-    "pytorch_struct",
-    "vgg16",
-    "speech_transformer",  # from functionalization
-    "vision_maskrcnn",  # from functionalization
-    "timm_efficientnet",  # from functionalization (only fails for inductor)
-    "hf_Bert",
-    "soft_actor_critic",
     "tacotron2",
-    "yolov3",
+    "vision_maskrcnn",  # from functionalization
     # OOM
     "Background_Matting",
     "fastNLP_Bert",
     "hf_BigBird",
+    "hf_T5_base",  # fp64_OOM
     "mobilenet_v2",
     "mobilenet_v2_quantized_qat",
     "resnet50_quantized_qat",