Add freezing option to inductor benchmarking: `--freezing` flag and `freezing_cudagraphs` dashboard run (#105616)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/105616
Approved by: https://github.com/desertfire
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index 9c4f84b..d17598a 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -378,6 +378,11 @@
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs --cpp-wrapper "$@" \
--output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
+ if [[ "$DASHBOARD_TAG" == *freezing_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
+ python "benchmarks/dynamo/$suite.py" \
+ "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \
+ --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_${suite}_${dtype}_${mode}_cuda_${target}.csv"
+ fi
if [[ "$DASHBOARD_TAG" == *aotinductor-true* ]] && [[ "$mode" == "inference" ]]; then
python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \
diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml
index 5f36de6..e57429a 100644
--- a/.github/workflows/inductor-perf-test-nightly.yml
+++ b/.github/workflows/inductor-perf-test-nightly.yml
@@ -35,6 +35,11 @@
required: false
type: boolean
default: false
+ freezing_cudagraphs:
+ description: Run inductor_cudagraphs with freezing for inference?
+ required: false
+ type: boolean
+ default: false
aotinductor:
description: Run aot_inductor for inference?
required: false
@@ -94,7 +99,7 @@
if: github.event_name == 'workflow_dispatch'
with:
build-environment: linux-bionic-cuda11.8-py3.10-gcc7-sm80
- dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}
+ dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}
docker-image: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index 66b68e8..b72bb16 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -2471,6 +2471,9 @@
"--cpp-wrapper", action="store_true", help="turn on cpp/cuda wrapper codegen"
)
parser.add_argument(
+ "--freezing", action="store_true", help="turn on freezing", default=False
+ )
+ parser.add_argument(
"--ci", action="store_true", help="Flag to tell that its a CI run"
)
parser.add_argument(
@@ -3179,6 +3182,8 @@
inductor_config.split_reductions = not args.disable_split_reductions
inductor_config.triton.divisible_by_16 = not args.disable_divisible_by_16
inductor_config.cpp_wrapper = args.cpp_wrapper
+ if args.inference:
+ inductor_config.freezing = args.freezing
runner.setup_amp()