[ROCm] Add debug flag (#36521)
Summary:
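Export HSAKMT_DEBUG_LEVEL=4 in the ROCm CI setup of .jenkins/caffe2/test.sh and
.jenkins/pytorch/common.sh. This kernel debug flag is a temporary measure that
should help locate the issues we are observing on some of the CI nodes.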
Pull Request resolved: https://github.com/pytorch/pytorch/pull/36521
Differential Revision: D21010612
Pulled By: ezyang
fbshipit-source-id: d746e4eb0af832e770d2231bfee4154b6e703c19
diff --git a/.jenkins/caffe2/test.sh b/.jenkins/caffe2/test.sh
index 0bc29a7..af7ba2e 100755
--- a/.jenkins/caffe2/test.sh
+++ b/.jenkins/caffe2/test.sh
@@ -7,6 +7,10 @@
echo 'Skipping tests'
exit 0
fi
+if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
+ # temporary to locate some kernel issues on the CI nodes
+ export HSAKMT_DEBUG_LEVEL=4
+fi
# Find where cpp tests and Caffe2 itself are installed
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
diff --git a/.jenkins/pytorch/common.sh b/.jenkins/pytorch/common.sh
index 38519c1..d928c3e 100644
--- a/.jenkins/pytorch/common.sh
+++ b/.jenkins/pytorch/common.sh
@@ -30,6 +30,8 @@
shopt -s expand_aliases
export PYTORCH_TEST_WITH_ROCM=1
alias python="$PYTHON"
+ # temporary to locate some kernel issues on the CI nodes
+ export HSAKMT_DEBUG_LEVEL=4
fi
# This token is used by a parser on Jenkins logs for determining