[ROCm] Add debug flag (#36521)

Summary:
This exports the kernel debug flag HSAKMT_DEBUG_LEVEL=4 in the ROCm CI
scripts; it should help locate the issues we are observing on some of the CI nodes.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/36521

Differential Revision: D21010612

Pulled By: ezyang

fbshipit-source-id: d746e4eb0af832e770d2231bfee4154b6e703c19
diff --git a/.jenkins/caffe2/test.sh b/.jenkins/caffe2/test.sh
index 0bc29a7..af7ba2e 100755
--- a/.jenkins/caffe2/test.sh
+++ b/.jenkins/caffe2/test.sh
@@ -7,6 +7,10 @@
   echo 'Skipping tests'
   exit 0
 fi
+if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
+  # Temporary (ROCm builds only): export the HSAKMT debug level to help locate kernel issues seen on some CI nodes.
+  export HSAKMT_DEBUG_LEVEL=4
+fi
 
 # Find where cpp tests and Caffe2 itself are installed
 if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
diff --git a/.jenkins/pytorch/common.sh b/.jenkins/pytorch/common.sh
index 38519c1..d928c3e 100644
--- a/.jenkins/pytorch/common.sh
+++ b/.jenkins/pytorch/common.sh
@@ -30,6 +30,8 @@
   shopt -s expand_aliases
   export PYTORCH_TEST_WITH_ROCM=1
   alias python="$PYTHON"
+  # Temporary: export the HSAKMT debug level to help locate kernel issues seen on some CI nodes.
+  export HSAKMT_DEBUG_LEVEL=4
 fi
 
 # This token is used by a parser on Jenkins logs for determining