Enable caffe2 tests for ROCm jobs (#41604)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/41604

Reviewed By: ezyang

Differential Revision: D22603703

Pulled By: malfet

fbshipit-source-id: 789ccf2bb79668a5a68006bb877b2d88fb569809
diff --git a/.circleci/cimodel/data/pytorch_build_definitions.py b/.circleci/cimodel/data/pytorch_build_definitions.py
index 22aa53a..70fa343 100644
--- a/.circleci/cimodel/data/pytorch_build_definitions.py
+++ b/.circleci/cimodel/data/pytorch_build_definitions.py
@@ -34,7 +34,7 @@
 
     @staticmethod
     def is_test_phase(phase):
-        return phase in ["test", "test1", "test2"]
+        return "test" in phase
 
     # TODO: Eliminate the special casing for docker paths
     # In the short term, we *will* need to support special casing as docker images are merged for caffe2 and pytorch
@@ -235,7 +235,7 @@
 
         elif compiler_name == "rocm":
             rocm_version = fc.find_prop("compiler_version")
-            restrict_phases = ["build", "test1", "test2"]
+            restrict_phases = ["build", "test1", "test2", "caffe2_test"]
 
         elif compiler_name == "android":
             android_ndk_version = fc.find_prop("compiler_version")
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 19bf4ca..47b3472 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -608,12 +608,24 @@
         command: |
           set -e
 
+          cat >docker_commands.sh \<<EOL
+          # =================== The following code will be executed inside Docker container ===================
+          set -ex
+          export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}
+          ${PARALLEL_FLAGS}
+          source ./workspace/env
+          cd workspace
+          EOL
           if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
-            export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+            echo ".jenkins/pytorch/multigpu-test.sh" >> docker_commands.sh
+          elif [[ ${BUILD_ENVIRONMENT} == *caffe2* ]]; then
+            echo "pip -q install --user -b /tmp/pip_install_onnx \"file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx\"" >> docker_commands.sh
+            echo ".jenkins/caffe2/test.sh" >> docker_commands.sh
           else
-            export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+            echo ".jenkins/pytorch/test.sh" >> docker_commands.sh
           fi
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+          echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
+          unbuffer bash command.sh | ts
     - run:
         name: Report results
         no_output_timeout: "5m"
@@ -5733,6 +5745,19 @@
           build_environment: "pytorch-linux-xenial-rocm3.5.1-py3.6-test2"
           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-rocm3.5.1-py3.6:8bdba785b1eac4d297d5f5930f979518012a56e0"
           resource_class: pytorch/amd-gpu
+      - pytorch_linux_test:
+          name: pytorch_linux_xenial_rocm3_5_1_py3_6_caffe2_test
+          requires:
+            - pytorch_linux_xenial_rocm3_5_1_py3_6_build
+          filters:
+            branches:
+              only:
+                - master
+                - /ci-all\/.*/
+                - /release\/.*/
+          build_environment: "pytorch-linux-xenial-rocm3.5.1-py3.6-caffe2_test"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-rocm3.5.1-py3.6:8bdba785b1eac4d297d5f5930f979518012a56e0"
+          resource_class: pytorch/amd-gpu
       - pytorch_linux_build:
           name: pytorch_linux_xenial_py3_6_gcc5_4_build
           build_environment: "pytorch-linux-xenial-py3.6-gcc5.4-build"
diff --git a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
index 745c9f8..091c98a 100644
--- a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@@ -146,12 +146,24 @@
         command: |
           set -e
 
+          cat >docker_commands.sh \<<EOL
+          # =================== The following code will be executed inside Docker container ===================
+          set -ex
+          export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}
+          ${PARALLEL_FLAGS}
+          source ./workspace/env
+          cd workspace
+          EOL
           if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
-            export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+            echo ".jenkins/pytorch/multigpu-test.sh" >> docker_commands.sh
+          elif [[ ${BUILD_ENVIRONMENT} == *caffe2* ]]; then
+            echo "pip -q install --user -b /tmp/pip_install_onnx \"file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx\"" >> docker_commands.sh
+            echo ".jenkins/caffe2/test.sh" >> docker_commands.sh
           else
-            export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+            echo ".jenkins/pytorch/test.sh" >> docker_commands.sh
           fi
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+          echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
+          unbuffer bash command.sh | ts
     - run:
         name: Report results
         no_output_timeout: "5m"
diff --git a/.jenkins/caffe2/test.sh b/.jenkins/caffe2/test.sh
index 21c7875..0d15bf6 100755
--- a/.jenkins/caffe2/test.sh
+++ b/.jenkins/caffe2/test.sh
@@ -138,6 +138,10 @@
   # This test has been flaky in ROCm CI (but note the tests are
   # cpu-only so should be unrelated to ROCm)
   rocm_ignore_test+=("--ignore $caffe2_pypath/python/operator_test/blobs_queue_db_test.py")
+  # This test is skipped on Jenkins(compiled without MKL) and otherwise known flaky
+  rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/convfusion_op_test.py")
+  # This test is skipped on Jenkins(compiled without MKL) and causing segfault on Circle
+  rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/pool_op_test.py")
 fi
 
 # NB: Warnings are disabled because they make it harder to see what
diff --git a/caffe2/python/ideep/elementwise_sum_op_test.py b/caffe2/python/ideep/elementwise_sum_op_test.py
index fb92834..9daf340 100644
--- a/caffe2/python/ideep/elementwise_sum_op_test.py
+++ b/caffe2/python/ideep/elementwise_sum_op_test.py
@@ -44,7 +44,7 @@
            batch_size=st.integers(1, 3),
            inputs=st.integers(2, 7),
            inplace=st.booleans(),
-           **mu.gcs)
+           **mu.gcs_cpu_ideep)
     def test_elementwise_sum_fallback(self,
                                       size,
                                       input_channels,
@@ -84,7 +84,7 @@
            batch_size=st.integers(1, 3),
            inputs=st.integers(2, 7),
            inplace=st.booleans(),
-           **mu.gcs)
+           **mu.gcs_cpu_ideep)
     def test_int8_elementwise_sum(self,
                                  size,
                                  input_channels,
diff --git a/caffe2/python/ideep/expanddims_squeeze_op_test.py b/caffe2/python/ideep/expanddims_squeeze_op_test.py
index cdd7cd9..4a4fb73 100644
--- a/caffe2/python/ideep/expanddims_squeeze_op_test.py
+++ b/caffe2/python/ideep/expanddims_squeeze_op_test.py
@@ -33,7 +33,7 @@
     @given(
         squeeze_dims=st.lists(st.integers(0, 3), min_size=1, max_size=3),
         inplace=st.booleans(),
-        **mu.gcs
+        **mu.gcs_cpu_ideep
         )
     def test_squeeze_fallback(self, squeeze_dims, inplace, gc, dc):
         shape = [
@@ -92,7 +92,7 @@
     @given(
         squeeze_dims=st.lists(st.integers(0, 3), min_size=1, max_size=3),
         inplace=st.booleans(),
-        **mu.gcs
+        **mu.gcs_cpu_ideep
         )
     def test_expand_dims_fallback(self, squeeze_dims, inplace, gc, dc):
         oshape = [
diff --git a/caffe2/python/ideep/fc_op_test.py b/caffe2/python/ideep/fc_op_test.py
index e4a9ee0..389a466 100644
--- a/caffe2/python/ideep/fc_op_test.py
+++ b/caffe2/python/ideep/fc_op_test.py
@@ -261,7 +261,7 @@
             self.assertGradientChecks(gc, op, [X, W, b], i, [0])
 
     @given(n=st.integers(2, 5), m=st.integers(2, 5),
-           k=st.integers(2, 5), **mu.gcs)
+           k=st.integers(2, 5), **mu.gcs_cpu_ideep)
     def test_int8_fc_4_dims(self, n, m, k, gc, dc):
         X = np.random.rand(m, k, m, m).astype(np.float32) - 0.5
         w = np.random.rand(n, k, m, m).astype(np.float32) - 0.5
diff --git a/caffe2/python/ideep/pool_op_test.py b/caffe2/python/ideep/pool_op_test.py
index 4fa6398..bd7b283 100644
--- a/caffe2/python/ideep/pool_op_test.py
+++ b/caffe2/python/ideep/pool_op_test.py
@@ -49,7 +49,7 @@
            input_channels=st.integers(1, 3),
            batch_size=st.integers(1, 3),
            method=st.sampled_from(["MaxPool", "AveragePool"]),
-           **mu.gcs)
+           **mu.gcs_cpu_ideep)
     def test_int8_pooling(self, stride, pad, kernel, size,
                          input_channels, batch_size,
                          method, gc, dc):
diff --git a/caffe2/python/ideep/relu_op_test.py b/caffe2/python/ideep/relu_op_test.py
index 4e4451d..79ad242 100644
--- a/caffe2/python/ideep/relu_op_test.py
+++ b/caffe2/python/ideep/relu_op_test.py
@@ -35,7 +35,7 @@
            input_channels=st.integers(1, 3),
            batch_size=st.integers(1, 3),
            inplace=st.booleans(),
-           **mu.gcs)
+           **mu.gcs_cpu_ideep)
     def test_int8_relu(self, size, input_channels, batch_size, inplace, gc, dc):
         relu_fp32 = core.CreateOperator(
             "Relu",