[pytorch-vulkan] floor-divide for tensor, tensor (#112190)
Summary: Add Vulkan backend support for tensor-tensor floor_divide, covering both aten::floor_divide and the in-place aten::floor_divide_.Tensor. The op is implemented through the existing binary-op shader template with OPERATOR floor(X / Y).
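For context, a minimal usage sketch (not part of this patch) of the new tensor-tensor path; it assumes an ATen build with the Vulkan backend (USE_VULKAN) available on the target device:
```
// Sketch only: exercises the new tensor-tensor floor_divide path on Vulkan.
#include <ATen/ATen.h>
#include <c10/core/InferenceMode.h>
#include <iostream>

int main() {
  c10::InferenceMode mode;

  // Random CPU inputs; keep the divisor >= 0.5, as in the tests below, to
  // limit error near floor boundaries.
  auto a_cpu = at::rand({2, 3, 4, 4}, at::device(at::kCPU).dtype(at::kFloat)) * 10.0;
  auto b_cpu = at::rand({2, 3, 4, 4}, at::device(at::kCPU).dtype(at::kFloat)) + 0.5;

  // Out-of-place: dispatches to the new aten::floor_divide Vulkan kernel.
  auto out_cpu = at::floor_divide(a_cpu, b_cpu);
  auto out_vk = at::floor_divide(a_cpu.vulkan(), b_cpu.vulkan());

  // In-place variant: aten::floor_divide_.Tensor.
  auto c_vk = a_cpu.vulkan();
  c_vk.floor_divide_(b_cpu.vulkan());

  // Compare against CPU; tolerance is ~1.0 because floor amplifies small
  // numerical differences near integer boundaries.
  std::cout << (out_cpu - out_vk.cpu()).abs().max() << std::endl;
  return 0;
}
```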
Test Plan:
## Compile on Mac and run on Android
```
buck2 build -c ndk.static_linking=true -c pt.enable_qpl=0 --target-platforms=ovr_config//platform/android:arm32-fbsource //xplat/caffe2:pt_vulkan_api_test_binAndroid --show-output && adb push buck-out/v2/gen/fbsource/f1f3f9bed27e143c/xplat/caffe2/__pt_vulkan_api_test_binAndroid__/pt_vulkan_api_test_binAndroid /data/local/tmp
```
Run on Android
```
$ adb shell /data/local/tmp/pt_vulkan_api_test_binAndroid
...
[ RUN      ] VulkanAPITest.lstm_prepack_success
[       OK ] VulkanAPITest.lstm_prepack_success (11 ms)
[ RUN      ] VulkanAPITest.querypool_flushed_shader_log
xplat/caffe2/aten/src/ATen/test/vulkan_api_test.cpp:7667: Skipped
QueryPool is not available
[  SKIPPED ] VulkanAPITest.querypool_flushed_shader_log (0 ms)
[----------] 396 tests from VulkanAPITest (29980 ms total)

[----------] Global test environment tear-down
[==========] 396 tests from 1 test suite ran. (29980 ms total)
[  PASSED  ] 395 tests.
[  SKIPPED ] 1 test, listed below:
[  SKIPPED ] VulkanAPITest.querypool_flushed_shader_log

  YOU HAVE 7 DISABLED TESTS
```
All tests passed.
Full Output: P865232089
Reviewed By: copyrightly
Differential Revision: D50677361
Pull Request resolved: https://github.com/pytorch/pytorch/pull/112190
Approved by: https://github.com/manuelcandales
diff --git a/aten/src/ATen/native/vulkan/glsl/templates/binary_op_params.yaml b/aten/src/ATen/native/vulkan/glsl/templates/binary_op_params.yaml
index c41a760..87bb76d 100644
--- a/aten/src/ATen/native/vulkan/glsl/templates/binary_op_params.yaml
+++ b/aten/src/ATen/native/vulkan/glsl/templates/binary_op_params.yaml
@@ -40,6 +40,9 @@
- NAME: pow
IS_DIV: 0
OPERATOR: pow(X, Y)
+ - NAME: floor_divide
+ IS_DIV: 1
+ OPERATOR: floor(X / Y)
binary_op_tensor_inplace:
parameter_names_with_default_values:
@@ -59,3 +62,6 @@
- NAME: pow_
IS_DIV: 0
OPERATOR: pow(X, Y)
+ - NAME: floor_divide_
+ IS_DIV: 1
+ OPERATOR: floor(X / Y)
diff --git a/aten/src/ATen/native/vulkan/ops/BinaryOp.cpp b/aten/src/ATen/native/vulkan/ops/BinaryOp.cpp
index 754fa49..4bd6611 100644
--- a/aten/src/ATen/native/vulkan/ops/BinaryOp.cpp
+++ b/aten/src/ATen/native/vulkan/ops/BinaryOp.cpp
@@ -539,6 +539,16 @@
VK_KERNEL(floor_mul_scalar_));
}
+Tensor floor_divide_tensor(const Tensor& self, const Tensor& other) {
+ return binary_op_tensor(
+ self, other, c10::optional<Scalar>(), VK_KERNEL(floor_divide));
+}
+
+Tensor& floor_divide_tensor_(Tensor& self, const Tensor& other_arg) {
+ return binary_op_tensor_(
+ self, other_arg, c10::optional<Scalar>(), VK_KERNEL(floor_divide_));
+}
+
#ifdef USE_VULKAN_API
TORCH_LIBRARY_IMPL(aten, Vulkan, m) {
@@ -572,6 +582,12 @@
m.impl(
TORCH_SELECTIVE_NAME("aten::floor_divide_.Scalar"),
TORCH_FN(floor_divide_scalar_));
+ m.impl(
+ TORCH_SELECTIVE_NAME("aten::floor_divide"),
+ TORCH_FN(floor_divide_tensor));
+ m.impl(
+ TORCH_SELECTIVE_NAME("aten::floor_divide_.Tensor"),
+ TORCH_FN(floor_divide_tensor_));
}
#endif /* USE_VULKAN_API */
diff --git a/aten/src/ATen/test/vulkan_api_test.cpp b/aten/src/ATen/test/vulkan_api_test.cpp
index f61aa06..02a6a60 100644
--- a/aten/src/ATen/test/vulkan_api_test.cpp
+++ b/aten/src/ATen/test/vulkan_api_test.cpp
@@ -3720,6 +3720,94 @@
test_floor_divide_scalar_inplace({3, 3, 12, 12}, 0.3, 0.08);
}
+TEST_F(VulkanAPITest, floor_divide_zero_dim_tensor) {
+ c10::InferenceMode mode;
+
+ std::vector<int64_t> input_shape{5, 3, 4, 5};
+ float input_scale = 100.0;
+
+ auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
+ in_cpu = at::mul(in_cpu, input_scale);
+ auto in_vk = in_cpu.vulkan();
+
+ auto other_cpu = at::zeros({}, at::device(at::kCPU).dtype(at::kFloat)) + 10.0f;
+ auto other_vk = other_cpu.vulkan();
+
+ auto out_cpu = at::floor_divide(in_cpu, other_cpu);
+ auto out_vk = at::floor_divide(in_vk, other_vk);
+
+ // max tolerance is 1.0 due to floor.
+ // may consider adding an extra check on the number of violations; they should be rare.
+ const auto check = checkRtol(out_cpu - out_vk.cpu(), 1.0f);
+ if (!check) {
+ std::cout << "floor_divide test failed with "
+ << "scale: " << input_scale
+ << std::endl;
+ }
+
+ ASSERT_TRUE(check);
+}
+
+TEST_F(VulkanAPITest, floor_divide_tensor) {
+ c10::InferenceMode mode;
+
+ std::vector<int64_t> input_shape{6, 3, 5, 5};
+ float input_scale = 10.0;
+
+ auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
+ in_cpu = at::mul(in_cpu, input_scale);
+ // "other" is at least 0.5 to avoid rounding error causes by very small
+ // values.
+ auto other_cpu =
+ at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat)) + 0.5;
+
+ auto in_vk = in_cpu.vulkan();
+ auto other_vk = other_cpu.vulkan();
+
+ auto out_cpu = at::floor_divide(in_cpu, other_cpu);
+ auto out_vk = at::floor_divide(in_vk, other_vk);
+
+ // max tolerance is 1.0 due to floor.
+ // may consider adding an extra check on the number of violations; they should be rare.
+ const auto check = checkRtol(out_cpu - out_vk.cpu(), 1.0f);
+ if (!check) {
+ std::cout << "floor_divide test failed with "
+ << "scale: " << input_scale << std::endl;
+ }
+
+ ASSERT_TRUE(check);
+}
+
+TEST_F(VulkanAPITest, floor_divide_tensor_inplace) {
+ c10::InferenceMode mode;
+
+ std::vector<int64_t> input_shape{5, 3, 5, 5};
+ float input_scale = 10.0;
+
+ auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
+ in_cpu = at::mul(in_cpu, input_scale);
+ // "other" is at least 0.5 to avoid rounding error causes by very small
+ // values.
+ auto other_cpu =
+ at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat)) + 0.5;
+
+ auto in_vk = in_cpu.vulkan();
+ auto other_vk = other_cpu.vulkan();
+
+ in_cpu.floor_divide_(other_cpu);
+ in_vk.floor_divide_(other_vk);
+
+ // max tolerance is 1.0 due to floor.
+ // may consider adding an extra check on the number of violations; they should be rare.
+ const auto check = checkRtol(in_cpu - in_vk.cpu(), 1.0f);
+ if (!check) {
+ std::cout << "floor_divide test failed with "
+ << "scale: " << input_scale << std::endl;
+ }
+
+ ASSERT_TRUE(check);
+}
+
TEST_F(VulkanAPITest, relu) {
const auto in_cpu = at::rand({17, 197, 302, 5}, at::device(at::kCPU).dtype(at::kFloat));
const auto in_vulkan = in_cpu.vulkan();