#include <iostream>

#include <gtest/gtest.h>
#include "caffe2/core/context.h"
#include "caffe2/core/context_gpu.h"
#include "caffe2/core/flags.h"
#include "caffe2/operators/utility_ops.h"
#include "caffe2/utils/math.h"

CAFFE2_DECLARE_string(caffe_test_root);

namespace caffe2 {

// Generic harness for testing a device-side binary operation: it creates 1-D
// CUDA tensors X0 (shapex0 elements) and X1 (shapex1 elements), fills them
// from the input0/input1 generators, runs `operation` to produce shapey
// outputs in Y, copies Y back to the host, and checks every element against
// correct_output.
void executeGpuBinaryOpTest(
    int shapex0,
    int shapex1,
    int shapey,
    std::function<float(int)> input0,
    std::function<float(int)> input1,
    std::function<void(
        int N0,
        int N1,
        const float* src0,
        const float* src1,
        float* dst,
        CUDAContext* context)> operation,
    std::function<float(int)> correct_output) {
  if (!HasCudaGPU())
    return;
  Workspace ws;
  DeviceOption option;
  option.set_device_type(CUDA);
  CUDAContext context(option);

  Blob* blobx0 = ws.CreateBlob("X0");
  Blob* blobx1 = ws.CreateBlob("X1");
  Blob* bloby = ws.CreateBlob("Y");
  Blob* bloby_host = ws.CreateBlob("Y_host");

  auto* tensorx0 = blobx0->GetMutable<Tensor<CUDAContext>>();
  auto* tensorx1 = blobx1->GetMutable<Tensor<CUDAContext>>();
  auto* tensory = bloby->GetMutable<Tensor<CUDAContext>>();

  vector<int> shapex0_vector{shapex0};
  vector<int> shapex1_vector{shapex1};
  vector<int> shapey_vector{shapey};

  tensorx0->Resize(shapex0_vector);
  tensorx1->Resize(shapex1_vector);
  tensory->Resize(shapey_vector);

  // Fill each device input one element at a time with math::Set, using the
  // supplied generator functions.
  for (int i = 0; i < shapex0; i++) {
    math::Set<float, CUDAContext>(
        1, input0(i), tensorx0->mutable_data<float>() + i, &context);
  }
  for (int i = 0; i < shapex1; i++) {
    math::Set<float, CUDAContext>(
        1, input1(i), tensorx1->mutable_data<float>() + i, &context);
  }
  // Run the operation under test and wait for the device to finish.
  operation(
      shapex0,
      shapex1,
      tensorx0->template data<float>(),
      tensorx1->template data<float>(),
      tensory->mutable_data<float>(),
      &context);
  context.FinishDeviceComputation();

  // Copy result to CPU so we can inspect it
  auto* tensory_host = bloby_host->GetMutable<Tensor<CPUContext>>();
  tensory_host->CopyFrom<CUDAContext, CUDAContext>(*tensory, &context);
  context.FinishDeviceComputation();

  // Compare every output element against the expected value.
  for (int i = 0; i < shapey; ++i) {
    EXPECT_EQ(tensory_host->data<float>()[i], correct_output(i));
  }
}

TEST(MathUtilGPUTest, testAddStripedBatch) {
  if (!HasCudaGPU())
    return;
  Workspace ws;
  DeviceOption option;
  option.set_device_type(CUDA);
  CUDAContext context(option);
  Blob* blobx = ws.CreateBlob("X");
  Blob* bloby = ws.CreateBlob("Y");
  Blob* bloby_host = ws.CreateBlob("Y_host");

  vector<int> shapex{33 * 9, 25};
  vector<int> shapey{33, 25};

  auto* tensorx = blobx->GetMutable<Tensor<CUDAContext>>();
  tensorx->Resize(shapex);
  int stripe = 33 * 25;
  vector<float> tot(33, 0.0f);
  // X holds 9 stripes of shape 33 x 25. Give every line of every stripe a
  // distinct value and accumulate the expected per-line sum in `tot`.
  for (int j = 0; j < 9; j++) {
    for (int k = 0; k < 33; k++) {
      // Each line is 25 elements wide; setting 25 elements (rather than 33)
      // avoids writing past the end of the last line of the last stripe.
      math::Set<float, CUDAContext>(
          25,
          1.0f + j + k,
          tensorx->mutable_data<float>() + j * stripe + k * 25,
          &context);
      tot[k] += 1.0f + j + k;
    }
  }

  auto* tensory = bloby->GetMutable<Tensor<CUDAContext>>();
  tensory->Resize(shapey);
  // Zero-initialize Y before accumulating the stripes into it.
  math::Set<float, CUDAContext>(
      stripe, 0.0f, tensory->mutable_data<float>(), &context);

  // Accumulate the 9 stripes of X (spaced `stripe` elements apart) into Y.
  math::AddStripedBatch<float, CUDAContext>(
      stripe,
      tensorx->template data<float>(),
      tensory->mutable_data<float>(),
      stripe,
      9,
      &context);
  context.FinishDeviceComputation();

  // Copy result to CPU so we can inspect it
  auto* tensory_host = bloby_host->GetMutable<Tensor<CPUContext>>();
  tensory_host->CopyFrom<CUDAContext, CUDAContext>(*tensory, &context);
  context.FinishDeviceComputation();

  // Every element of line k should equal the sum of that line across stripes.
  for (int k = 0; k < 33; k++) {
    for (int i = 0; i < 25; i++) {
      EXPECT_EQ(tensory_host->data<float>()[k * 25 + i], tot[k]);
    }
  }
}

TEST(MathUtilGPUTest, testReduceMin) {
  executeGpuBinaryOpTest(
      6,
      1,
      1,
      [](int /*i*/) { return 11.0f; },
      [](int /*i*/) { return 0.0f; },
      [](int N0,
         int /*N1*/,
         const float* src0,
         const float* /*src1*/,
         float* dst,
         CUDAContext* context) {
        Tensor<CUDAContext> aux;
        math::ReduceMin<float, CUDAContext>(N0, src0, dst, &aux, context);
      },
      [](int /*i*/) { return 11.0f; });
  executeGpuBinaryOpTest(
      6,
      1,
      1,
      [](int i) { return i == 3 ? 11.0f : 17.0f; },
      [](int /*i*/) { return 0.0f; },
      [](int N0,
         int /*N1*/,
         const float* src0,
         const float* /*src1*/,
         float* dst,
         CUDAContext* context) {
        Tensor<CUDAContext> aux;
        math::ReduceMin<float, CUDAContext>(N0, src0, dst, &aux, context);
      },
      [](int /*i*/) { return 11.0f; });
}

TEST(MathUtilGPUTest, testReduceMax) {
  executeGpuBinaryOpTest(
      6,
      1,
      1,
      [](int /*i*/) { return 11.0f; },
      [](int /*i*/) { return 0.0f; },
      [](int N0,
         int /*N1*/,
         const float* src0,
         const float* /*src1*/,
         float* dst,
         CUDAContext* context) {
        Tensor<CUDAContext> aux;
        math::ReduceMax<float, CUDAContext>(N0, src0, dst, &aux, context);
      },
      [](int /*i*/) { return 11.0f; });
  executeGpuBinaryOpTest(
      6,
      1,
      1,
      [](int i) { return i == 3 ? 17.0f : 11.0f; },
      [](int /*i*/) { return 0.0f; },
      [](int N0,
         int /*N1*/,
         const float* src0,
         const float* /*src1*/,
         float* dst,
         CUDAContext* context) {
        Tensor<CUDAContext> aux;
        math::ReduceMax<float, CUDAContext>(N0, src0, dst, &aux, context);
      },
      [](int /*i*/) { return 17.0f; });
}

TEST(MathUtilGPUTest, testElemwiseMax) {
  executeGpuBinaryOpTest(
      13,
      13,
      13,
      [](int i) { return 2.0f - i; },
      [](int i) { return i - 6.0f; },
      [](int N0,
         int /*N1*/,
         const float* src0,
         const float* src1,
         float* dst,
         CUDAContext* context) {
        math::ElemwiseMax<float, CUDAContext>(N0, src0, src1, dst, context);
      },
      [](int i) { return std::max(2.0f - i, i - 6.0f); });
}

TEST(MathUtilGPUTest, testCopyVector) {
  executeGpuBinaryOpTest(
      6,
      1,
      6,
      [](int i) { return 5.0f - i; },
      [](int /*i*/) { return 0.0f; },
      [](int N0,
         int /*N1*/,
         const float* src0,
         const float* /*src1*/,
         float* dst,
         CUDAContext* context) {
        math::CopyVector<float, CUDAContext>(N0, src0, dst, context);
      },
      [](int i) { return 5.0f - i; });
}
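
// Illustrative sketch, not part of the original suite: it exercises the same
// harness with elementwise addition. This assumes caffe2/utils/math.h
// declares math::Add<T, Context>(N, a, b, y, context) for elementwise a + b;
// if the signature differs in your tree, adjust the call accordingly.
TEST(MathUtilGPUTest, testElemwiseAddSketch) {
  executeGpuBinaryOpTest(
      13,
      13,
      13,
      [](int i) { return 2.0f - i; },
      [](int i) { return i - 6.0f; },
      [](int N0,
         int /*N1*/,
         const float* src0,
         const float* src1,
         float* dst,
         CUDAContext* context) {
        math::Add<float, CUDAContext>(N0, src0, src1, dst, context);
      },
      [](int i) { return (2.0f - i) + (i - 6.0f); });
}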

} // namespace caffe2