// blob: ae54faa4e628ffedf6519575ec203fb38b11fe9b [file] [log] [blame]
#include <gtest/gtest.h>
#include "caffe2/core/blob.h"
#include "caffe2/core/context.h"
#include "caffe2/core/hip/context_hip.h"
#include "caffe2/core/tensor.h"
#include "caffe2/operators/utility_ops.h"
#include "caffe2/proto/caffe2.pb.h"
#include "caffe2/utils/conversions.h"
#include "caffe2/utils/math.h"
namespace caffe2 {
// Exercises math::Gemm<float, HIPContext> with neither operand transposed.
// X is 5x10 and W is 10x6, both filled with ones, so every element of the
// 5x6 output Y equals alpha * 10 + beta * (previous Y).
// Returns early (and therefore passes) when HasHipGPU() reports no device.
TEST(MathROCBLASTest, GemmNoTransNoTrans) {
  if (!HasHipGPU())
    return;
  Workspace ws;
  DeviceOption option;
  option.set_device_type(HIP);
  HIPContext context(option);
  Blob* blobX = ws.CreateBlob("X");
  Blob* blobW = ws.CreateBlob("W");
  Blob* blobY = ws.CreateBlob("Y");
  Blob* blobY_host = ws.CreateBlob("Y_host");
  vector<int> shapeX{5, 10};
  vector<int> shapeW{10, 6};
  vector<int> shapeY{5, 6};
  auto* tensorX = blobX->GetMutable<Tensor<HIPContext>>();
  tensorX->Resize(shapeX);
  auto* tensorW = blobW->GetMutable<Tensor<HIPContext>>();
  tensorW->Resize(shapeW);
  auto* tensorY = blobY->GetMutable<Tensor<HIPContext>>();
  tensorY->Resize(shapeY);
  auto* tensorY_host = blobY_host->GetMutable<Tensor<CPUContext>>();
  tensorY_host->Resize(shapeY);
  EXPECT_EQ(tensorX->size(), 50);
  EXPECT_EQ(tensorW->size(), 60);
  EXPECT_EQ(tensorY->size(), 30);
  math::Set<float, HIPContext>(
      tensorX->size(), 1, tensorX->mutable_data<float>(), &context);
  math::Set<float, HIPContext>(
      tensorW->size(), 1, tensorW->mutable_data<float>(), &context);
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const float kZero = 0.0f;

  // Computes Y = alpha * X * W + beta * Y on the device.
  const auto run_gemm = [&](const float alpha, const float beta) {
    math::Gemm<float, HIPContext>(
        CblasNoTrans,
        CblasNoTrans,
        5,
        6,
        10,
        alpha,
        tensorX->data<float>(),
        tensorW->data<float>(),
        beta,
        tensorY->mutable_data<float>(),
        &context);
  };

  // Copies Y to the host tensor and checks that every element equals
  // `expected`. Non-fatal gtest assertions are used so that a mismatch is
  // reported per element rather than aborting the whole test binary.
  const auto verify_output = [&](const float expected) {
    context.FinishDeviceComputation();
    tensorY_host->CopyFrom<HIPContext, HIPContext>(*tensorY, &context);
    context.FinishDeviceComputation();
    EXPECT_EQ(tensorY_host->size(), 30);
    for (int i = 0; i < tensorY_host->size(); ++i) {
      EXPECT_FLOAT_EQ(expected, tensorY_host->data<float>()[i])
          << "at index " << i;
    }
  };

  // Overwrite: 1 * 10 + 0 * Y = 10.
  run_gemm(kOne, kZero);
  verify_output(10.0f);

  // Test Accumulate: 1 * 10 + 0.5 * 10 = 15.
  run_gemm(kOne, kPointFive);
  verify_output(15.0f);

  // Test Accumulate: 0.5 * 10 + 1 * 15 = 20.
  run_gemm(kPointFive, kOne);
  verify_output(20.0f);
}
// Exercises math::Gemm<float, HIPContext> with the second operand passed as
// CblasTrans. X is 5x10 and W is stored as 6x10, both filled with ones, so
// every element of the 5x6 output Y equals alpha * 10 + beta * (previous Y).
// Returns early (and therefore passes) when HasHipGPU() reports no device.
TEST(MathROCBLASTest, GemmNoTransTrans) {
  if (!HasHipGPU())
    return;
  Workspace ws;
  DeviceOption option;
  option.set_device_type(HIP);
  HIPContext context(option);
  Blob* blobX = ws.CreateBlob("X");
  Blob* blobW = ws.CreateBlob("W");
  Blob* blobY = ws.CreateBlob("Y");
  Blob* blobY_host = ws.CreateBlob("Y_host");
  vector<int> shapeX{5, 10};
  vector<int> shapeW{6, 10};
  vector<int> shapeY{5, 6};
  auto* tensorX = blobX->GetMutable<Tensor<HIPContext>>();
  tensorX->Resize(shapeX);
  auto* tensorW = blobW->GetMutable<Tensor<HIPContext>>();
  tensorW->Resize(shapeW);
  auto* tensorY = blobY->GetMutable<Tensor<HIPContext>>();
  tensorY->Resize(shapeY);
  auto* tensorY_host = blobY_host->GetMutable<Tensor<CPUContext>>();
  tensorY_host->Resize(shapeY);
  EXPECT_EQ(tensorX->size(), 50);
  EXPECT_EQ(tensorW->size(), 60);
  EXPECT_EQ(tensorY->size(), 30);
  math::Set<float, HIPContext>(
      tensorX->size(), 1, tensorX->mutable_data<float>(), &context);
  math::Set<float, HIPContext>(
      tensorW->size(), 1, tensorW->mutable_data<float>(), &context);
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const float kZero = 0.0f;

  // Computes Y = alpha * X * W^T + beta * Y on the device.
  const auto run_gemm = [&](const float alpha, const float beta) {
    math::Gemm<float, HIPContext>(
        CblasNoTrans,
        CblasTrans,
        5,
        6,
        10,
        alpha,
        tensorX->data<float>(),
        tensorW->data<float>(),
        beta,
        tensorY->mutable_data<float>(),
        &context);
  };

  // Copies Y to the host tensor and checks that every element equals
  // `expected`. Non-fatal gtest assertions are used so that a mismatch is
  // reported per element rather than aborting the whole test binary.
  const auto verify_output = [&](const float expected) {
    context.FinishDeviceComputation();
    tensorY_host->CopyFrom<HIPContext, HIPContext>(*tensorY, &context);
    context.FinishDeviceComputation();
    EXPECT_EQ(tensorY_host->size(), 30);
    for (int i = 0; i < tensorY_host->size(); ++i) {
      EXPECT_FLOAT_EQ(expected, tensorY_host->data<float>()[i])
          << "at index " << i;
    }
  };

  // Overwrite: 1 * 10 + 0 * Y = 10.
  run_gemm(kOne, kZero);
  verify_output(10.0f);

  // Test Accumulate: 1 * 10 + 0.5 * 10 = 15.
  run_gemm(kOne, kPointFive);
  verify_output(15.0f);

  // Test Accumulate: 0.5 * 10 + 1 * 15 = 20.
  run_gemm(kPointFive, kOne);
  verify_output(20.0f);
}
// Exercises math::Gemv<float, HIPContext> with CblasNoTrans. A is 5x10 and x
// has 10 elements, both filled with ones, so every element of the 5-element
// output y equals alpha * 10 + beta * (previous y).
// Returns early (and therefore passes) when HasHipGPU() reports no device.
TEST(MathROCBLASTest, GemvNoTrans) {
  if (!HasHipGPU())
    return;
  Workspace ws;
  DeviceOption option;
  option.set_device_type(HIP);
  HIPContext context(option);
  Blob* blobA = ws.CreateBlob("A");
  Blob* blobX = ws.CreateBlob("X");
  Blob* blobY = ws.CreateBlob("Y");
  Blob* blobY_host = ws.CreateBlob("Y_host");
  vector<int> shapeA{5, 10};
  vector<int> shapeX{10};
  vector<int> shapeY{5};
  auto* tensorA = blobA->GetMutable<Tensor<HIPContext>>();
  tensorA->Resize(shapeA);
  auto* tensorX = blobX->GetMutable<Tensor<HIPContext>>();
  tensorX->Resize(shapeX);
  auto* tensorY = blobY->GetMutable<Tensor<HIPContext>>();
  tensorY->Resize(shapeY);
  auto* tensorY_host = blobY_host->GetMutable<Tensor<CPUContext>>();
  tensorY_host->Resize(shapeY);
  EXPECT_EQ(tensorA->size(), 50);
  EXPECT_EQ(tensorX->size(), 10);
  EXPECT_EQ(tensorY->size(), 5);
  math::Set<float, HIPContext>(
      tensorA->size(), 1, tensorA->mutable_data<float>(), &context);
  math::Set<float, HIPContext>(
      tensorX->size(), 1, tensorX->mutable_data<float>(), &context);
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const float kZero = 0.0f;

  // Computes y = alpha * A * x + beta * y on the device.
  const auto run_gemv = [&](const float alpha, const float beta) {
    math::Gemv<float, HIPContext>(
        CblasNoTrans,
        5,
        10,
        alpha,
        tensorA->data<float>(),
        tensorX->data<float>(),
        beta,
        tensorY->mutable_data<float>(),
        &context);
  };

  // Copies y to the host tensor and checks that every element equals
  // `expected`. The size check keeps the loop from passing vacuously on an
  // empty host tensor; non-fatal gtest assertions are used so that a mismatch
  // is reported per element rather than aborting the whole test binary.
  const auto verify_output = [&](const float expected) {
    context.FinishDeviceComputation();
    tensorY_host->CopyFrom<HIPContext, HIPContext>(*tensorY, &context);
    context.FinishDeviceComputation();
    EXPECT_EQ(tensorY_host->size(), 5);
    for (int i = 0; i < tensorY_host->size(); ++i) {
      EXPECT_FLOAT_EQ(expected, tensorY_host->data<float>()[i])
          << "at index " << i;
    }
  };

  // Overwrite: 1 * 10 + 0 * y = 10.
  run_gemv(kOne, kZero);
  verify_output(10.0f);

  // Test Accumulate: 1 * 10 + 0.5 * 10 = 15.
  run_gemv(kOne, kPointFive);
  verify_output(15.0f);

  // Test Accumulate: 0.5 * 10 + 1 * 15 = 20.
  run_gemv(kPointFive, kOne);
  verify_output(20.0f);
}
// Exercises math::Gemv<float, HIPContext> with CblasTrans. A is 6x10 and x
// has 6 elements, both filled with ones, so every element of the 10-element
// output y equals alpha * 6 + beta * (previous y).
// Returns early (and therefore passes) when HasHipGPU() reports no device.
TEST(MathROCBLASTest, GemvTrans) {
  if (!HasHipGPU())
    return;
  Workspace ws;
  DeviceOption option;
  option.set_device_type(HIP);
  HIPContext context(option);
  Blob* blobA = ws.CreateBlob("A");
  Blob* blobX = ws.CreateBlob("X");
  Blob* blobY = ws.CreateBlob("Y");
  Blob* blobY_host = ws.CreateBlob("Y_host");
  vector<int> shapeA{6, 10};
  vector<int> shapeX{6};
  vector<int> shapeY{10};
  auto* tensorA = blobA->GetMutable<Tensor<HIPContext>>();
  tensorA->Resize(shapeA);
  auto* tensorX = blobX->GetMutable<Tensor<HIPContext>>();
  tensorX->Resize(shapeX);
  auto* tensorY = blobY->GetMutable<Tensor<HIPContext>>();
  tensorY->Resize(shapeY);
  auto* tensorY_host = blobY_host->GetMutable<Tensor<CPUContext>>();
  tensorY_host->Resize(shapeY);
  EXPECT_EQ(tensorA->size(), 60);
  EXPECT_EQ(tensorX->size(), 6);
  EXPECT_EQ(tensorY->size(), 10);
  math::Set<float, HIPContext>(
      tensorA->size(), 1, tensorA->mutable_data<float>(), &context);
  math::Set<float, HIPContext>(
      tensorX->size(), 1, tensorX->mutable_data<float>(), &context);
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const float kZero = 0.0f;

  // Computes y = alpha * A^T * x + beta * y on the device.
  const auto run_gemv = [&](const float alpha, const float beta) {
    math::Gemv<float, HIPContext>(
        CblasTrans,
        6,
        10,
        alpha,
        tensorA->data<float>(),
        tensorX->data<float>(),
        beta,
        tensorY->mutable_data<float>(),
        &context);
  };

  // Copies y to the host tensor and checks that every element equals
  // `expected`. The size check keeps the loop from passing vacuously on an
  // empty host tensor; non-fatal gtest assertions are used so that a mismatch
  // is reported per element rather than aborting the whole test binary.
  const auto verify_output = [&](const float expected) {
    context.FinishDeviceComputation();
    tensorY_host->CopyFrom<HIPContext, HIPContext>(*tensorY, &context);
    context.FinishDeviceComputation();
    EXPECT_EQ(tensorY_host->size(), 10);
    for (int i = 0; i < tensorY_host->size(); ++i) {
      EXPECT_FLOAT_EQ(expected, tensorY_host->data<float>()[i])
          << "at index " << i;
    }
  };

  // Overwrite: 1 * 6 + 0 * y = 6.
  run_gemv(kOne, kZero);
  verify_output(6.0f);

  // Test Accumulate: 1 * 6 + 0.5 * 6 = 9.
  run_gemv(kOne, kPointFive);
  verify_output(9.0f);

  // Test Accumulate: 0.5 * 6 + 1 * 9 = 12.
  run_gemv(kPointFive, kOne);
  verify_output(12.0f);
}
} // namespace caffe2