blob: 4c9efd4c6606f1b90018e9b2324e9e5a6b15e1c1 [file] [log] [blame]
#include <gtest/gtest.h>
#include "caffe2/core/init.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/utils/math.h"
#include "caffe2/utils/proto_utils.h"
#include "NeuralNetworks.h"
#include "nnapi.h"
namespace caffe2 {
namespace {
// CPU: t1, NN-API: t2
void checkError(const TensorCPU& t1, const TensorCPU& t2, float error) {
CAFFE_ENFORCE_EQ(
t1.sizes(),
t2.sizes(),
"t1.size() = ",
t1.size(),
", t2.size() = ",
t2.size());
float maxError = 0, minError = 0;
if (t1.template IsType<float>()) {
for (auto i = 0; i < t1.size(); ++i) {
const float t1_i = t1.template data<float>()[i];
const float t2_i = t2.template data<float>()[i];
EXPECT_NEAR(t1_i, t2_i, error);
float err = t1_i - t2_i;
if (err > maxError) {
maxError = err;
} else if (err < minError) {
minError = err;
}
}
} else if (t1.template IsType<uint8_t>()) {
for (auto i = 0; i < t1.size(); ++i) {
const uint8_t t1_i = t1.template data<uint8_t>()[i];
const uint8_t t2_i = t2.template data<uint8_t>()[i];
EXPECT_NEAR(t1_i, t2_i, error);
float err = t1_i - t2_i;
if (err > maxError) {
maxError = err;
} else if (err < minError) {
minError = err;
}
}
}
LOG(ERROR) << "maxError = " << maxError << ", minError = " << minError;
}
static void test_relu(int N, int C, int H, int W) {
// CPU reference
Workspace ws;
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU);
t->Resize(N, H, W, C);
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
NetDef netdef;
{
{
auto& op = *(netdef.add_op());
op.set_type("Relu");
op.add_input("X_cpu");
op.add_output("Y_cpu");
}
netdef.add_external_input("X_cpu");
netdef.add_external_output("Y_cpu");
}
ws.RunNetOnce(netdef);
const auto& t_cpu = ws.GetBlob("Y_cpu")->Get<TensorCPU>(); // cpu
// NN API
netdef.mutable_op(0)->set_output(0, "Y_nn");
netdef.set_external_output(0, "Y_nn");
NetDef initNet;
NNApi model(initNet, netdef, &ws);
std::vector<TensorCPU*> inputs, outputs;
inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU));
EXPECT_TRUE(model.run(inputs, &outputs));
const auto& t_nn = *outputs[0];
checkError(t_cpu, t_nn, 0.01);
}
static void test_conv_NHWC(
int N,
int C,
int H,
int W,
int K,
int kernel,
int pad_t,
int pad_l,
int pad_b,
int pad_r,
int stride_h,
int stride_w) {
Workspace ws;
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU);
t->Resize(N, H, W, C);
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU);
t->Resize(K, kernel, kernel, C);
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("B"), CPU);
t->Resize(K);
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
NetDef netdef;
{
{
auto& op = *(netdef.add_op());
op.set_type("Conv");
op.add_input("X_cpu");
op.add_input("W");
op.add_input("B");
op.add_output("Y_cpu");
{
auto& arg = *(op.add_arg());
arg.set_name("order");
arg.set_s("NHWC");
}
{
auto& arg = *(op.add_arg());
arg.set_name("kernel");
arg.set_i(kernel);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_t");
arg.set_i(pad_t);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_l");
arg.set_i(pad_l);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_b");
arg.set_i(pad_b);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_r");
arg.set_i(pad_r);
}
{
auto& arg = *(op.add_arg());
arg.set_name("stride_h");
arg.set_i(stride_h);
}
{
auto& arg = *(op.add_arg());
arg.set_name("stride_w");
arg.set_i(stride_w);
}
}
netdef.add_external_input("X_cpu");
netdef.add_external_input("W");
netdef.add_external_input("B");
netdef.add_external_output("Y_cpu");
}
ws.RunNetOnce(netdef);
const auto& t_cpu = ws.GetBlob("Y_cpu")->Get<TensorCPU>(); // cpu
// NN API
netdef.mutable_op(0)->set_output(0, "Y_nn");
netdef.set_external_output(0, "Y_nn");
NetDef initNet;
NNApi model(initNet, netdef, &ws);
std::vector<TensorCPU*> inputs, outputs;
inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU));
EXPECT_TRUE(model.run(inputs, &outputs));
const auto& t_nn = *outputs[0];
checkError(t_cpu, t_nn, 0.01);
}
static void test_depthwise_conv_NHWC(
int N,
int C,
int H,
int W,
int D,
int kernel,
int pad_t,
int pad_l,
int pad_b,
int pad_r,
int stride_h,
int stride_w) {
Workspace ws;
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU);
t->Resize(N, H, W, C);
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("W"), CPU);
t->Resize(1, kernel, kernel, D);
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("B"), CPU);
t->Resize(D);
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
NetDef netdef_cpu;
{
// X: NHWC -> NCHW
{
auto& op = *(netdef_cpu.add_op());
op.set_type("Transpose");
op.add_input("X_cpu");
op.add_output("X_t");
{
auto& arg = *(op.add_arg());
arg.set_name("axes");
arg.add_ints(0);
arg.add_ints(3);
arg.add_ints(1);
arg.add_ints(2);
}
}
// X: MHWC -> CMHW
{
auto& op = *(netdef_cpu.add_op());
op.set_type("Transpose");
op.add_input("W");
op.add_output("W_t");
{
auto& arg = *(op.add_arg());
arg.set_name("axes");
arg.add_ints(3);
arg.add_ints(0);
arg.add_ints(1);
arg.add_ints(2);
}
}
{
auto& op = *(netdef_cpu.add_op());
op.set_type("Conv");
op.add_input("X_t");
op.add_input("W_t");
op.add_input("B");
op.add_output("Y_t");
{
auto& arg = *(op.add_arg());
arg.set_name("order");
arg.set_s("NCHW");
}
{
auto& arg = *(op.add_arg());
arg.set_name("kernel");
arg.set_i(kernel);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_t");
arg.set_i(pad_t);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_l");
arg.set_i(pad_l);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_b");
arg.set_i(pad_b);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_r");
arg.set_i(pad_r);
}
{
auto& arg = *(op.add_arg());
arg.set_name("stride_h");
arg.set_i(stride_h);
}
{
auto& arg = *(op.add_arg());
arg.set_name("stride_w");
arg.set_i(stride_w);
}
{
auto& arg = *(op.add_arg());
arg.set_name("group");
arg.set_i(C);
}
}
// Y: NCHW -> NHWC
{
auto& op = *(netdef_cpu.add_op());
op.set_type("Transpose");
op.add_input("Y_t");
op.add_output("Y_cpu");
{
auto& arg = *(op.add_arg());
arg.set_name("axes");
arg.add_ints(0);
arg.add_ints(2);
arg.add_ints(3);
arg.add_ints(1);
}
}
netdef_cpu.add_external_input("X_cpu");
netdef_cpu.add_external_input("W");
netdef_cpu.add_external_input("B");
netdef_cpu.add_external_output("Y_cpu");
}
NetDef netdef;
{
{
auto& op = *(netdef.add_op());
op.set_type("Conv");
op.add_input("X_cpu");
op.add_input("W");
op.add_input("B");
op.add_output("Y_nn");
{
auto& arg = *(op.add_arg());
arg.set_name("order");
arg.set_s("NHWC");
}
{
auto& arg = *(op.add_arg());
arg.set_name("kernel");
arg.set_i(kernel);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_t");
arg.set_i(pad_t);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_l");
arg.set_i(pad_l);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_b");
arg.set_i(pad_b);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_r");
arg.set_i(pad_r);
}
{
auto& arg = *(op.add_arg());
arg.set_name("stride_h");
arg.set_i(stride_h);
}
{
auto& arg = *(op.add_arg());
arg.set_name("stride_w");
arg.set_i(stride_w);
}
{
auto& arg = *(op.add_arg());
arg.set_name("group");
arg.set_i(C);
}
}
netdef.add_external_input("X_cpu");
netdef.add_external_input("W");
netdef.add_external_input("B");
netdef.add_external_output("Y_nn");
}
ws.RunNetOnce(netdef_cpu);
const auto& t_cpu = ws.GetBlob("Y_cpu")->Get<TensorCPU>(); // cpu
// NN API
NetDef initNet;
NNApi model(initNet, netdef, &ws);
std::vector<TensorCPU*> inputs, outputs;
inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU));
EXPECT_TRUE(model.run(inputs, &outputs));
const auto& t_nn = *outputs[0];
checkError(t_cpu, t_nn, 0.01);
}
static void test_pooling(
std::string type,
int N,
int C,
int H,
int W,
int kernel,
int pad_t,
int pad_l,
int pad_b,
int pad_r,
int stride_h,
int stride_w) {
Workspace ws;
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU);
t->Resize(N, H, W, C);
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
NetDef netdef;
{
{
auto& op = *(netdef.add_op());
op.set_type(type);
op.add_input("X_cpu");
op.add_output("Y_cpu");
{
auto& arg = *(op.add_arg());
arg.set_name("order");
arg.set_s("NHWC");
}
{
auto& arg = *(op.add_arg());
arg.set_name("kernel");
arg.set_i(kernel);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_t");
arg.set_i(pad_t);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_l");
arg.set_i(pad_l);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_b");
arg.set_i(pad_b);
}
{
auto& arg = *(op.add_arg());
arg.set_name("pad_r");
arg.set_i(pad_r);
}
{
auto& arg = *(op.add_arg());
arg.set_name("stride_h");
arg.set_i(stride_h);
}
{
auto& arg = *(op.add_arg());
arg.set_name("stride_w");
arg.set_i(stride_w);
}
}
netdef.add_external_input("X_cpu");
netdef.add_external_output("Y_cpu");
}
ws.RunNetOnce(netdef);
const auto& t_cpu = ws.GetBlob("Y_cpu")->Get<TensorCPU>(); // cpu
// NN API
netdef.mutable_op(0)->set_output(0, "Y_nn");
netdef.set_external_output(0, "Y_nn");
NetDef initNet;
NNApi model(initNet, netdef, &ws);
std::vector<TensorCPU*> inputs, outputs;
inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU));
EXPECT_TRUE(model.run(inputs, &outputs));
const auto& t_nn = *outputs[0];
checkError(t_cpu, t_nn, 0.01);
}
static void test_softmax(int N, int C, int H = 1, int W = 1) {
Workspace ws;
{
auto* t = BlobGetMutableTensor(ws.CreateBlob("X_cpu"), CPU);
if (H == 1 && W == 1) {
t->Resize(N, C);
} else {
t->Resize(N, H, W, C);
}
CPUContext ctx;
math::RandGaussian<float, CPUContext>(
t->size(), 0, 30, t->mutable_data<float>(), &ctx);
}
NetDef netdef;
{
{
auto& op = *(netdef.add_op());
op.set_type("Softmax");
op.add_input("X_cpu");
op.add_output("Y_cpu");
}
netdef.add_external_input("X_cpu");
netdef.add_external_output("Y_cpu");
}
ws.RunNetOnce(netdef);
const auto& t_cpu = ws.GetBlob("Y_cpu")->Get<TensorCPU>(); // cpu
// NN API
netdef.mutable_op(0)->set_output(0, "Y_nn");
netdef.set_external_output(0, "Y_nn");
NetDef initNet;
NNApi model(initNet, netdef, &ws);
std::vector<TensorCPU*> inputs, outputs;
inputs.push_back(BlobGetMutableTensor(ws.GetBlob("X_cpu"), CPU));
EXPECT_TRUE(model.run(inputs, &outputs));
const auto& t_nn = *outputs[0];
checkError(t_cpu, t_nn, 0.01);
}
TEST(NNApi, TestConv) {
for (int C : {13, 32}) {
for (int M : {4, 7, 17}) {
for (int W : {13, 104}) {
for (int K : {1, 3, 5}) {
for (int P : {0, K - 1}) {
for (int S : {1, 2}) {
test_conv_NHWC(1, C, W, W, M, K, P, P, P, P, S, S);
}
}
}
}
}
}
// Test for asymmetric padding
// NN API only supports stride_x == stride_y
test_conv_NHWC(1, 3, 26, 26, 7, 3, 1, 2, 2, 0, 1, 1);
test_conv_NHWC(1, 3, 26, 26, 7, 3, 1, 2, 0, 2, 2, 2);
test_conv_NHWC(1, 3, 26, 26, 7, 3, 1, 1, 2, 1, 2, 2);
test_conv_NHWC(1, 3, 26, 26, 7, 3, 1, 1, 0, 1, 1, 1);
}
TEST(NNApi, Depthwise) {
for (int C : {13, 32}) {
for (int W : {13, 104}) {
for (int K : {1, 3}) {
for (int P : {0, K - 1}) {
for (int S : {1, 2}) {
test_depthwise_conv_NHWC(1, C, W, W, C, K, P, P, P, P, S, S);
}
}
}
}
}
// Test for asymmetric padding
// NN API only supports stride_x == stride_y
test_depthwise_conv_NHWC(1, 3, 26, 26, 3, 3, 1, 2, 2, 0, 1, 1);
test_depthwise_conv_NHWC(1, 3, 26, 26, 3, 3, 1, 2, 0, 2, 2, 2);
test_depthwise_conv_NHWC(1, 3, 26, 26, 3, 3, 1, 1, 2, 1, 2, 2);
test_depthwise_conv_NHWC(1, 3, 26, 26, 3, 3, 1, 1, 0, 1, 1, 1);
}
TEST(NNApi, TestRelu) {
test_relu(1, 4, 10, 10);
test_relu(1, 16, 128, 128);
}
TEST(NNApi, TestAveragePool) {
for (int C : {13, 32}) {
for (int W : {13, 104}) {
for (int K : {1, 3}) {
for (int P : {0, K - 1}) {
for (int S : {1, 2}) {
test_pooling("AveragePool", 1, C, W, W, K, P, P, P, P, S, S);
}
}
}
}
}
test_pooling("AveragePool", 1, 3, 26, 26, 3, 1, 2, 2, 0, 1, 1);
test_pooling("AveragePool", 1, 3, 26, 26, 3, 1, 2, 0, 2, 2, 2);
test_pooling("AveragePool", 1, 3, 26, 26, 3, 1, 1, 2, 1, 2, 2);
test_pooling("AveragePool", 1, 3, 26, 26, 3, 1, 1, 0, 1, 1, 1);
}
TEST(NNApi, TestMaxPool) {
for (int C : {13, 32}) {
for (int W : {13, 104}) {
for (int K : {1, 3}) {
for (int P : {0, K - 1}) {
for (int S : {1, 2}) {
test_pooling("MaxPool", 1, C, W, W, K, P, P, P, P, S, S);
}
}
}
}
}
test_pooling("MaxPool", 1, 3, 26, 26, 3, 1, 2, 2, 0, 1, 1);
test_pooling("MaxPool", 1, 3, 26, 26, 3, 1, 2, 0, 2, 2, 2);
test_pooling("MaxPool", 1, 3, 26, 26, 3, 1, 1, 2, 1, 2, 2);
test_pooling("MaxPool", 1, 3, 26, 26, 3, 1, 1, 0, 1, 1, 1);
}
TEST(NNApi, TestSoftmax) {
test_softmax(1, 100);
test_softmax(2, 17);
// NN API doesn't seem to work for 4D tensor
// test_softmax(1, 100, 13, 13);
// test_softmax(5, 17, 13, 13);
}
} // namespace
} // namespace caffe2