caffe2/share/contrib/nnpack/nnpack_test.cc - platform/external/pytorch - Git at Google

 #include "caffe2/core/init.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/core/tensor.h"
 #include "caffe2/utils/math.h"
 #include "caffe2/utils/proto_utils.h"
 #include "gtest/gtest.h"

 #include <cmath>
 #include <random>

 namespace caffe2 {

 namespace {

 void AddNoiseInput(
     const vector<int64_t>& shape,
     const string& name,
     Workspace* ws) {
   DeviceOption option;
   CPUContext context(option);
   Blob* blob = ws->CreateBlob(name);
   auto* tensor = BlobGetMutableTensor(blob, CPU);
   tensor->Resize(shape);

   math::RandGaussian<float, CPUContext>(
       tensor->size(), 0.0f, 3.0f, tensor->mutable_data<float>(), &context);
   for (auto i = 0; i < tensor->size(); ++i) {
     tensor->mutable_data<float>()[i] =
         std::min(-5.0f, std::max(5.0f, tensor->mutable_data<float>()[i]));
   }
 }

 inline float relativeError(float a, float b) {
   return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
 }

 void compare(
     int N,
     int inputC,
     int H,
     int W,
     int outputC,
     int kernelH,
     int kernelW,
     int strideH,
     int strideW,
     int padT,
     int padL,
     int padB,
     int padR,
     int group,
     const std::string& algorithm,
     const std::string& convolutionTransformStrategy,
     const std::string& activation,
     float maxRelErr,
     float absErrForRelErrFailure) {
   LOG(INFO) << "running N " << N << " inputC " << inputC << " H " << H << " W "
             << W << " outputC " << outputC << " kernelH " << kernelH
             << " kernelW " << kernelW << " strideH " << strideH << " strideW "
             << strideW << " padT " << padT << " padL " << padL << " padB "
             << padB << " padR " << padR << " group " << group;

   Workspace ws;

   OperatorDef nnpackOpDef;
   nnpackOpDef.set_name("test");
   nnpackOpDef.set_type("Conv");
   nnpackOpDef.set_engine("NNPACK");
   nnpackOpDef.add_input("X");
   nnpackOpDef.add_input("W");
   nnpackOpDef.add_input("B");
   nnpackOpDef.add_output("Y_nnpack");

   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("kernel_h", kernelH));
   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("kernel_w", kernelW));
   if (!algorithm.empty()) {
     nnpackOpDef.add_arg()->CopyFrom(MakeArgument("algo", algorithm));
   }
   if (!convolutionTransformStrategy.empty()) {
     nnpackOpDef.add_arg()->CopyFrom(MakeArgument(
         "convolution_transform_strategy", convolutionTransformStrategy));
   }
   if (!activation.empty()) {
     nnpackOpDef.add_arg()->CopyFrom(MakeArgument("activation", activation));
   }
   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("stride_h", strideH));
   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("stride_w", strideW));
   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_t", padT));
   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_l", padL));
   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_b", padB));
   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_r", padR));
   nnpackOpDef.add_arg()->CopyFrom(MakeArgument("group", group));

   AddNoiseInput(vector<int64_t>{N, inputC, H, W}, "X", &ws);
   AddNoiseInput(
       vector<int64_t>{outputC, inputC / group, kernelH, kernelW}, "W", &ws);
   AddNoiseInput(vector<int64_t>{outputC}, "B", &ws);

   unique_ptr<OperatorBase> nnpackOp(CreateOperator(nnpackOpDef, &ws));
   EXPECT_NE(nullptr, nnpackOp.get());

   OperatorDef referenceOpDef;
   referenceOpDef.set_name("test");
   referenceOpDef.set_type("Conv");
   referenceOpDef.add_input("X");
   referenceOpDef.add_input("W");
   referenceOpDef.add_input("B");
   referenceOpDef.add_output("Y_reference");

   referenceOpDef.add_arg()->CopyFrom(MakeArgument("kernel_h", kernelH));
   referenceOpDef.add_arg()->CopyFrom(MakeArgument("kernel_w", kernelW));
   referenceOpDef.add_arg()->CopyFrom(MakeArgument("stride_h", strideH));
   referenceOpDef.add_arg()->CopyFrom(MakeArgument("stride_w", strideW));
   referenceOpDef.add_arg()->CopyFrom(MakeArgument("pad_t", padT));
   referenceOpDef.add_arg()->CopyFrom(MakeArgument("pad_l", padL));
   referenceOpDef.add_arg()->CopyFrom(MakeArgument("pad_b", padB));
   referenceOpDef.add_arg()->CopyFrom(MakeArgument("pad_r", padR));
   referenceOpDef.add_arg()->CopyFrom(MakeArgument("group", group));

   unique_ptr<OperatorBase> referenceOp(CreateOperator(referenceOpDef, &ws));
   EXPECT_NE(nullptr, referenceOp.get());

   unique_ptr<OperatorBase> activationOp;
   if (activation == "Relu") {
     OperatorDef activationOpDef;
     activationOpDef.set_name("activation");
     activationOpDef.set_type("Relu");
     activationOpDef.add_input("Y_reference");
     activationOpDef.add_output("Y_reference");
     activationOp = CreateOperator(activationOpDef, &ws);
     EXPECT_NE(nullptr, activationOp.get());
   }

   for (auto i = 0; i < 10; ++i) {
     EXPECT_TRUE(nnpackOp->Run());
   }
   Blob* nnpackOutputBlob = ws.GetBlob("Y_nnpack");
   EXPECT_NE(nullptr, nnpackOutputBlob);
   auto& nnpackOutput = nnpackOutputBlob->Get<TensorCPU>();

   for (auto i = 0; i < 10; ++i) {
     EXPECT_TRUE(referenceOp->Run());
     if (activationOp) {
       EXPECT_TRUE(activationOp->Run());
     }
   }

   Blob* referenceOutputBlob = ws.GetBlob("Y_reference");
   EXPECT_NE(nullptr, referenceOutputBlob);
   auto& referenceOutput = referenceOutputBlob->Get<TensorCPU>();

   // Compare all output points
   for (int n = 0; n < nnpackOutput.dim32(0); ++n) {
     for (int c = 0; c < nnpackOutput.dim32(1); ++c) {
       for (int h = 0; h < nnpackOutput.dim32(2); ++h) {
         for (int w = 0; w < nnpackOutput.dim32(3); ++w) {
           int offset = n * nnpackOutput.dim32(1) * nnpackOutput.dim32(2) *
                   nnpackOutput.dim32(3) +
               c * nnpackOutput.dim32(2) * nnpackOutput.dim32(3) +
               h * nnpackOutput.dim32(3) + w;

           auto v1 = nnpackOutput.data<float>()[offset];
           auto v2 = referenceOutput.data<float>()[offset];

           float relErr = relativeError(v1, v2);
           float absErr = std::abs(v1 - v2);

           // For small values / small difference, the relative error
           // can be huge but the absolute error will be small
           EXPECT_TRUE(
               relErr <= maxRelErr ||
               (relErr > maxRelErr && absErr <= absErrForRelErrFailure))
               << v1 << " " << v2 << " (rel err " << relErr << ") "
               << "(" << n << " " << c << " " << h << " " << w << ") "
               << "running N " << N << " inputC " << inputC << " H " << H
               << " W " << W << " outputC " << outputC << " kernelH " << kernelH
               << " kernelW " << kernelW << " strideH " << strideH << " strideW "
               << strideW << " padT " << padT << " padL " << padL << " padB "
               << padB << " padR " << padR << " group " << group << " algorithm "
               << algorithm << " convolutionTransformStrategy "
               << convolutionTransformStrategy;
         }
       }
     }
   }
 }

 int randInt(int a, int b) {
   static std::random_device rd;
   static std::mt19937 gen(rd());

   return std::uniform_int_distribution<int>(a, b)(gen);
 }

 void runConv(
     int kernelH,
     int kernelW,
     int strideH,
     int strideW,
     int group = 1,
     std::string algo = "",
     int planesIn = randInt(1, 6),
     int planesOut = randInt(1, 6),
     int n = randInt(1, 2),
     std::string convolutionTransformStrategy = "COMPUTE",
     std::string activation = "identity") {
   int h = randInt(20, 100);
   int w = randInt(20, 100);
   // This pad restriction is imposed by NNPACK
   int padT = std::min(randInt(0, 3), kernelH - 1);
   int padB = std::min(randInt(0, 3), kernelH - 1);
   int padL = std::min(randInt(0, 3), kernelW - 1);
   int padR = std::min(randInt(0, 3), kernelW - 1);

   caffe2::compare(
       n,
       planesIn,
       h,
       w,
       planesOut,
       kernelH,
       kernelW,
       strideH,
       strideW,
       padT,
       padL,
       padB,
       padR,
       group,
       algo,
       convolutionTransformStrategy,
       activation,
       0.05f,
       0.1f);
 }

 } // unnamed namespace

 constexpr size_t kIters = 20;

 TEST(NNPACK, Conv_3x3s1) {
   for (int i = 0; i < kIters; ++i) {
     runConv(3, 3, 1, 1);
   }
 }

 TEST(NNPACK, Conv_3x3s1_precompute) {
   for (int i = 0; i < kIters; ++i) {
     int group = randInt(1, 2);
     runConv(
         3,
         3,
         1,
         1,
         group,
         "WINOGRAD",
         group * randInt(1, 8),
         group * randInt(1, 8),
         1,
         "PRECOMPUTE");
   }
 }

 TEST(NNPACK, Conv_3x3s1_FP16) {
   for (int i = 0; i < kIters; ++i) {
     runConv(3, 3, 1, 1, 1, "WINOGRAD_FP16");
   }
 }

 TEST(NNPACK, Conv_3x3s1_FP16_precompute) {
   for (int i = 0; i < kIters; ++i) {
     int group = randInt(1, 2);
     runConv(
         3,
         3,
         1,
         1,
         group,
         "WINOGRAD_FP16",
         group * randInt(1, 8),
         group * randInt(1, 8),
         1,
         "PRECOMPUTE");
   }
 }

 TEST(NNPACK, Conv_NxNs1) {
   for (int i = 0; i < kIters; ++i) {
     int kernel = randInt(2, 10);
     runConv(kernel, kernel, 1, 1);
   }
 }

 TEST(NNPACK, Conv_1x1s1) {
   for (int i = 0; i < kIters; ++i) {
     auto group = randInt(1, 3);
     auto inChannels = randInt(1, 8) * group;
     auto outChannels = randInt(1, 8) * group;
     auto n = 1;
     runConv(1, 1, 1, 1, group, "DIRECT", inChannels, outChannels, n);
   }
 }

 TEST(NNPACK, ConvRelu_1x1s1) {
   for (int i = 0; i < kIters; ++i) {
     auto group = randInt(1, 3);
     auto inChannels = randInt(1, 8) * group;
     auto outChannels = randInt(1, 8) * group;
     auto n = 1;
     runConv(
         1,
         1,
         1,
         1,
         group,
         "DIRECT",
         inChannels,
         outChannels,
         n,
         "PRECOMPUTE",
         "Relu");
   }
 }

 TEST(NNPACK, Conv_1x1s1_precompute) {
   for (int i = 0; i < kIters; ++i) {
     auto group = randInt(1, 3);
     auto inChannels = randInt(1, 8) * group;
     auto outChannels = randInt(1, 8) * group;
     auto n = 1;
     runConv(
         1, 1, 1, 1, group, "DIRECT", inChannels, outChannels, n, "PRECOMPUTE");
   }
 }

 TEST(NNPACK, Conv_NxNs_grouped) {
   for (int i = 0; i < kIters; ++i) {
     int group = randInt(2, 3);
     int iC = randInt(1, 6) * group;
     int oC = randInt(1, 6) * group;
     int kernel = randInt(2, 10);
     int n = randInt(1, 2);
     runConv(kernel, kernel, 1, 1, group, "", iC, oC, n);
   }
 }

 TEST(NNPACK, Conv_NxNs_grouped_precompute) {
   for (int i = 0; i < kIters; ++i) {
     int group = randInt(2, 3);
     int iC = randInt(1, 6) * group;
     int oC = randInt(1, 6) * group;
     int kernel = randInt(2, 10);
     int n = randInt(1, 2);
     runConv(kernel, kernel, 1, 1, group, "", iC, oC, n, "PRECOMPUTE");
   }
 }

 TEST(NNPACK, Conv_NxNsW) {
   for (int i = 0; i < 3; ++i) {
     int kernel = randInt(3, 5);
     int stride = randInt(1, kernel - 1);
     runConv(kernel, kernel, stride, stride);
   }
 }

 TEST(NNPACK, ConvRelu_NxNsW) {
   for (int i = 0; i < 3; ++i) {
     int kernel = randInt(3, 5);
     int stride = randInt(1, kernel - 1);
     runConv(kernel, kernel, stride, stride, 1, "", 1, 1, 1, "COMPUTE", "Relu");
   }
 }

 TEST(NNPACK, Conv_HxWsHxW) {
   for (int i = 0; i < 3; ++i) {
     int kernelH = randInt(2, 5);
     int kernelW = randInt(2, 5);
     int strideH = randInt(1, kernelH - 1);
     int strideW = randInt(1, kernelW - 1);
     runConv(kernelH, kernelW, strideH, strideW);
   }
 }

 } // namespace caffe2
	#include "caffe2/core/init.h"
	#include "caffe2/core/operator.h"
	#include "caffe2/core/tensor.h"
	#include "caffe2/utils/math.h"
	#include "caffe2/utils/proto_utils.h"
	#include "gtest/gtest.h"

	#include <cmath>
	#include <random>

	namespace caffe2 {

	namespace {

	void AddNoiseInput(
	const vector<int64_t>& shape,
	const string& name,
	Workspace* ws) {
	DeviceOption option;
	CPUContext context(option);
	Blob* blob = ws->CreateBlob(name);
	auto* tensor = BlobGetMutableTensor(blob, CPU);
	tensor->Resize(shape);

	math::RandGaussian<float, CPUContext>(
	tensor->size(), 0.0f, 3.0f, tensor->mutable_data<float>(), &context);
	for (auto i = 0; i < tensor->size(); ++i) {
	tensor->mutable_data<float>()[i] =
	std::min(-5.0f, std::max(5.0f, tensor->mutable_data<float>()[i]));
	}
	}

	inline float relativeError(float a, float b) {
	return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
	}

	void compare(
	int N,
	int inputC,
	int H,
	int W,
	int outputC,
	int kernelH,
	int kernelW,
	int strideH,
	int strideW,
	int padT,
	int padL,
	int padB,
	int padR,
	int group,
	const std::string& algorithm,
	const std::string& convolutionTransformStrategy,
	const std::string& activation,
	float maxRelErr,
	float absErrForRelErrFailure) {
	LOG(INFO) << "running N " << N << " inputC " << inputC << " H " << H << " W "
	<< W << " outputC " << outputC << " kernelH " << kernelH
	<< " kernelW " << kernelW << " strideH " << strideH << " strideW "
	<< strideW << " padT " << padT << " padL " << padL << " padB "
	<< padB << " padR " << padR << " group " << group;

	Workspace ws;

	OperatorDef nnpackOpDef;
	nnpackOpDef.set_name("test");
	nnpackOpDef.set_type("Conv");
	nnpackOpDef.set_engine("NNPACK");
	nnpackOpDef.add_input("X");
	nnpackOpDef.add_input("W");
	nnpackOpDef.add_input("B");
	nnpackOpDef.add_output("Y_nnpack");

	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("kernel_h", kernelH));
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("kernel_w", kernelW));
	if (!algorithm.empty()) {
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("algo", algorithm));
	}
	if (!convolutionTransformStrategy.empty()) {
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument(
	"convolution_transform_strategy", convolutionTransformStrategy));
	}
	if (!activation.empty()) {
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("activation", activation));
	}
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("stride_h", strideH));
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("stride_w", strideW));
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_t", padT));
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_l", padL));
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_b", padB));
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("pad_r", padR));
	nnpackOpDef.add_arg()->CopyFrom(MakeArgument("group", group));

	AddNoiseInput(vector<int64_t>{N, inputC, H, W}, "X", &ws);
	AddNoiseInput(
	vector<int64_t>{outputC, inputC / group, kernelH, kernelW}, "W", &ws);
	AddNoiseInput(vector<int64_t>{outputC}, "B", &ws);

	unique_ptr<OperatorBase> nnpackOp(CreateOperator(nnpackOpDef, &ws));
	EXPECT_NE(nullptr, nnpackOp.get());

	OperatorDef referenceOpDef;
	referenceOpDef.set_name("test");
	referenceOpDef.set_type("Conv");
	referenceOpDef.add_input("X");
	referenceOpDef.add_input("W");
	referenceOpDef.add_input("B");
	referenceOpDef.add_output("Y_reference");

	referenceOpDef.add_arg()->CopyFrom(MakeArgument("kernel_h", kernelH));
	referenceOpDef.add_arg()->CopyFrom(MakeArgument("kernel_w", kernelW));
	referenceOpDef.add_arg()->CopyFrom(MakeArgument("stride_h", strideH));
	referenceOpDef.add_arg()->CopyFrom(MakeArgument("stride_w", strideW));
	referenceOpDef.add_arg()->CopyFrom(MakeArgument("pad_t", padT));
	referenceOpDef.add_arg()->CopyFrom(MakeArgument("pad_l", padL));
	referenceOpDef.add_arg()->CopyFrom(MakeArgument("pad_b", padB));
	referenceOpDef.add_arg()->CopyFrom(MakeArgument("pad_r", padR));
	referenceOpDef.add_arg()->CopyFrom(MakeArgument("group", group));

	unique_ptr<OperatorBase> referenceOp(CreateOperator(referenceOpDef, &ws));
	EXPECT_NE(nullptr, referenceOp.get());

	unique_ptr<OperatorBase> activationOp;
	if (activation == "Relu") {
	OperatorDef activationOpDef;
	activationOpDef.set_name("activation");
	activationOpDef.set_type("Relu");
	activationOpDef.add_input("Y_reference");
	activationOpDef.add_output("Y_reference");
	activationOp = CreateOperator(activationOpDef, &ws);
	EXPECT_NE(nullptr, activationOp.get());
	}

	for (auto i = 0; i < 10; ++i) {
	EXPECT_TRUE(nnpackOp->Run());
	}
	Blob* nnpackOutputBlob = ws.GetBlob("Y_nnpack");
	EXPECT_NE(nullptr, nnpackOutputBlob);
	auto& nnpackOutput = nnpackOutputBlob->Get<TensorCPU>();

	for (auto i = 0; i < 10; ++i) {
	EXPECT_TRUE(referenceOp->Run());
	if (activationOp) {
	EXPECT_TRUE(activationOp->Run());
	}
	}

	Blob* referenceOutputBlob = ws.GetBlob("Y_reference");
	EXPECT_NE(nullptr, referenceOutputBlob);
	auto& referenceOutput = referenceOutputBlob->Get<TensorCPU>();

	// Compare all output points
	for (int n = 0; n < nnpackOutput.dim32(0); ++n) {
	for (int c = 0; c < nnpackOutput.dim32(1); ++c) {
	for (int h = 0; h < nnpackOutput.dim32(2); ++h) {
	for (int w = 0; w < nnpackOutput.dim32(3); ++w) {
	int offset = n * nnpackOutput.dim32(1) * nnpackOutput.dim32(2) *
	nnpackOutput.dim32(3) +
	c * nnpackOutput.dim32(2) * nnpackOutput.dim32(3) +
	h * nnpackOutput.dim32(3) + w;

	auto v1 = nnpackOutput.data<float>()[offset];
	auto v2 = referenceOutput.data<float>()[offset];

	float relErr = relativeError(v1, v2);
	float absErr = std::abs(v1 - v2);

	// For small values / small difference, the relative error
	// can be huge but the absolute error will be small
	EXPECT_TRUE(
	relErr <= maxRelErr \|\|
	(relErr > maxRelErr && absErr <= absErrForRelErrFailure))
	<< v1 << " " << v2 << " (rel err " << relErr << ") "
	<< "(" << n << " " << c << " " << h << " " << w << ") "
	<< "running N " << N << " inputC " << inputC << " H " << H
	<< " W " << W << " outputC " << outputC << " kernelH " << kernelH
	<< " kernelW " << kernelW << " strideH " << strideH << " strideW "
	<< strideW << " padT " << padT << " padL " << padL << " padB "
	<< padB << " padR " << padR << " group " << group << " algorithm "
	<< algorithm << " convolutionTransformStrategy "
	<< convolutionTransformStrategy;
	}
	}
	}
	}
	}

	int randInt(int a, int b) {
	static std::random_device rd;
	static std::mt19937 gen(rd());

	return std::uniform_int_distribution<int>(a, b)(gen);
	}

	void runConv(
	int kernelH,
	int kernelW,
	int strideH,
	int strideW,
	int group = 1,
	std::string algo = "",
	int planesIn = randInt(1, 6),
	int planesOut = randInt(1, 6),
	int n = randInt(1, 2),
	std::string convolutionTransformStrategy = "COMPUTE",
	std::string activation = "identity") {
	int h = randInt(20, 100);
	int w = randInt(20, 100);
	// This pad restriction is imposed by NNPACK
	int padT = std::min(randInt(0, 3), kernelH - 1);
	int padB = std::min(randInt(0, 3), kernelH - 1);
	int padL = std::min(randInt(0, 3), kernelW - 1);
	int padR = std::min(randInt(0, 3), kernelW - 1);

	caffe2::compare(
	n,
	planesIn,
	h,
	w,
	planesOut,
	kernelH,
	kernelW,
	strideH,
	strideW,
	padT,
	padL,
	padB,
	padR,
	group,
	algo,
	convolutionTransformStrategy,
	activation,
	0.05f,
	0.1f);
	}

	} // unnamed namespace

	constexpr size_t kIters = 20;

	TEST(NNPACK, Conv_3x3s1) {
	for (int i = 0; i < kIters; ++i) {
	runConv(3, 3, 1, 1);
	}
	}

	TEST(NNPACK, Conv_3x3s1_precompute) {
	for (int i = 0; i < kIters; ++i) {
	int group = randInt(1, 2);
	runConv(
	3,
	3,
	1,
	1,
	group,
	"WINOGRAD",
	group * randInt(1, 8),
	group * randInt(1, 8),
	1,
	"PRECOMPUTE");
	}
	}

	TEST(NNPACK, Conv_3x3s1_FP16) {
	for (int i = 0; i < kIters; ++i) {
	runConv(3, 3, 1, 1, 1, "WINOGRAD_FP16");
	}
	}

	TEST(NNPACK, Conv_3x3s1_FP16_precompute) {
	for (int i = 0; i < kIters; ++i) {
	int group = randInt(1, 2);
	runConv(
	3,
	3,
	1,
	1,
	group,
	"WINOGRAD_FP16",
	group * randInt(1, 8),
	group * randInt(1, 8),
	1,
	"PRECOMPUTE");
	}
	}

	TEST(NNPACK, Conv_NxNs1) {
	for (int i = 0; i < kIters; ++i) {
	int kernel = randInt(2, 10);
	runConv(kernel, kernel, 1, 1);
	}
	}

	TEST(NNPACK, Conv_1x1s1) {
	for (int i = 0; i < kIters; ++i) {
	auto group = randInt(1, 3);
	auto inChannels = randInt(1, 8) * group;
	auto outChannels = randInt(1, 8) * group;
	auto n = 1;
	runConv(1, 1, 1, 1, group, "DIRECT", inChannels, outChannels, n);
	}
	}

	TEST(NNPACK, ConvRelu_1x1s1) {
	for (int i = 0; i < kIters; ++i) {
	auto group = randInt(1, 3);
	auto inChannels = randInt(1, 8) * group;
	auto outChannels = randInt(1, 8) * group;
	auto n = 1;
	runConv(
	1,
	1,
	1,
	1,
	group,
	"DIRECT",
	inChannels,
	outChannels,
	n,
	"PRECOMPUTE",
	"Relu");
	}
	}

	TEST(NNPACK, Conv_1x1s1_precompute) {
	for (int i = 0; i < kIters; ++i) {
	auto group = randInt(1, 3);
	auto inChannels = randInt(1, 8) * group;
	auto outChannels = randInt(1, 8) * group;
	auto n = 1;
	runConv(
	1, 1, 1, 1, group, "DIRECT", inChannels, outChannels, n, "PRECOMPUTE");
	}
	}

	TEST(NNPACK, Conv_NxNs_grouped) {
	for (int i = 0; i < kIters; ++i) {
	int group = randInt(2, 3);
	int iC = randInt(1, 6) * group;
	int oC = randInt(1, 6) * group;
	int kernel = randInt(2, 10);
	int n = randInt(1, 2);
	runConv(kernel, kernel, 1, 1, group, "", iC, oC, n);
	}
	}

	TEST(NNPACK, Conv_NxNs_grouped_precompute) {
	for (int i = 0; i < kIters; ++i) {
	int group = randInt(2, 3);
	int iC = randInt(1, 6) * group;
	int oC = randInt(1, 6) * group;
	int kernel = randInt(2, 10);
	int n = randInt(1, 2);
	runConv(kernel, kernel, 1, 1, group, "", iC, oC, n, "PRECOMPUTE");
	}
	}

	TEST(NNPACK, Conv_NxNsW) {
	for (int i = 0; i < 3; ++i) {
	int kernel = randInt(3, 5);
	int stride = randInt(1, kernel - 1);
	runConv(kernel, kernel, stride, stride);
	}
	}

	TEST(NNPACK, ConvRelu_NxNsW) {
	for (int i = 0; i < 3; ++i) {
	int kernel = randInt(3, 5);
	int stride = randInt(1, kernel - 1);
	runConv(kernel, kernel, stride, stride, 1, "", 1, 1, 1, "COMPUTE", "Relu");
	}
	}

	TEST(NNPACK, Conv_HxWsHxW) {
	for (int i = 0; i < 3; ++i) {
	int kernelH = randInt(2, 5);
	int kernelW = randInt(2, 5);
	int strideH = randInt(1, kernelH - 1);
	int strideW = randInt(1, kernelW - 1);
	runConv(kernelH, kernelW, strideH, strideW);
	}
	}

	} // namespace caffe2