#include <test/cpp/tensorexpr/test_base.h>
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/ir/irparser.h>
#include <torch/csrc/jit/tensorexpr/buffer.h>
#include <torch/csrc/jit/tensorexpr/kernel.h>
#include <torch/csrc/jit/tensorexpr/loopnest.h>
#include <torch/csrc/jit/tensorexpr/tensor.h>
#include <torch/torch.h>
#include <cmath>
#include <sstream>
#include <stdexcept>

namespace torch {
namespace jit {

using namespace torch::indexing;
using namespace torch::jit::tensorexpr;

void testKernel_1() {
  KernelScope kernel_scope;

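  // Type annotations in the IR below use one size:stride pair per dimension:
  // Float(5:3,3:1) is a 5x3 float tensor with strides (3, 1), i.e. contiguous.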
  const auto graph_string = R"IR(
    graph(%0 : Float(5:3,3:1, device=cpu),
          %1 : Float(5:3,3:1, device=cpu)):
      %2 : Float(5:3,3:1) = aten::mul(%0, %1)
      %3 : Float(5:3,3:1) = aten::mul(%0, %2)
      return (%3))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, graph.get());

  auto a = at::rand({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
  auto b = at::rand({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
  auto o = at::zeros({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
  auto ref = a * (a * b);
  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {a, b};
  Stmt* s = k.getCodeGenStmt();
  // TODO: verify stmt
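  // A sketch of what that verification might look like, assuming the TE IR
  // printer's operator<< and jit::testing::FileCheck; the loop-nest pattern
  // below is only a guess, so the sketch is left commented out:
  //   std::ostringstream oss;
  //   oss << *s;
  //   torch::jit::testing::FileCheck().run("# CHECK: for", oss.str());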

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  o = stack[0].toTensor();
  for (size_t i = 0; i < 5 * 3; i++) {
    CHECK_EQ(o.data_ptr<float>()[i], ref.data_ptr<float>()[i]);
  }
}

void testKernel_2() {
  KernelScope kernel_scope;

  const auto graph_string = R"IR(
    graph(%0 : Float(5:3,3:1, device=cpu),
          %1 : Float(5:1,3:5, device=cpu)):
      %2 : Float(5:3,3:1) = aten::mul(%0, %1)
      %3 : Float(5:3,3:1) = aten::mul(%0, %2)
      return (%3))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, graph.get());

  auto a = at::rand({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
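  // Transposing a 3x5 tensor yields a 5x3 view with strides (1, 5), which is
  // exactly the Float(5:1,3:5) layout the graph declares for %1.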
  auto b =
      at::rand({3, 5}, TensorOptions(kCPU).dtype(at::kFloat)).transpose(0, 1);
  auto o = at::zeros({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
  auto ref = a * (a * b);
  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {a, b};
  Stmt* s = k.getCodeGenStmt();
  // TODO: verify stmt

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  o = stack[0].toTensor();
  for (size_t i = 0; i < 5 * 3; i++) {
    CHECK_EQ(o.data_ptr<float>()[i], ref.data_ptr<float>()[i]);
  }
}

void testKernel_3() {
  KernelScope kernel_scope;

  const auto graph_string = R"IR(
    graph(%0 : Float(5:3,3:1, device=cpu),
          %1 : Float(5:12,3:2, device=cpu)):
      %2 : Float(5:3,3:1) = aten::mul(%0, %1)
      %3 : Float(5:3,3:1) = aten::mul(%0, %2)
      return (%3))IR";
  auto graph = std::make_shared<Graph>();
  parseIR(graph_string, graph.get());

  auto a = at::rand({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
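  // Taking every other row and column of a 10x6 tensor yields a 5x3 view with
  // strides (12, 2), matching the Float(5:12,3:2) layout declared for %1.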
  auto b = at::rand({10, 6}, TensorOptions(kCPU).dtype(at::kFloat))
               .index({Slice(None, None, 2), Slice(None, None, 2)});
  auto o = at::zeros({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
  auto ref = a * (a * b);
  TensorExprKernel k(graph);
  std::vector<at::Tensor> inputs = {a, b};
  Stmt* s = k.getCodeGenStmt();
  // TODO: verify stmt

  std::vector<IValue> stack = fmap<IValue>(inputs);
  k.run(stack);
  o = stack[0].toTensor();
  for (size_t i = 0; i < 5 * 3; i++) {
    CHECK_EQ(o.data_ptr<float>()[i], ref.data_ptr<float>()[i]);
  }
}

void testKernel_4() {
  // Test TensorExpr shape inference: only the graph inputs carry shape
  // annotations; the shapes of the intermediate values (typed as plain
  // Tensor below) must be inferred.
  {
    KernelScope kernel_scope;

    const auto graph_string = R"IR(
      graph(%0 : Float(5:3, 3:1, device=cpu),
            %1 : Float(5:12, 3:2, device=cpu)):
        %2 : Tensor = aten::mul(%0, %1)
        %3 : Tensor = aten::mul(%0, %2)
        return (%3))IR";
    auto graph = std::make_shared<Graph>();
    parseIR(graph_string, graph.get());

    auto a = at::rand({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
    auto b = at::rand({10, 6}, TensorOptions(kCPU).dtype(at::kFloat))
                 .index({Slice(None, None, 2), Slice(None, None, 2)});
    auto o = at::zeros({5, 3}, TensorOptions(kCPU).dtype(at::kFloat));
    auto ref = a * (a * b);
    TensorExprKernel k(graph);
    std::vector<at::Tensor> inputs = {a, b};
    Stmt* s = k.getCodeGenStmt();

    std::vector<IValue> stack = fmap<IValue>(inputs);
    k.run(stack);
    o = stack[0].toTensor();
    for (size_t i = 0; i < 5 * 3; i++) {
      CHECK_EQ(o.data_ptr<float>()[i], ref.data_ptr<float>()[i]);
    }
  }
  {
    KernelScope kernel_scope;

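    // prim::ConstantChunk splits its 8x8 input along dim 1 into two 8x4
    // halves, so inference has to propagate shapes through a multi-output
    // node here.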
    const auto graph_string = R"IR(
      graph(%0 : Float(8:8, 8:1, device=cpu),
            %1 : Float(8:8, 8:1, device=cpu)):
        %2 : Tensor = aten::mul(%0, %1)
        %3 : Tensor, %4 : Tensor = prim::ConstantChunk[dim=1,chunks=2](%2)
        %r : Tensor = aten::mul(%3, %4)
        return (%r))IR";
    auto graph = std::make_shared<Graph>();
    parseIR(graph_string, graph.get());

    auto a = at::rand({8, 8}, TensorOptions(kCPU).dtype(at::kFloat));
    auto b = at::rand({8, 8}, TensorOptions(kCPU).dtype(at::kFloat));
    auto o = at::zeros({8, 4}, TensorOptions(kCPU).dtype(at::kFloat));
    auto t = torch::chunk(a * b, 2, 1);
    auto ref = t[0] * t[1];
    TensorExprKernel k(graph);
    std::vector<at::Tensor> inputs = {a, b};
    Stmt* s = k.getCodeGenStmt();

    std::vector<IValue> stack = fmap<IValue>(inputs);
    k.run(stack);
    o = stack[0].toTensor();
    CHECK_EQ(o.sizes()[0], 8);
    CHECK_EQ(o.sizes()[1], 4);
    for (size_t i = 0; i < 8 * 4; i++) {
      CHECK_EQ(o.data_ptr<float>()[i], ref.data_ptr<float>()[i]);
    }
  }
  {
    // Test that shape inference handles aten::unsqueeze
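    // with both positive and negative dims; a negative dim d on a rank-r
    // input is normalized to d + r + 1 before the size-1 axis is inserted.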
    KernelScope kernel_scope;

    const auto graph_string = R"IR(
      graph(%a : Float(4:2, 2:1, device=cpu),
            %b : Float(4:6, 3:2, 2:1, device=cpu),
            %c : Float(3:4, 2:2, 2:1, device=cpu)):
        %one : int = prim::Constant[value=1]()
        %minus_one : int = prim::Constant[value=-1]()
        %three : int = prim::Constant[value=3]()
        %minus_four : int = prim::Constant[value=-4]()
        %a1 : Tensor = aten::unsqueeze(%a, %one)        # new size: [4,1,2]
        %a2 : Tensor = aten::unsqueeze(%a1, %minus_one) # new size: [4,1,2,1]
        %b1 : Tensor = aten::unsqueeze(%b, %three)      # new size: [4,3,2,1]
        %c1 : Tensor = aten::unsqueeze(%c, %minus_four) # new size: [1,3,2,2]
        %ab : Tensor = aten::mul(%a2, %b1)              # expected size: [4,3,2,1]
        %abc : Tensor = aten::mul(%ab, %c1)             # expected size: [4,3,2,2]
        return (%abc))IR";
    auto graph = std::make_shared<Graph>();
    parseIR(graph_string, graph.get());

    auto a = at::rand({4, 2}, TensorOptions(kCPU).dtype(at::kFloat));
    auto b = at::rand({4, 3, 2}, TensorOptions(kCPU).dtype(at::kFloat));
    auto c = at::rand({3, 2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
    auto o = at::zeros({4, 3, 2, 2}, TensorOptions(kCPU).dtype(at::kFloat));
    auto ref = at::unsqueeze(at::unsqueeze(a, 1), -1) * at::unsqueeze(b, 3) *
        at::unsqueeze(c, -4);

    TensorExprKernel k(graph);
    std::vector<at::Tensor> inputs = {a, b, c};
    Stmt* s = k.getCodeGenStmt();

    std::vector<IValue> stack = fmap<IValue>(inputs);
    k.run(stack);
    o = stack[0].toTensor();

    // Check sizes
    CHECK_EQ(o.sizes().size(), ref.sizes().size());
    size_t num_el = 1;
    for (size_t idx = 0; idx < ref.sizes().size(); idx++) {
      CHECK_EQ(o.sizes()[idx], ref.sizes()[idx]);
      num_el *= ref.sizes()[idx];
    }

    // Check the contents
    for (size_t i = 0; i < num_el; i++) {
      CHECK_EQ(o.data_ptr<float>()[i], ref.data_ptr<float>()[i]);
    }
  }
}

} // namespace jit
} // namespace torch