// examples/Linalg/Linalg3/lib/TensorOps.cpp - platform/external/tensorflow - Git at Google

 //===- TensorOps.cpp - Implementation of the linalg TensorOps operation ---===//
 //
 // Copyright 2019 The MLIR Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // =============================================================================
 //
 // This file implements a simple IR operation to create new tensor computation
 // operations in the linalg dialect.
 //
 //===----------------------------------------------------------------------===//

 #include "linalg1/Analysis.h"
 #include "linalg1/Common.h"
 #include "linalg3/Intrinsics.h"
 #include "linalg3/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/StandardTypes.h"
 #include "llvm/ADT/STLExtras.h"

 using namespace mlir;
 using namespace mlir::edsc;
 using namespace mlir::edsc::intrinsics;
 using namespace linalg;
 using namespace linalg::intrinsics;

 //////////////////////////////////////////////////////////////////////////////
 // Implementation of DotOp.
 //////////////////////////////////////////////////////////////////////////////
 /// Builds the maps from the dot loop nest to each operand's ranges.
 ///
 /// Operands are A(K), B(K) and C(). With the single loop dimension d0 (K) the
 /// combined mapping is (d0) -> (d0, d0)(%k): A and B each map d0 to itself,
 /// while C, which has no ranges, gets a default-constructed map.
 SmallVector<AffineMap, 8> linalg::DotOp::loopsToOperandRangeMaps() {
   // One range for A plus one for B; C contributes none.
   assert(getRanges(*this).size() == 2);
   auto *ctx = ScopedContext::getContext();
   auto k = getAffineDimExpr(0, ctx);
   SmallVector<AffineMap, 8> operandMaps;
   operandMaps.push_back(AffineMap::get(1, 0, {k}, {})); // A(K)
   operandMaps.push_back(AffineMap::get(1, 0, {k}, {})); // B(K)
   operandMaps.push_back(AffineMap());                   // C()
   return operandMaps;
 }

 /// Emits the scalar body of a dot product into the innermost reduction loop.
 ///
 /// `reductionIvs` must hold exactly one induction variable; `parallelIvs` is
 /// not read by this implementation. The emitted computation is
 ///   C() = select(r_i == 0, 0.0, C()) + A(r_i) * B(r_i)
 /// i.e. the previous value of C is replaced by a floating-point zero whenever
 /// the reduction induction variable equals the constant index zero.
 void linalg::DotOp::emitScalarImplementation(
     llvm::ArrayRef<Value *> parallelIvs, llvm::ArrayRef<Value *> reductionIvs) {
   // Indexed accessor whose reads/writes go through the linalg load/store
   // intrinsics.
   using IndexedValue = TemplatedIndexedValue<linalg::intrinsics::load,
                                              linalg::intrinsics::store>;
   assert(reductionIvs.size() == 1);
   // The loop owning the last reduction induction variable receives the body.
   auto innermostLoop = getForInductionVarOwner(reductionIvs.back());
   auto *body = innermostLoop.getBody();
   using edsc::op::operator+;
   using edsc::op::operator*;
   using edsc::op::operator==;
   using edsc::intrinsics::select;

   // Account for affine.terminator in loop: the builder is positioned at the
   // last operation of the body rather than at its end.
   FuncBuilder builder(body, std::prev(body->end(), 1));
   ScopedContext scope(builder, innermostLoop.getLoc());
   // Element type of operand 0's view; the cast requires it to be a FloatType,
   // so other element types are not handled here.
   FloatType fTy = getOperand(0)
                       ->getType()
                       .cast<ViewType>()
                       .getElementType()
                       .cast<FloatType>();
   IndexHandle zero(constant_index(0));
   // Floating-point zero with the semantics of the element type.
   ValueHandle zerof =
       constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy);
   IndexHandle r_i(reductionIvs[0]);
   // Operands 0, 1 and 2 are the two inputs and the output, respectively.
   IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2));
   ValueHandle cond = (r_i == zero);
   // Use zero as the accumulator when r_i == 0, the current C() otherwise.
   ValueHandle scalarC = select(cond, zerof, *C());
   C() = scalarC + A(r_i) * B(r_i);
 }

 //////////////////////////////////////////////////////////////////////////////
 // Implementation of MatvecOp.
 //////////////////////////////////////////////////////////////////////////////
 /// Builds the maps from the matvec loop nest to each operand's ranges.
 ///
 /// Operands are A(M, K), B(K) and C(M). With loop dimensions d0 (M) and
 /// d1 (K) the combined mapping is (d0, d1) -> (d0, d1, d1, d0)(%m, %k).
 SmallVector<AffineMap, 8> linalg::MatvecOp::loopsToOperandRangeMaps() {
   // Two ranges for A, one for B and one for C.
   assert(getRanges(*this).size() == 4);
   auto *ctx = ScopedContext::getContext();
   auto m = getAffineDimExpr(0, ctx);
   auto k = getAffineDimExpr(1, ctx);
   SmallVector<AffineMap, 8> operandMaps;
   operandMaps.push_back(AffineMap::get(2, 0, {m, k}, {})); // A(M, K)
   operandMaps.push_back(AffineMap::get(2, 0, {k}, {}));    // B(K)
   operandMaps.push_back(AffineMap::get(2, 0, {m}, {}));    // C(M)
   return operandMaps;
 }

 // The body expression for matvec is: C(i) = scalarC + A(i, r_j) * B(r_j)
 // The body expression for dot is: C() = scalarC + A(r_i) * B(r_i)
 // So we must drop the `i` loop from the matvec.
 //
 // This rewrites the matvec as a loop over the range indexing dimension 0 of
 // A; each iteration emits a `dot` on the i-th slices of A and C together with
 // the whole of B. The original operation is not erased by this function.
 void linalg::MatvecOp::writeAsFinerGrainTensorContraction() {
   auto *op = getOperation();
   auto *vA(getInputView(0)), *vB(getInputView(1)), *vC(getOutputView(0));
   // Root indexing of A along dimension 0; its `first` member must be a value
   // defined by a RangeOp, which becomes the range of the new loop.
   auto indexingPosPair = getViewRootIndexing(vA, 0);
   assert(
       llvm::isa_and_nonnull<RangeOp>(indexingPosPair.first->getDefiningOp()));
   // clang-format off
   // New operations are built at the position of this op, with its location.
   FuncBuilder builder(op);
   ScopedContext scope(builder, op->getLoc());
   IndexHandle i;
   using linalg::common::LoopNestRangeBuilder;
   // The lambda is invoked immediately as the builder's argument, so the
   // slice/dot calls run as part of evaluating this expression (presumably
   // with the newly opened loop as the insertion scope).
   LoopNestRangeBuilder(&i, ValueHandle(indexingPosPair.first))({
     [&i, &vA, &vB, &vC]() {
       ValueHandle sliceA = slice(vA, i, 0);
       ValueHandle sliceC = slice(vC, i, 0);
       dot(sliceA, vB, sliceC);
       /// NestedBuilders expect handles, we thus return an IndexHandle.
       return IndexHandle();
     }()
   });
   // clang-format on
 }

 /// Emits the scalar body of a matrix-vector product into the innermost
 /// reduction loop.
 ///
 /// `parallelIvs` must hold exactly one induction variable (i) and
 /// `reductionIvs` exactly one (r_j). The emitted computation is
 ///   C(i) = select(r_j == 0, 0.0, C(i)) + A(i, r_j) * B(r_j)
 /// i.e. the previous value of C(i) is replaced by a floating-point zero
 /// whenever the reduction induction variable equals the constant index zero.
 void linalg::MatvecOp::emitScalarImplementation(
     llvm::ArrayRef<Value *> parallelIvs, llvm::ArrayRef<Value *> reductionIvs) {
   // Indexed accessor whose reads/writes go through the linalg load/store
   // intrinsics.
   using IndexedValue = TemplatedIndexedValue<linalg::intrinsics::load,
                                              linalg::intrinsics::store>;
   // Both parallelIvs[0] and reductionIvs[0] are read below; check each
   // precondition up front instead of relying on the indexing itself.
   assert(parallelIvs.size() == 1);
   assert(reductionIvs.size() == 1);
   // The loop owning the last reduction induction variable receives the body.
   auto innermostLoop = getForInductionVarOwner(reductionIvs.back());
   auto *body = innermostLoop.getBody();
   using edsc::op::operator+;
   using edsc::op::operator*;
   using edsc::op::operator==;
   using edsc::intrinsics::select;
   // Account for affine.terminator in loop: the builder is positioned at the
   // last operation of the body rather than at its end.
   FuncBuilder builder(body, std::prev(body->end(), 1));
   ScopedContext scope(builder, innermostLoop.getLoc());
   // Element type of operand 0's view; the cast requires it to be a FloatType,
   // so other element types are not handled here.
   FloatType fTy = getOperand(0)
                       ->getType()
                       .cast<ViewType>()
                       .getElementType()
                       .cast<FloatType>();
   IndexHandle i(parallelIvs[0]), r_j(reductionIvs[0]);
   // Operands 0, 1 and 2 are the two inputs and the output, respectively.
   IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2));
   IndexHandle zero(constant_index(0));
   // Floating-point zero with the semantics of the element type.
   ValueHandle zerof =
       constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy);
   ValueHandle cond = (r_j == zero);
   // Use zero as the accumulator when r_j == 0, the current C(i) otherwise.
   ValueHandle scalarC = select(cond, zerof, *C(i));
   C(i) = scalarC + A(i, r_j) * B(r_j);
 }

 //////////////////////////////////////////////////////////////////////////////
 // Implementation of MatmulOp.
 //////////////////////////////////////////////////////////////////////////////
 /// Builds the maps from the matmul loop nest to each operand's ranges.
 ///
 /// Operands are A(M, K), B(K, N) and C(M, N). With loop dimensions d0 (M),
 /// d1 (N) and d2 (K) the combined mapping is
 ///   (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k)
 SmallVector<AffineMap, 8> linalg::MatmulOp::loopsToOperandRangeMaps() {
   // Two ranges for each of A, B and C.
   assert(getRanges(*this).size() == 6);
   auto *ctx = ScopedContext::getContext();
   auto m = getAffineDimExpr(0, ctx);
   auto n = getAffineDimExpr(1, ctx);
   auto k = getAffineDimExpr(2, ctx);
   SmallVector<AffineMap, 8> operandMaps;
   operandMaps.push_back(AffineMap::get(3, 0, {m, k}, {})); // A(M, K)
   operandMaps.push_back(AffineMap::get(3, 0, {k, n}, {})); // B(K, N)
   operandMaps.push_back(AffineMap::get(3, 0, {m, n}, {})); // C(M, N)
   return operandMaps;
 }

 // The body expression for matmul is: C(i, j) = scalarC + A(i, r_k) * B(r_k, j)
 // The body expression for matvec is: C(i) = scalarC + A(i, r_j) * B(r_j)
 // So we must drop the `j` loop from the matmul.
 // This is fine because parallel dimensions permute: we can just do it
 // declaratively.
 //
 // This rewrites the matmul as a loop over the range indexing dimension 1 of
 // B; each iteration emits a `matvec` on the whole of A and the j-th slices
 // (along dimension 1) of B and C. The original operation is not erased by
 // this function.
 void linalg::MatmulOp::writeAsFinerGrainTensorContraction() {
   auto *op = getOperation();
   auto *vA(getInputView(0)), *vB(getInputView(1)), *vC(getOutputView(0));
   // Root indexing of B along dimension 1; its `first` member must be a value
   // defined by a RangeOp, which becomes the range of the new loop.
   auto indexingPosPair = getViewRootIndexing(vB, 1);
   assert(
       llvm::isa_and_nonnull<RangeOp>(indexingPosPair.first->getDefiningOp()));
   using linalg::common::LoopNestRangeBuilder;
   // clang-format off
   // New operations are built at the position of this op, with its location.
   FuncBuilder builder(op);
   ScopedContext scope(builder, op->getLoc());
   IndexHandle j;
   // The lambda is invoked immediately as the builder's argument, so the
   // slice/matvec calls run as part of evaluating this expression (presumably
   // with the newly opened loop as the insertion scope).
   LoopNestRangeBuilder(&j, ValueHandle(indexingPosPair.first))({
     [&j, &vA, &vB, &vC]() {
       ValueHandle sliceB = slice(vB, j, 1);
       ValueHandle sliceC = slice(vC, j, 1);
       matvec(vA, sliceB, sliceC);
       /// NestedBuilders expect handles, we thus return an IndexHandle.
       return IndexHandle();
     }()
   });
   // clang-format on
 }

 /// Emits the scalar body of a matrix-matrix product into the innermost
 /// reduction loop.
 ///
 /// `parallelIvs` must hold exactly two induction variables (i, j) and
 /// `reductionIvs` exactly one (r_k). The emitted computation is
 ///   C(i, j) = select(r_k == 0, 0.0, C(i, j)) + A(i, r_k) * B(r_k, j)
 /// i.e. the previous value of C(i, j) is replaced by a floating-point zero
 /// whenever the reduction induction variable equals the constant index zero.
 void linalg::MatmulOp::emitScalarImplementation(
     llvm::ArrayRef<Value *> parallelIvs, llvm::ArrayRef<Value *> reductionIvs) {
   // Indexed accessor whose reads/writes go through the linalg load/store
   // intrinsics.
   using IndexedValue = TemplatedIndexedValue<linalg::intrinsics::load,
                                              linalg::intrinsics::store>;
   // parallelIvs[0], parallelIvs[1] and reductionIvs[0] are read below; check
   // each precondition up front instead of relying on the indexing itself.
   assert(parallelIvs.size() == 2);
   assert(reductionIvs.size() == 1);
   // The loop owning the last reduction induction variable receives the body.
   auto innermostLoop = getForInductionVarOwner(reductionIvs.back());
   auto *body = innermostLoop.getBody();
   using edsc::op::operator+;
   using edsc::op::operator*;
   using edsc::op::operator==;
   using edsc::intrinsics::select;
   // Account for affine.terminator in loop: the builder is positioned at the
   // last operation of the body rather than at its end.
   FuncBuilder builder(body, std::prev(body->end(), 1));
   ScopedContext scope(builder, innermostLoop.getLoc());
   // Element type of operand 0's view; the cast requires it to be a FloatType,
   // so other element types are not handled here.
   FloatType fTy = getOperand(0)
                       ->getType()
                       .cast<ViewType>()
                       .getElementType()
                       .cast<FloatType>();
   IndexHandle i(parallelIvs[0]), j(parallelIvs[1]), r_k(reductionIvs[0]);
   // Operands 0, 1 and 2 are the two inputs and the output, respectively.
   IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2));
   IndexHandle zero(constant_index(0));
   // Floating-point zero with the semantics of the element type.
   ValueHandle zerof =
       constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy);
   ValueHandle cond = r_k == zero;
   // Use zero as the accumulator when r_k == 0, the current C(i, j) otherwise.
   ValueHandle scalarC = select(cond, zerof, *C(i, j));
   C(i, j) = scalarC + A(i, r_k) * B(r_k, j);
 }
	//===- TensorOps.cpp - Implementation of the linalg TensorOps operation ---===//
	//
	// Copyright 2019 The MLIR Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	// =============================================================================
	//
	// This file implements a simple IR operation to create new tensor computation
	// operations in the linalg dialect.
	//
	//===----------------------------------------------------------------------===//

	#include "linalg1/Analysis.h"
	#include "linalg1/Common.h"
	#include "linalg3/Intrinsics.h"
	#include "linalg3/Ops.h"
	#include "mlir/IR/Builders.h"
	#include "mlir/IR/OpDefinition.h"
	#include "mlir/IR/OpImplementation.h"
	#include "mlir/IR/StandardTypes.h"
	#include "llvm/ADT/STLExtras.h"

	using namespace mlir;
	using namespace mlir::edsc;
	using namespace mlir::edsc::intrinsics;
	using namespace linalg;
	using namespace linalg::intrinsics;

	//////////////////////////////////////////////////////////////////////////////
	// Implementation of DotOp.
	//////////////////////////////////////////////////////////////////////////////
	/// Builds the maps from the dot loop nest to each operand's ranges.
	///
	/// Operands are A(K), B(K) and C(). With the single loop dimension d0 (K)
	/// the combined mapping is (d0) -> (d0, d0)(%k): A and B each map d0 to
	/// itself, while C, which has no ranges, gets a default-constructed map.
	SmallVector<AffineMap, 8> linalg::DotOp::loopsToOperandRangeMaps() {
	  // One range for A plus one for B; C contributes none.
	  assert(getRanges(*this).size() == 2);
	  auto *ctx = ScopedContext::getContext();
	  auto k = getAffineDimExpr(0, ctx);
	  SmallVector<AffineMap, 8> operandMaps;
	  operandMaps.push_back(AffineMap::get(1, 0, {k}, {})); // A(K)
	  operandMaps.push_back(AffineMap::get(1, 0, {k}, {})); // B(K)
	  operandMaps.push_back(AffineMap());                   // C()
	  return operandMaps;
	}

	void linalg::DotOp::emitScalarImplementation(
	llvm::ArrayRef<Value > parallelIvs, llvm::ArrayRef<Value > reductionIvs) {
	using IndexedValue = TemplatedIndexedValue<linalg::intrinsics::load,
	linalg::intrinsics::store>;
	assert(reductionIvs.size() == 1);
	auto innermostLoop = getForInductionVarOwner(reductionIvs.back());
	auto *body = innermostLoop.getBody();
	using edsc::op::operator+;
	using edsc::op::operator*;
	using edsc::op::operator==;
	using edsc::intrinsics::select;

	// Account for affine.terminator in loop.
	FuncBuilder builder(body, std::prev(body->end(), 1));
	ScopedContext scope(builder, innermostLoop.getLoc());
	FloatType fTy = getOperand(0)
	->getType()
	.cast<ViewType>()
	.getElementType()
	.cast<FloatType>();
	IndexHandle zero(constant_index(0));
	ValueHandle zerof =
	constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy);
	IndexHandle r_i(reductionIvs[0]);
	IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2));
	ValueHandle cond = (r_i == zero);
	ValueHandle scalarC = select(cond, zerof, *C());
	C() = scalarC + A(r_i) * B(r_i);
	}

	//////////////////////////////////////////////////////////////////////////////
	// Implementation of MatvecOp.
	//////////////////////////////////////////////////////////////////////////////
	/// Builds the maps from the matvec loop nest to each operand's ranges.
	///
	/// Operands are A(M, K), B(K) and C(M). With loop dimensions d0 (M) and
	/// d1 (K) the combined mapping is (d0, d1) -> (d0, d1, d1, d0)(%m, %k).
	SmallVector<AffineMap, 8> linalg::MatvecOp::loopsToOperandRangeMaps() {
	  // Two ranges for A, one for B and one for C.
	  assert(getRanges(*this).size() == 4);
	  auto *ctx = ScopedContext::getContext();
	  auto m = getAffineDimExpr(0, ctx);
	  auto k = getAffineDimExpr(1, ctx);
	  SmallVector<AffineMap, 8> operandMaps;
	  operandMaps.push_back(AffineMap::get(2, 0, {m, k}, {})); // A(M, K)
	  operandMaps.push_back(AffineMap::get(2, 0, {k}, {}));    // B(K)
	  operandMaps.push_back(AffineMap::get(2, 0, {m}, {}));    // C(M)
	  return operandMaps;
	}

	// The body expression for matvec is: C(i) = scalarC + A(i, r_j) * B(r_j)
	// The body expression for dot is: C() = scalarC + A(r_i) * B(r_i)
	// So we must drop the `i` loop from the matvec.
	//
	// This rewrites the matvec as a loop over the range indexing dimension 0 of
	// A; each iteration emits a `dot` on the i-th slices of A and C together
	// with the whole of B. The original operation is not erased by this
	// function.
	void linalg::MatvecOp::writeAsFinerGrainTensorContraction() {
	  auto *op = getOperation();
	  // Every declarator needs its own `*`: mixing `auto x(ptr)` with
	  // `auto *y(ptr)` in one declaration makes `auto` deduce two different
	  // types, which is ill-formed.
	  auto *vA(getInputView(0)), *vB(getInputView(1)), *vC(getOutputView(0));
	  // Root indexing of A along dimension 0; its `first` member must be a
	  // value defined by a RangeOp, which becomes the range of the new loop.
	  auto indexingPosPair = getViewRootIndexing(vA, 0);
	  assert(
	      llvm::isa_and_nonnull<RangeOp>(indexingPosPair.first->getDefiningOp()));
	  // clang-format off
	  // New operations are built at the position of this op, with its location.
	  FuncBuilder builder(op);
	  ScopedContext scope(builder, op->getLoc());
	  IndexHandle i;
	  using linalg::common::LoopNestRangeBuilder;
	  // The lambda is invoked immediately as the builder's argument, so the
	  // slice/dot calls run as part of evaluating this expression (presumably
	  // with the newly opened loop as the insertion scope).
	  LoopNestRangeBuilder(&i, ValueHandle(indexingPosPair.first))({
	    [&i, &vA, &vB, &vC]() {
	      ValueHandle sliceA = slice(vA, i, 0);
	      ValueHandle sliceC = slice(vC, i, 0);
	      dot(sliceA, vB, sliceC);
	      /// NestedBuilders expect handles, we thus return an IndexHandle.
	      return IndexHandle();
	    }()
	  });
	  // clang-format on
	}

	void linalg::MatvecOp::emitScalarImplementation(
	llvm::ArrayRef<Value > parallelIvs, llvm::ArrayRef<Value > reductionIvs) {
	using IndexedValue = TemplatedIndexedValue<linalg::intrinsics::load,
	linalg::intrinsics::store>;
	assert(reductionIvs.size() == 1);
	auto innermostLoop = getForInductionVarOwner(reductionIvs.back());
	auto *body = innermostLoop.getBody();
	using edsc::op::operator+;
	using edsc::op::operator*;
	using edsc::op::operator==;
	using edsc::intrinsics::select;
	// Account for affine.terminator in loop.
	FuncBuilder builder(body, std::prev(body->end(), 1));
	ScopedContext scope(builder, innermostLoop.getLoc());
	FloatType fTy = getOperand(0)
	->getType()
	.cast<ViewType>()
	.getElementType()
	.cast<FloatType>();
	IndexHandle i(parallelIvs[0]), r_j(reductionIvs[0]);
	IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2));
	IndexHandle zero(constant_index(0));
	ValueHandle zerof =
	constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy);
	ValueHandle cond = (r_j == zero);
	ValueHandle scalarC = select(cond, zerof, *C(i));
	C(i) = scalarC + A(i, r_j) * B(r_j);
	}

	//////////////////////////////////////////////////////////////////////////////
	// Implementation of MatmulOp.
	//////////////////////////////////////////////////////////////////////////////
	/// Builds the maps from the matmul loop nest to each operand's ranges.
	///
	/// Operands are A(M, K), B(K, N) and C(M, N). With loop dimensions d0 (M),
	/// d1 (N) and d2 (K) the combined mapping is
	///   (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k)
	SmallVector<AffineMap, 8> linalg::MatmulOp::loopsToOperandRangeMaps() {
	  // Two ranges for each of A, B and C.
	  assert(getRanges(*this).size() == 6);
	  auto *ctx = ScopedContext::getContext();
	  auto m = getAffineDimExpr(0, ctx);
	  auto n = getAffineDimExpr(1, ctx);
	  auto k = getAffineDimExpr(2, ctx);
	  SmallVector<AffineMap, 8> operandMaps;
	  operandMaps.push_back(AffineMap::get(3, 0, {m, k}, {})); // A(M, K)
	  operandMaps.push_back(AffineMap::get(3, 0, {k, n}, {})); // B(K, N)
	  operandMaps.push_back(AffineMap::get(3, 0, {m, n}, {})); // C(M, N)
	  return operandMaps;
	}

	// The body expression for matmul is: C(i, j) = scalarC + A(i, r_k) * B(r_k, j)
	// The body expression for matvec is: C(i) = scalarC + A(i, r_j) * B(r_j)
	// So we must drop the `j` loop from the matmul.
	// This is fine because parallel dimensions permute: we can just do it
	// declaratively.
	//
	// This rewrites the matmul as a loop over the range indexing dimension 1 of
	// B; each iteration emits a `matvec` on the whole of A and the j-th slices
	// (along dimension 1) of B and C. The original operation is not erased by
	// this function.
	void linalg::MatmulOp::writeAsFinerGrainTensorContraction() {
	  auto *op = getOperation();
	  // Every declarator needs its own `*`: mixing `auto x(ptr)` with
	  // `auto *y(ptr)` in one declaration makes `auto` deduce two different
	  // types, which is ill-formed.
	  auto *vA(getInputView(0)), *vB(getInputView(1)), *vC(getOutputView(0));
	  // Root indexing of B along dimension 1; its `first` member must be a
	  // value defined by a RangeOp, which becomes the range of the new loop.
	  auto indexingPosPair = getViewRootIndexing(vB, 1);
	  assert(
	      llvm::isa_and_nonnull<RangeOp>(indexingPosPair.first->getDefiningOp()));
	  using linalg::common::LoopNestRangeBuilder;
	  // clang-format off
	  // New operations are built at the position of this op, with its location.
	  FuncBuilder builder(op);
	  ScopedContext scope(builder, op->getLoc());
	  IndexHandle j;
	  // The lambda is invoked immediately as the builder's argument, so the
	  // slice/matvec calls run as part of evaluating this expression
	  // (presumably with the newly opened loop as the insertion scope).
	  LoopNestRangeBuilder(&j, ValueHandle(indexingPosPair.first))({
	    [&j, &vA, &vB, &vC]() {
	      ValueHandle sliceB = slice(vB, j, 1);
	      ValueHandle sliceC = slice(vC, j, 1);
	      matvec(vA, sliceB, sliceC);
	      /// NestedBuilders expect handles, we thus return an IndexHandle.
	      return IndexHandle();
	    }()
	  });
	  // clang-format on
	}

	void linalg::MatmulOp::emitScalarImplementation(
	llvm::ArrayRef<Value > parallelIvs, llvm::ArrayRef<Value > reductionIvs) {
	using IndexedValue = TemplatedIndexedValue<linalg::intrinsics::load,
	linalg::intrinsics::store>;
	assert(reductionIvs.size() == 1);
	auto innermostLoop = getForInductionVarOwner(reductionIvs.back());
	auto *body = innermostLoop.getBody();
	using edsc::op::operator+;
	using edsc::op::operator*;
	using edsc::op::operator==;
	using edsc::intrinsics::select;
	// Account for affine.terminator in loop.
	FuncBuilder builder(body, std::prev(body->end(), 1));
	ScopedContext scope(builder, innermostLoop.getLoc());
	FloatType fTy = getOperand(0)
	->getType()
	.cast<ViewType>()
	.getElementType()
	.cast<FloatType>();
	IndexHandle i(parallelIvs[0]), j(parallelIvs[1]), r_k(reductionIvs[0]);
	IndexedValue A(getOperand(0)), B(getOperand(1)), C(getOperand(2));
	IndexHandle zero(constant_index(0));
	ValueHandle zerof =
	constant_float(llvm::APFloat::getZero(fTy.getFloatSemantics()), fTy);
	ValueHandle cond = r_k == zero;
	ValueHandle scalarC = select(cond, zerof, *C(i, j));
	C(i, j) = scalarC + A(i, r_k) * B(r_k, j);
	}