include/mlir/Quantization/QuantOps.td - platform/external/tensorflow - Git at Google

 //===- QuantOps.td - Quantization operation definition -----*- tablegen -*-===//
 //
 // Copyright 2019 The MLIR Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // =============================================================================
 //
 // This is the operation definition file for Quantization.
 //
 //===----------------------------------------------------------------------===//

 #ifdef QUANTIZATION_OPS
 #else

 #ifdef OP_BASE
 #else
 include "mlir/IR/OpBase.td"
 include "mlir/Quantization/QuantPredicates.td"
 #endif // OP_BASE

 def quant_Dialect : Dialect {
   let name = "quant";
 }

 //===----------------------------------------------------------------------===//
 // Base classes
 //===----------------------------------------------------------------------===//

 class quant_Op<string mnemonic, list<OpTrait> traits> :
     Op<quant_Dialect, mnemonic, traits>;

 //===----------------------------------------------------------------------===//
 // Quantization casts
 //===----------------------------------------------------------------------===//
 // A QuantizeCast (qcast) represents a potential type shift from a quantizable
 // type to a quantized type.
 //
 // At runtime, a qcast will apply the transformation expressed by its
 // operand and result type. For flexibility during transformation, it is also
 // possible to have a qcast that performs no transformation (both its
 // operand and result type are quantizable).
 //
 // A qcast will typically originate from either:
 //   a) An expressed or implied constraint in the source dialect which signals
 //      that a certain level of quantization is possible or required.
 //   b) An inference made by a quantization algorithm indicating that a
 //      quantized representation may be acceptable.
 //
 // Especially early in transformation, it is common to have pairs of
 // qcast/dcast at points where a transition to a quantized type is
 // required. In addition, it is also common to have an identity qcast
 // (where the operand and result type are not quantized) at all points where
 // it is legal to use a quantized representation (but is not known to be
 // acceptable).
 def quant_QuantizeCastOp : quant_Op<"qcast", [NoSideEffect]> {
   let arguments = (ins quant_RealValueType:$arg);
   let results = (outs quant_RealValueType);
 }

 // A DequantizeCast op (dcast) represents the inverse of a qcast,
 // converting back from a quantized to quantizable (expressed) type.
 //
 // Like qcasts, a dcast is allowed to have both its operand and result
 // as non quantized types. This facilitates transformations and marks edges
 // where the computation must be carried out in the expressed type.
 //
 // Especially early in transformation, it is common to have dcasts on
 // all operands to ops that must operate with the expressed type (typically
 // math ops prior to lowering to target-specific, quantized kernels).
 def quant_DequantizeCastOp : quant_Op<"dcast", [NoSideEffect]> {
   let arguments = (ins quant_RealValueType:$arg);
   let results = (outs quant_RealValueType);
 }

 // A StorageCast (scast) represents a cast from or to a type based on the
 // storage type and a type based on a corresponding quantized type.
 //
 // This op exists to ensure type coherency for between parts of the computation
 // which are operating directly on an underlying storage type and those which
 // operate on quantized values.
 //
 // Examples from storage to quantized type:
 //   i8 -> !quant<"uniform[i8:f32]{1.0}">
 //   tensor<4xi8> -> tensor<4x!quant<"uniform[i8:f32]{1.0}">>
 //   vector<4xi8> -> vector<4x!quant<"uniform[i8:f32]{1.0}">>
 def quant_StorageCastOp : quant_Op<"scast", [NoSideEffect]> {
   let arguments = (ins quant_RealOrStorageValueType:$arg);
   let results = (outs quant_RealOrStorageValueType);
   let hasCanonicalizer = 0b1;
 }

 //===----------------------------------------------------------------------===//
 // Training integration and instrumentation ops
 //===----------------------------------------------------------------------===//

 def quant_ConstFakeQuant : quant_Op<"const_fake_quant",
                                     [SameValueType, NoSideEffect]> {
   let summary =
       "Simulates the effect of uniform quantization with const range.";

   let description = [{
     Given a const min, max, num_bits and narrow_range attribute, applies the
     same uniform quantization simulation as is done by the TensorFlow
     fake_quant_with_min_max_args op. See the fakeQuantAttrsToType() utility
     method and the quant-convert-simulated-quantization pass for futher details.
   }];

   let arguments = (ins
     F32Tensor:$inputs,
     F32Attr:$min,
     F32Attr:$max,
     // The bitwidth of the quantization; between 2 and 16, inclusive.
     I64Attr:$num_bits,
     // Quantization range starts from 0 or 1; starts from 1 if true.
     DefaultValuedAttr<BoolAttr, "false">:$narrow_range
   );

   let results = (outs
     F32Tensor:$outputs
   );
 }

 def quant_StatisticsRefOp : quant_Op<"stats_ref", [SameValueType]> {
   let summary =
       "Indicates that statistics are resolved by reference.";

   let description = [{
     This op acts as an identity that, when encountered at runtime, should result
     in statistics being collected about about the value of its operand/result.
     Such statistics will be stored with the provided key, allowing this node
     to later be converted to a 'stats' op if statistics with that key have been
     encountered.
   }];

   let arguments = (ins
     quant_RealValueType:$arg,
     StrAttr:$statsKey
   );
   let results = (outs quant_RealValueType);
 }

 def quant_StatisticsOp : quant_Op<"stats", [SameValueType]> {
   let summary =
       "Identity op which associates statistics with the value.";

   let description = [{
     Associates statistics about the runtime ranges of values observed for
     evaluations of this node.

     Statistics about the entire type are reported in the 'layerStats' attribute
     and those for each axis, in the (optional) `axisStats` attribute. The
     interpretation of each is determined by the last dimension of its shape.
     Currently, only dim=2 is supported, which is interpreted as [min, max].

     `layerStats` must be a rank 1 tensor: [2]
     `axisStats` must be a rank 2 tensor: [N, 2], where N=the rank of `arg`.
   }];

   let arguments = (ins
     quant_RealValueType:$arg,
     ElementsAttr:$layerStats,
     OptionalAttr<ElementsAttr>:$axisStats);
   let results = (outs quant_RealValueType);

   let verifier = [{
     auto tensorArg = arg()->getType().dyn_cast<TensorType>();
     auto argRank = tensorArg ? tensorArg.getRank() : 0;
     // Verify layerStats attribute.
     {
       auto layerStatsType = layerStats().getType();
       if (!layerStatsType.getElementType().isa<FloatType>()) {
         return emitOpError(
             "layerStats must have a floating point element type");
       }
       if (layerStatsType.getRank() != 1 || layerStatsType.getDimSize(0) != 2) {
         return emitOpError("layerStats must have shape [2]");
       }
     }
     // Verify axisStats (optional) attribute.
     if (axisStats()) {
       auto axisStatsType = axisStats()->getType();
       if (!axisStatsType.getElementType().isa<FloatType>()) {
         return emitOpError("axisStats must have a floating point element type");
       }
       if (axisStatsType.getRank() != 2 ||
           axisStatsType.getDimSize(1) != 2 ||
           axisStatsType.getDimSize(0) != argRank) {
         return emitOpError("axisStats must have shape [N,2] "
                            "where N = the argument rank");
       }
     }
     return success();
   }];
 }

 def quant_CoupledRefOp : quant_Op<"coupled_ref", [SameValueType]> {
   let summary =
       "Indicates that one point of the computation is coupled to another.";

   let description = [{
     Ordinarily, relationships between ops for the purposes of determining
     compatible quantized types is explicit based on the use-def chain. However,
     in some situations, a use may be separated from its def by arbitrary
     external connections. In such a case, during analysis, all coupled_ref
     nodes in a module which share a coupledKey will be considered to be
     directly connected as via an identity op for the purpose of type inference.
   }];

   let arguments = (ins
     quant_RealValueType:$arg,
     StrAttr:$coupledKey);
   let results = (outs quant_RealValueType);
 }

 #endif // QUANT_OPS
	//===- QuantOps.td - Quantization operation definition ------ tablegen --===//
	//
	// Copyright 2019 The MLIR Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	// =============================================================================
	//
	// This is the operation definition file for Quantization.
	//
	//===----------------------------------------------------------------------===//

	#ifdef QUANTIZATION_OPS
	#else

	#ifdef OP_BASE
	#else
	include "mlir/IR/OpBase.td"
	include "mlir/Quantization/QuantPredicates.td"
	#endif // OP_BASE

	def quant_Dialect : Dialect {
	let name = "quant";
	}

	//===----------------------------------------------------------------------===//
	// Base classes
	//===----------------------------------------------------------------------===//

	class quant_Op<string mnemonic, list<OpTrait> traits> :
	Op<quant_Dialect, mnemonic, traits>;

	//===----------------------------------------------------------------------===//
	// Quantization casts
	//===----------------------------------------------------------------------===//
	// A QuantizeCast (qcast) represents a potential type shift from a quantizable
	// type to a quantized type.
	//
	// At runtime, a qcast will apply the transformation expressed by its
	// operand and result type. For flexibility during transformation, it is also
	// possible to have a qcast that performs no transformation (both its
	// operand and result type are quantizable).
	//
	// A qcast will typically originate from either:
	// a) An expressed or implied constraint in the source dialect which signals
	// that a certain level of quantization is possible or required.
	// b) An inference made by a quantization algorithm indicating that a
	// quantized representation may be acceptable.
	//
	// Especially early in transformation, it is common to have pairs of
	// qcast/dcast at points where a transition to a quantized type is
	// required. In addition, it is also common to have an identity qcast
	// (where the operand and result type are not quantized) at all points where
	// it is legal to use a quantized representation (but is not known to be
	// acceptable).
	def quant_QuantizeCastOp : quant_Op<"qcast", [NoSideEffect]> {
	let arguments = (ins quant_RealValueType:$arg);
	let results = (outs quant_RealValueType);
	}

	// A DequantizeCast op (dcast) represents the inverse of a qcast,
	// converting back from a quantized to quantizable (expressed) type.
	//
	// Like qcasts, a dcast is allowed to have both its operand and result
	// as non quantized types. This facilitates transformations and marks edges
	// where the computation must be carried out in the expressed type.
	//
	// Especially early in transformation, it is common to have dcasts on
	// all operands to ops that must operate with the expressed type (typically
	// math ops prior to lowering to target-specific, quantized kernels).
	def quant_DequantizeCastOp : quant_Op<"dcast", [NoSideEffect]> {
	let arguments = (ins quant_RealValueType:$arg);
	let results = (outs quant_RealValueType);
	}

	// A StorageCast (scast) represents a cast from or to a type based on the
	// storage type and a type based on a corresponding quantized type.
	//
	// This op exists to ensure type coherency for between parts of the computation
	// which are operating directly on an underlying storage type and those which
	// operate on quantized values.
	//
	// Examples from storage to quantized type:
	// i8 -> !quant<"uniform[i8:f32]{1.0}">
	// tensor<4xi8> -> tensor<4x!quant<"uniform[i8:f32]{1.0}">>
	// vector<4xi8> -> vector<4x!quant<"uniform[i8:f32]{1.0}">>
	def quant_StorageCastOp : quant_Op<"scast", [NoSideEffect]> {
	let arguments = (ins quant_RealOrStorageValueType:$arg);
	let results = (outs quant_RealOrStorageValueType);
	let hasCanonicalizer = 0b1;
	}

	//===----------------------------------------------------------------------===//
	// Training integration and instrumentation ops
	//===----------------------------------------------------------------------===//

	def quant_ConstFakeQuant : quant_Op<"const_fake_quant",
	[SameValueType, NoSideEffect]> {
	let summary =
	"Simulates the effect of uniform quantization with const range.";

	let description = [{
	Given a const min, max, num_bits and narrow_range attribute, applies the
	same uniform quantization simulation as is done by the TensorFlow
	fake_quant_with_min_max_args op. See the fakeQuantAttrsToType() utility
	method and the quant-convert-simulated-quantization pass for futher details.
	}];

	let arguments = (ins
	F32Tensor:$inputs,
	F32Attr:$min,
	F32Attr:$max,
	// The bitwidth of the quantization; between 2 and 16, inclusive.
	I64Attr:$num_bits,
	// Quantization range starts from 0 or 1; starts from 1 if true.
	DefaultValuedAttr<BoolAttr, "false">:$narrow_range
	);

	let results = (outs
	F32Tensor:$outputs
	);
	}

	def quant_StatisticsRefOp : quant_Op<"stats_ref", [SameValueType]> {
	let summary =
	"Indicates that statistics are resolved by reference.";

	let description = [{
	This op acts as an identity that, when encountered at runtime, should result
	in statistics being collected about about the value of its operand/result.
	Such statistics will be stored with the provided key, allowing this node
	to later be converted to a 'stats' op if statistics with that key have been
	encountered.
	}];

	let arguments = (ins
	quant_RealValueType:$arg,
	StrAttr:$statsKey
	);
	let results = (outs quant_RealValueType);
	}

	def quant_StatisticsOp : quant_Op<"stats", [SameValueType]> {
	let summary =
	"Identity op which associates statistics with the value.";

	let description = [{
	Associates statistics about the runtime ranges of values observed for
	evaluations of this node.

	Statistics about the entire type are reported in the 'layerStats' attribute
	and those for each axis, in the (optional) `axisStats` attribute. The
	interpretation of each is determined by the last dimension of its shape.
	Currently, only dim=2 is supported, which is interpreted as [min, max].

	`layerStats` must be a rank 1 tensor: [2]
	`axisStats` must be a rank 2 tensor: [N, 2], where N=the rank of `arg`.
	}];

	let arguments = (ins
	quant_RealValueType:$arg,
	ElementsAttr:$layerStats,
	OptionalAttr<ElementsAttr>:$axisStats);
	let results = (outs quant_RealValueType);

	let verifier = [{
	auto tensorArg = arg()->getType().dyn_cast<TensorType>();
	auto argRank = tensorArg ? tensorArg.getRank() : 0;
	// Verify layerStats attribute.
	{
	auto layerStatsType = layerStats().getType();
	if (!layerStatsType.getElementType().isa<FloatType>()) {
	return emitOpError(
	"layerStats must have a floating point element type");
	}
	if (layerStatsType.getRank() != 1 \|\| layerStatsType.getDimSize(0) != 2) {
	return emitOpError("layerStats must have shape [2]");
	}
	}
	// Verify axisStats (optional) attribute.
	if (axisStats()) {
	auto axisStatsType = axisStats()->getType();
	if (!axisStatsType.getElementType().isa<FloatType>()) {
	return emitOpError("axisStats must have a floating point element type");
	}
	if (axisStatsType.getRank() != 2 \|\|
	axisStatsType.getDimSize(1) != 2 \|\|
	axisStatsType.getDimSize(0) != argRank) {
	return emitOpError("axisStats must have shape [N,2] "
	"where N = the argument rank");
	}
	}
	return success();
	}];
	}

	def quant_CoupledRefOp : quant_Op<"coupled_ref", [SameValueType]> {
	let summary =
	"Indicates that one point of the computation is coupled to another.";

	let description = [{
	Ordinarily, relationships between ops for the purposes of determining
	compatible quantized types is explicit based on the use-def chain. However,
	in some situations, a use may be separated from its def by arbitrary
	external connections. In such a case, during analysis, all coupled_ref
	nodes in a module which share a coupledKey will be considered to be
	directly connected as via an identity op for the purpose of type inference.
	}];

	let arguments = (ins
	quant_RealValueType:$arg,
	StrAttr:$coupledKey);
	let results = (outs quant_RealValueType);
	}

	#endif // QUANT_OPS