// Source blob: 1c7e6ff6854c6c8eef13ef8829258de744acffaf
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifdef INTEL_MKL
#define EIGEN_USE_THREADS
#include "mkldnn.h"
#include "mkldnn.hpp"
#include "mkldnn_types.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/graph/mkl_graph_util.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/mkl_util.h"
using mkldnn::primitive_attr;
using mkldnn::prop_kind;
using mkldnn::reorder;
using mkldnn::stream;
namespace {

// Quantization modes understood by this kernel.
enum { QUANTIZE_MODE_SCALED = 0 };

// Rounding modes understood by this kernel.
enum {
  // Round half to even: when the fractional part of y is exactly 0.5,
  // round(y) is the even integer nearest to y.
  // Examples: 23.5 -> 24, 24.5 -> 24, -23.5 -> -24, -24.5 -> -24.
  ROUND_HALF_TO_EVEN = 0,
};

}  // namespace
namespace tensorflow {
// Alias for the Eigen thread-pool device type used as the kernel's Device.
using CPUDevice = Eigen::ThreadPoolDevice;
// Quantizes a tensor from float to T, with user-specified min_range and
// max_range.
template <typename Device, typename T>
class MklQuantizeV2Op : public OpKernel {
public:
explicit MklQuantizeV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) {
string mode_string;
OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string));
OP_REQUIRES(ctx, (mode_string == "SCALED"),
errors::InvalidArgument("mode must be scaled"));
mode_ = QUANTIZE_MODE_SCALED;
string round_mode_string;
OP_REQUIRES_OK(ctx, ctx->GetAttr("round_mode", &round_mode_string));
OP_REQUIRES(ctx, (round_mode_string == "HALF_TO_EVEN"),
errors::InvalidArgument("Round mode must be half to even"));
round_mode_ = ROUND_HALF_TO_EVEN;
}
~MklQuantizeV2Op() {}
void Compute(OpKernelContext* ctx) override {
const float input_min_range = ctx->input(1).flat<float>()(0);
const float input_max_range = ctx->input(2).flat<float>()(0);
float min_range = std::min(0.0f, input_min_range);
float max_range;
OP_REQUIRES(ctx, (input_max_range > input_min_range),
errors::InvalidArgument(
"input_max_range must be larger than input_min_range."));
// When the minimum and maximum ranges are too close together, nudge them
// apart by a small value so that they are slightly different. This helps
// us avoid creating ill-formed buffers where all quantized values map to
// the same float number. These kinds of buffers cause problems for
// downstream ops when they need to do calculations on them.
// We pick the value by making sure that zero is not more than 100x the
// overall range from the maximum, so that the value can be easily
// represented when we promote the quantized value to a higher
// intermediate bit depth, since that's a common requirement.
const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range),
fabsf(input_max_range))) /
100.0f;
max_range = std::max(input_max_range, min_range + epsilon);
// Clamping the max_range to zero since max_range can also be negative.
max_range = std::max(0.0f, max_range);
auto cpu_engine = engine(engine::cpu, 0);
const unsigned int src_idx = 0;
const Tensor& src_tensor = MklGetInput(ctx, src_idx);
MklDnnShape src_mkl_shape;
GetMklShape(ctx, src_idx, &src_mkl_shape);
auto src_tf_shape = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetTfShape()
: src_tensor.shape();
auto src_dims = src_mkl_shape.IsMklTensor()
? src_mkl_shape.GetSizesAsMklDnnDims()
: TFShapeToMklDnnDims(src_tensor.shape());
auto output_dims = src_dims;
// Set the dst layout to be the best mkl layout based on dims and type.
memory::format dst_layout_type;
switch (src_tf_shape.dims()) {
case 1:
dst_layout_type = memory::format::x;
break;
case 2:
dst_layout_type = memory::format::nc;
break;
case 3:
dst_layout_type = memory::format::tnc;
break;
case 4:
dst_layout_type = memory::format::nhwc;
break;
case 5:
dst_layout_type = memory::format::ndhwc;
break;
default:
OP_REQUIRES_OK(ctx,
errors::Aborted("Input dims must be <= 5 and >= 1"));
return;
}
// Create reorder memory for src, dst: both are defined in mkl_util.h,
// they are wrapper
MklDnnData<float> src(&cpu_engine);
MklDnnData<T> dst(&cpu_engine);
auto src_md =
src_mkl_shape.IsMklTensor()
? src_mkl_shape.GetMklLayout()
: memory::desc(src_dims, MklDnnType<float>(), dst_layout_type);
src.SetUsrMem(src_md, &src_tensor);
memory::desc dst_md =
memory::desc(src_dims, MklDnnType<T>(), dst_layout_type);
auto dst_pd = src.GetUsrMemPrimDesc();
// Standard shape assignments for layout pass
MklDnnShape output_mkl_shape;
TensorShape output_tf_shape;
if (src_mkl_shape.IsMklTensor()) {
output_mkl_shape.SetMklTensor(true);
output_mkl_shape.SetMklLayout(&dst_md);
output_mkl_shape.SetElemType(MklDnnType<T>());
output_mkl_shape.SetTfLayout(src_mkl_shape.GetDimension(),
src_mkl_shape.GetSizesAsMklDnnDims(),
src_mkl_shape.GetTfDataFormat());
output_tf_shape.AddDim(dst_pd.get_size() / sizeof(T));
} else {
output_mkl_shape.SetMklTensor(false);
output_tf_shape = MklDnnDimsToTFShape(output_dims);
}
Tensor* output_tensor = nullptr;
AllocateOutputSetMklShape(ctx, 0, &output_tensor, output_tf_shape,
output_mkl_shape);
TensorShape min_tf_shape = {};
MklDnnShape min_mkl_shape;
min_mkl_shape.SetMklTensor(false);
Tensor* output_min_tensor = nullptr;
AllocateOutputSetMklShape(ctx, 1, &output_min_tensor, min_tf_shape,
min_mkl_shape);
TensorShape max_tf_shape = {};
MklDnnShape max_mkl_shape;
max_mkl_shape.SetMklTensor(false);
Tensor* output_max_tensor = nullptr;
AllocateOutputSetMklShape(ctx, 2, &output_max_tensor, max_tf_shape,
max_mkl_shape);
dst.SetUsrMem(dst_md, output_tensor);
// Estimating scales for quantization.
const int num_bits = sizeof(T) * 8;
const float max_abs = std::max(std::abs(min_range), std::abs(max_range));
const bool is_signed = std::is_signed<T>::value;
float target_range;
if (is_signed) {
max_range = max_abs;
min_range = -max_abs;
// If it is signed, we try to keep 0.0 being 0 and drop one bucket. For
// example, if it is 8 bits, we have the range [-127, 127]. So for input
// range of [-x, x], the scale should be 254/(2*x).
target_range = static_cast<float>((uint64_t{1} << (num_bits - 1)) - 1);
} else {
max_range = max_abs;
min_range = 0.0;
// If it is unsigned and num_bits == 8, the range with 8 bits is [0,
// 255]. If the input range is [0, x], then the scale is 255/x instead
// of 254 as in the case above.
target_range = static_cast<float>((uint64_t{1} << num_bits) - 1);
}
output_min_tensor->flat<float>()(0) = min_range;
output_max_tensor->flat<float>()(0) = max_range;
const float scale_factor = target_range / max_abs;
// Primitive creation and stream submit
std::vector<float> scales{scale_factor};
mkldnn::primitive_attr attr;
attr.set_output_scales(0, scales);
auto reorder_desc = reorder::primitive_desc(src.GetUsrMemPrimDesc(),
dst.GetUsrMemPrimDesc(), attr);
reorder my_reorder = reorder(reorder_desc, primitive::at(*src.GetUsrMem()),
*dst.GetUsrMem());
std::vector<primitive> net{my_reorder};
stream(stream::kind::eager).submit(net).wait();
}
private:
int mode_;
int round_mode_;
};
// Registers the _MklQuantizeV2 CPU kernel for one quantized output type,
// under the MKL quantized-op label.
#define REGISTER_MKL_QUANTIZE_V2(type)                                   \
  REGISTER_KERNEL_BUILDER(Name("_MklQuantizeV2")                         \
                              .Device(DEVICE_CPU)                        \
                              .TypeConstraint<type>("T")                 \
                              .Label(mkl_op_registry::kMklQuantizedOpLabel), \
                          MklQuantizeV2Op<CPUDevice, type>)

REGISTER_MKL_QUANTIZE_V2(quint8);
REGISTER_MKL_QUANTIZE_V2(qint8);

#undef REGISTER_MKL_QUANTIZE_V2
} // namespace tensorflow
#endif // INTEL_MKL