| // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // output_stages.h: public definitions of the output stages that can |
| // be assembled into an output pipeline, to control how internal |
| // 32-bit accumulators are transformed to obtain the final uint8 |
| // result matrix entries. |
| |
| #ifndef GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_ |
| #define GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_ |
| |
| #include <tuple> |
| |
| #include "../internal/common.h" |
| |
| namespace gemmlowp { |
| |
// Output stage: int32 in, int32 out, rescaled ("quantized down") to the
// uint8 scale. The values it produces are not yet restricted to [0..255];
// typically a later stage clamps them to that range and casts them to uint8
// (see OutputStageSaturatingCastToUint8).
//
// The rescaling is controlled by the three fields below and computes
//   ((input + result_offset) * result_mult_int + rounding) >> result_shift
// with
//   rounding = (result_shift < 1) ? 0 : (1 << (result_shift - 1));
struct OutputStageQuantizeDownInt32ToUint8Scale {
  // Added to the input before the multiplication.
  std::int32_t result_offset;
  // Integer factor applied to (input + result_offset).
  std::int32_t result_mult_int;
  // Right-shift amount applied last; it also determines the rounding term.
  std::int32_t result_shift;
};
| |
| // This output stage takes int32 values and returns still int32 values, |
| // but "quantized down" to the uint8 scale; in other words, its output |
| // is typically what one would then clamp to [0..255] and cast to uint8 |
| // (see OutputStageSaturatingCastToUint8). |
| // |
| // This "quantization down" process depends on 3 parameters, |
| // result_offset, result_mult_int, result_shift, |
| // and the result is: |
| // ((input + result_offset) * result_mult_int + rounding) >> result_shift |
| // where |
| // rounding = (result_shift < 1) ? 0 : (1 << (result_shift - 1)); |
| // |
| // Difference from OutputStageQuantizeDownInt32ToUint8Scale here is that each |
| // row or column of the output (depending on tShape) has its own result_offset |
| // and result_mult_int numbers. |
| template <VectorShape tShape> |
| struct OutputStageQuantizeDownInt32ToUint8ScalePC { |
| VectorMap<const std::int32_t, tShape> result_offset; |
| VectorMap<const std::int32_t, tShape> result_mult_int; |
| std::int32_t result_shift; |
| }; |
| |
// Output stage: int32 in, int32 out, rescaled ("quantized down") to a
// different scale. For example, in a pipeline that outputs uint8 values in
// [0..255], this stage produces int32 values ready to be clamped to [0..255]
// and cast to uint8 (see OutputStageSaturatingCastToUint8).
//
// The rescaling depends on 3 parameters,
//   result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
// and computes
//   ((FixedPointMul(input, result_fixedpoint_multiplier) +
//     rounding) >> result_shift) + result_offset_after_shift
// with
//   rounding = (result_shift < 1) ? 0 : (1 << (result_shift - 1));
// Here FixedPointMul(x, y) denotes the integer nearest to the following
// mathematical expression, evaluated without overflow or intermediate
// rounding:
//   (x * y) / 2^31
// In practice FixedPointMul is expected to be implemented with hardware
// "rounding doubling int32 multiply high" instructions, such as VQRDMULH on
// ARM. The generic function is SaturatingRoundingDoublingHighMul; see
// fixedpoint.h.
//
// Note the other difference from OutputStageQuantizeDownInt32ToUint8Scale:
// the result offset is applied after the multiplier and shift, not before.
// Whatever the multiplier and shift are, the offset is therefore effectively
// integral: it offsets the final result by an integer.
// This is meant to faithfully support quantization schemes in which
// quantized values and the real values they represent are linked by
//
//   real_value = scale * (quantized_value - zero_point)
//
// where scale is a real number (represented in quantized form by
// result_fixedpoint_multiplier and result_shift) and zero_point is an
// integer telling which quantized value corresponds to the real value 0;
// it is represented here by (the opposite of) result_offset_after_shift.
// Such a scheme guarantees that 0 is always a representable value. That
// matters because many applications need to 0-pad arrays, which is only
// possible for quantized arrays if 0 is representable in quantized form.
// In particular, convolution-like operations are often implemented using
// 0-padding, or "im2col"-like expansions that implicitly rely on 0-padding.
// If 0 were not representable, such operations would have to pad with a
// nonzero value, introducing bias in the computation.
struct OutputStageQuantizeDownInt32ByFixedPoint {
  // Second operand of FixedPointMul in the formula above.
  std::int32_t result_fixedpoint_multiplier;
  // Right-shift amount; it also determines the rounding term.
  std::int32_t result_shift;
  // Integer added to the result once multiplier and shift have been applied.
  std::int32_t result_offset_after_shift;
};
| |
| // OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint is the old deprecated |
| // name of OutputStageQuantizeDownInt32ByFixedPoint, before we noticed that |
| // there really wasn't anything Uint8-specific about it. |
| using OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint = OutputStageQuantizeDownInt32ByFixedPoint; |
| |
// Variant of OutputStageQuantizeDownInt32ByFixedPoint in which the 'shift'
// is not necessarily just a right shift, so that multipliers greater than 1
// can be represented. It takes a result_exponent parameter; when that is
// <= 0, this stage is equivalent to OutputStageQuantizeDownInt32ByFixedPoint
// with result_shift = -result_exponent.
// In the general case, the stage first left-shifts by
// std::max(result_exponent, 0), and then does the same as
// OutputStageQuantizeDownInt32ByFixedPoint with
// result_shift = std::max(-result_exponent, 0).
struct OutputStageScaleInt32ByFixedPointAndExponent {
  // Fixed-point multiplier, as in OutputStageQuantizeDownInt32ByFixedPoint.
  std::int32_t result_fixedpoint_multiplier;
  // Signed exponent: positive selects a left shift, negative a right shift.
  std::int32_t result_exponent;
  // Offset, as in OutputStageQuantizeDownInt32ByFixedPoint.
  std::int32_t result_offset_after_shift;
};
| |
// Output stage: int32 in, uint8 out. The inputs are expected to already be
// on the final uint8 scale, although they may still lie outside [0..255].
// The stage clamps them to [0..255] and returns them cast to uint8.
// It has no parameters, hence the empty struct.
struct OutputStageSaturatingCastToUint8 {};
| |
// Output stage: int32 in, int16 out. The inputs are expected to already be
// on the final int16 scale, although they may still lie outside
// [-32768..32767]. The stage clamps them to [-32768..32767] and returns them
// cast to int16. It has no parameters, hence the empty struct.
struct OutputStageSaturatingCastToInt16 {};
| |
// Output stage: int32 in, int32 out. Adds a "bias vector" to the values.
// The bias vector should contain int32 entries and be either
//   - a row-vector with as many columns as the result matrix, or
//   - a column-vector with as many rows as the result matrix.
// Each input value gets the corresponding bias entry added to it; the vector
// is broadcast in the other direction so that it fits the matrix's shape.
template <typename VectorType>
struct OutputStageBiasAddition {
  // The bias entries to add; see the shape requirements above.
  VectorType bias_vector;
};
| |
// Output stage that clamps each value between the given min and max bounds.
// One use is implementing "rectified linear unit" activation functions in
// neural networks.
struct OutputStageClamp {
  // Lower clamping bound.
  std::int32_t min;
  // Upper clamping bound.
  std::int32_t max;
};
| |
// Parameters of the Tanh output stage.
// NOTE(review): this header only declares the two parameters and does not
// describe the stage. Judging by the field names, they are presumably the
// int32 representations of the real value 0 and of a real amplitude --
// confirm against the code that evaluates this stage.
struct OutputStageTanh {
  std::int32_t real_zero_as_int32;
  std::int32_t real_amplitude_as_int32;
};
| |
| // An output pipeline is just a std::tuple of output stages. |
| // This function generates a standard output pipeline consisting of two stages: |
| // OutputStageQuantizeDownInt32ToUint8Scale, OutputStageSaturatingCastToUint8. |
| inline std::tuple<OutputStageQuantizeDownInt32ToUint8Scale, |
| OutputStageSaturatingCastToUint8> |
| MakeStandardOutputPipeline(std::int32_t result_offset, |
| std::int32_t result_mult_int, |
| std::int32_t result_shift) { |
| OutputStageQuantizeDownInt32ToUint8Scale quantize_down_stage; |
| quantize_down_stage.result_offset = result_offset; |
| quantize_down_stage.result_mult_int = result_mult_int; |
| quantize_down_stage.result_shift = result_shift; |
| OutputStageSaturatingCastToUint8 saturating_cast_stage; |
| return std::make_tuple(quantize_down_stage, saturating_cast_stage); |
| } |
| |
| // An output pipeline is just a std::tuple of output stages. |
| // This function generates a standard output pipeline consisting of two stages: |
| // OutputStageQuantizeDownInt32ToUint8ScalePC, OutputStageSaturatingCastToUint8. |
| template <VectorShape tShape> |
| inline std::tuple<OutputStageQuantizeDownInt32ToUint8ScalePC<tShape>, |
| OutputStageSaturatingCastToUint8> |
| MakeStandardOutputPipeline( |
| const VectorMap<const std::int32_t, tShape>& result_offset, |
| const VectorMap<const std::int32_t, tShape>& result_mult_int, |
| std::int32_t result_shift) { |
| OutputStageQuantizeDownInt32ToUint8ScalePC<tShape> quantize_down_stage; |
| quantize_down_stage.result_offset = result_offset; |
| quantize_down_stage.result_mult_int = result_mult_int; |
| quantize_down_stage.result_shift = result_shift; |
| OutputStageSaturatingCastToUint8 saturating_cast_stage; |
| return std::make_tuple(quantize_down_stage, saturating_cast_stage); |
| } |
| |
| } // namespace gemmlowp |
| |
| #endif // GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_ |