blob: abbe6ca01f3bd8a110aba1d2f29b74ef9bcdd953 [file] [log] [blame]
#pragma once
#ifdef CAFFE2_USE_MKL
#include <mkl.h>
#endif // CAFFE2_USE_MKL
#include <random>
namespace caffe2 {
namespace math {
// Returns the quantized and compressed values of floating inputs
// The "fused" representation stores the [bitwidth][tail][min][max]
// with the quantized data in one array. Since we store 8/bitwidth
// quantized data in one byte, the last buckets of some bytes may have
// unused bits. There are totally tail buckets are unused.
// We encode *bitwidth* and *tail* at the beginning,
// following by 32-bit floating data respresenting min and max.
// | bitwidth | tail | min | max | ... int8 data ... |
// | 1B | 1B | 4B | 4B | ...output_data....|
// In output_data: the b-th bucket of the i-th byte stores
// the i-th data of the b-th segment of input row
#ifdef CAFFE2_USE_MKL
#define FUSED_ROWWISE_RANDOM_QUANTIZATION_USE_MKL
#endif
void quantize_and_compress(
const float* input_data,
uint8_t* output_data,
size_t input_size,
size_t bitwidth,
bool random,
#ifdef FUSED_ROWWISE_RANDOM_QUANTIZATION_USE_MKL
VSLStreamStatePtr& vslStream,
std::vector<float>& random_buffer
#else
std::unique_ptr<std::uniform_real_distribution<float>>& dis,
std::minstd_rand& gen
#endif
);
void decompress_and_dequantize(
const uint8_t* input_data,
float* output_data,
size_t input_size);
} // namespace math
} // namespace caffe2