caffe2/perfkernels/math.h - platform/external/pytorch - Git at Google

 #pragma once

 #ifdef CAFFE2_USE_MKL
 #include <mkl.h>
 #endif // CAFFE2_USE_MKL

 #include <random>

 namespace caffe2 {

 namespace math {

 // Returns the quantized and compressed values of floating inputs
 // The "fused" representation stores the [bitwidth][tail][min][max]
 // with the quantized data in one array. Since we store 8/bitwidth
 // quantized data in one byte, the last buckets of some bytes may have
 // unused bits. There are totally tail buckets are unused.
 // We encode *bitwidth* and *tail* at the beginning,
 // following by 32-bit floating data respresenting min and max.
 // | bitwidth | tail | min | max | ... int8 data ... |
 // |    1B    |  1B  |  4B |  4B | ...output_data....|
 // In output_data: the b-th bucket of the i-th byte stores
 // the i-th data of the b-th segment of input row
 #ifdef CAFFE2_USE_MKL
 #define FUSED_ROWWISE_RANDOM_QUANTIZATION_USE_MKL
 #endif
 void quantize_and_compress(
     const float* input_data,
     uint8_t* output_data,
     size_t input_size,
     size_t bitwidth,
     bool random,
 #ifdef FUSED_ROWWISE_RANDOM_QUANTIZATION_USE_MKL
     VSLStreamStatePtr& vslStream,
     std::vector<float>& random_buffer
 #else
     std::unique_ptr<std::uniform_real_distribution<float>>& dis,
     std::minstd_rand& gen
 #endif
 );
 void decompress_and_dequantize(
     const uint8_t* input_data,
     float* output_data,
     size_t input_size);

 } // namespace math
 } // namespace caffe2
	#pragma once

	#ifdef CAFFE2_USE_MKL
	#include <mkl.h>
	#endif // CAFFE2_USE_MKL

	#include <random>

	namespace caffe2 {

	namespace math {

	// Returns the quantized and compressed values of floating inputs
	// The "fused" representation stores the [bitwidth][tail][min][max]
	// with the quantized data in one array. Since we store 8/bitwidth
	// quantized data in one byte, the last buckets of some bytes may have
	// unused bits. There are totally tail buckets are unused.
	// We encode bitwidth and tail at the beginning,
	// following by 32-bit floating data respresenting min and max.
	// \| bitwidth \| tail \| min \| max \| ... int8 data ... \|
	// \| 1B \| 1B \| 4B \| 4B \| ...output_data....\|
	// In output_data: the b-th bucket of the i-th byte stores
	// the i-th data of the b-th segment of input row
	#ifdef CAFFE2_USE_MKL
	#define FUSED_ROWWISE_RANDOM_QUANTIZATION_USE_MKL
	#endif
	void quantize_and_compress(
	const float* input_data,
	uint8_t* output_data,
	size_t input_size,
	size_t bitwidth,
	bool random,
	#ifdef FUSED_ROWWISE_RANDOM_QUANTIZATION_USE_MKL
	VSLStreamStatePtr& vslStream,
	std::vector<float>& random_buffer
	#else
	std::unique_ptr<std::uniform_real_distribution<float>>& dis,
	std::minstd_rand& gen
	#endif
	);
	void decompress_and_dequantize(
	const uint8_t* input_data,
	float* output_data,
	size_t input_size);

	} // namespace math
	} // namespace caffe2