caffe2/quantization/server/dynamic_histogram.cc - platform/external/pytorch - Git at Google

 #include "dynamic_histogram.h"
 #include "dnnlowp_op.h"

 #include <cassert>
 #include <cmath>
 #include <limits>

 #ifdef _OPENMP
 #include <omp.h>
 #endif

 namespace dnnlowp {

 using namespace std;

 void Histogram::Add(float f, uint64_t cnt) {
   int nbins = histogram_.size();
   float bin_width = (max_ - min_) / nbins;
   int bin = bin_width == 0
       ? 0
       : std::min(static_cast<int>((f - min_) / bin_width), nbins - 1);
   bin = std::max(0, bin);
   assert(bin >= 0);
   histogram_[bin] += cnt;
 }

 void Histogram::Add(const float* f, int len) {
   int nbins = histogram_.size();
   float bin_width = (max_ - min_) / nbins;

   if (bin_width > 0.0) {
     assert(per_thread_histogram_.size() % nbins == 0);

     // Check if dnnlowp_get_max_threads has been reduced, and if so reduce
     // per-thread histogram and clear them.
     int old_nthreads = per_thread_histogram_.size() / nbins + 1;
     if (caffe2::dnnlowp_get_max_threads() < old_nthreads) {
       Finalize();
     }

     per_thread_histogram_.resize(
         (caffe2::dnnlowp_get_max_threads() - 1) * nbins);

 #ifdef _OPENMP
 #pragma omp parallel
 #endif
     {
       int tid = caffe2::dnnlowp_get_thread_num();

       uint64_t* my_histogram = nullptr;
       if (tid == 0) {
         my_histogram = histogram_.data();
       } else {
         my_histogram = per_thread_histogram_.data() + (tid - 1) * nbins;
       }

 #ifdef _OPENMP
 #pragma omp for
 #endif
       for (auto i = 0; i < len; ++i) {
         int bin =
             std::min(static_cast<int>((f[i] - min_) / bin_width), nbins - 1);
         bin = std::max(0, bin);
         ++my_histogram[bin];
       }
     } // omp parallel
   } else {
     histogram_[0] += len;
   }
 }

 void Histogram::Finalize() {
   int nbins = histogram_.size();
   assert(per_thread_histogram_.size() % nbins == 0);
   int nthreads = per_thread_histogram_.size() / nbins + 1;

   if (nthreads > 1) {
     for (int bin = 0; bin < nbins; ++bin) {
       for (int i = 1; i < nthreads; ++i) {
         histogram_[bin] += per_thread_histogram_[(i - 1) * nbins + bin];
       }
     }
   }

   per_thread_histogram_.clear();
 }

 static const int OVER_BINNING_FACTOR = 4;

 DynamicHistogram::DynamicHistogram(int nbins)
     : nbins_(nbins),
       min_(numeric_limits<float>::max()),
       max_(numeric_limits<float>::lowest()) {
   assert(nbins_ > 0);
 }

 void DynamicHistogram::Add(float f) {
   min_ = std::min(min_, f);
   max_ = std::max(max_, f);

   if (histograms_.empty()) {
     histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, f, f);
   } else {
     Histogram& curr_hist = histograms_.back();
     if (f < curr_hist.Min() || f > curr_hist.Max()) {
       float old_spread = curr_hist.Max() - curr_hist.Min();
       if (f < curr_hist.Min()) {
         float new_min;
         if (old_spread == 0) {
           new_min = f;
         } else {
           new_min = curr_hist.Min() -
               ceil((curr_hist.Min() - f) / old_spread) * old_spread;
         }
         histograms_.emplace_back(
             curr_hist.GetHistogram()->size(), new_min, curr_hist.Max());
       } else {
         float new_max;
         if (old_spread == 0) {
           new_max = f;
         } else {
           new_max = curr_hist.Max() +
               ceil((f - curr_hist.Max()) / old_spread) * old_spread;
         }
         histograms_.emplace_back(
             curr_hist.GetHistogram()->size(), curr_hist.Min(), new_max);
       }
     }
   }

   Histogram& new_hist = histograms_.back();
   new_hist.Add(f);
 }

 void DynamicHistogram::Add(const float* f, int len) {
   float minimum = min_, maximum = max_;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(min : minimum) reduction(max : maximum)
 #endif
   for (int i = 0; i < len; ++i) {
     minimum = std::min(f[i], minimum);
     maximum = std::max(f[i], maximum);
   }
   min_ = minimum;
   max_ = maximum;

   if (histograms_.empty()) {
     histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, min_, max_);
   } else {
     Histogram& curr_hist = histograms_.back();
     if (min_ < curr_hist.Min() || max_ > curr_hist.Max()) {
       float old_spread = curr_hist.Max() - curr_hist.Min();
       float new_min = curr_hist.Min(), new_max = curr_hist.Max();
       if (min_ < curr_hist.Min()) {
         if (old_spread == 0.0f) {
           new_min = min_;
         } else {
           new_min = curr_hist.Min() -
               ceil((curr_hist.Min() - min_) / old_spread) * old_spread;
         }
       }
       if (max_ > curr_hist.Max()) {
         old_spread = curr_hist.Max() - new_min;
         if (old_spread == 0.0f) {
           new_max = max_;
         } else {
           new_max = curr_hist.Max() +
               ceil((max_ - curr_hist.Max()) / old_spread) * old_spread;
         }
       }
       histograms_.emplace_back(
           curr_hist.GetHistogram()->size(), new_min, new_max);
     }
   }

   Histogram& new_hist = histograms_.back();
   new_hist.Add(f, len);
 }

 const Histogram* DynamicHistogram::Finalize() {
   if (final_histogram_.get()) {
     return final_histogram_.get();
   }

   final_histogram_.reset(new Histogram(nbins_, min_, max_));
   float dst_bin_width = (max_ - min_) / nbins_;

   for (Histogram& hist : histograms_) {
     hist.Finalize();

     const std::vector<uint64_t>& bins = *hist.GetHistogram();
     float src_bin_width = (hist.Max() - hist.Min()) / bins.size();

     for (int i = 0; i < bins.size(); ++i) {
       if (bins[i] == 0) {
         continue;
       }
       float src_bin_begin = hist.Min() + src_bin_width * i;
       float src_bin_end = src_bin_begin + src_bin_width;
       // dst_bin corresponds to the beginning of the src_bin
       // dst_bin2 corresponds to the end of the src_bin
       int dst_bin =
           dst_bin_width == 0 ? 0 : (src_bin_begin - min_) / dst_bin_width;
       float dst_bin_begin = min_ + dst_bin_width * dst_bin;
       float dst_bin_end = dst_bin_begin + dst_bin_width;
       int dst_bin2 =
           dst_bin_width == 0 ? 0 : (src_bin_end - min_) / dst_bin_width;
       // 1 src_bin is mapped to at most 2 dst bin
       assert(dst_bin2 <= dst_bin + 2);

       // dst_bin_cnt is the count from src_bin that should go to dst_bin
       // The remainder should go to dst_bin2
       // rint is the fastest way to round
       // (https://stackoverflow.com/questions/485525/round-for-float-in-c/5849630)
       uint64_t dst_bin_cnt = src_bin_width == 0
           ? 0
           : std::min(
                 static_cast<uint64_t>(rint(
                     (dst_bin_end - src_bin_begin) / src_bin_width * bins[i])),
                 bins[i]);

       final_histogram_->Add(dst_bin_begin + dst_bin_width / 2, dst_bin_cnt);
       if (dst_bin_cnt < bins[i]) {
         final_histogram_->Add(
             dst_bin_end + dst_bin_width / 2, bins[i] - dst_bin_cnt);
       }
     }
   } // for each histogram with different scales

   return final_histogram_.get();
 }

 } // namespace dnnlowp
	#include "dynamic_histogram.h"
	#include "dnnlowp_op.h"

	#include <cassert>
	#include <cmath>
	#include <limits>

	#ifdef _OPENMP
	#include <omp.h>
	#endif

	namespace dnnlowp {

	using namespace std;

	void Histogram::Add(float f, uint64_t cnt) {
	int nbins = histogram_.size();
	float bin_width = (max_ - min_) / nbins;
	int bin = bin_width == 0
	? 0
	: std::min(static_cast<int>((f - min_) / bin_width), nbins - 1);
	bin = std::max(0, bin);
	assert(bin >= 0);
	histogram_[bin] += cnt;
	}

	void Histogram::Add(const float* f, int len) {
	int nbins = histogram_.size();
	float bin_width = (max_ - min_) / nbins;

	if (bin_width > 0.0) {
	assert(per_thread_histogram_.size() % nbins == 0);

	// Check if dnnlowp_get_max_threads has been reduced, and if so reduce
	// per-thread histogram and clear them.
	int old_nthreads = per_thread_histogram_.size() / nbins + 1;
	if (caffe2::dnnlowp_get_max_threads() < old_nthreads) {
	Finalize();
	}

	per_thread_histogram_.resize(
	(caffe2::dnnlowp_get_max_threads() - 1) * nbins);

	#ifdef _OPENMP
	#pragma omp parallel
	#endif
	{
	int tid = caffe2::dnnlowp_get_thread_num();

	uint64_t* my_histogram = nullptr;
	if (tid == 0) {
	my_histogram = histogram_.data();
	} else {
	my_histogram = per_thread_histogram_.data() + (tid - 1) * nbins;
	}

	#ifdef _OPENMP
	#pragma omp for
	#endif
	for (auto i = 0; i < len; ++i) {
	int bin =
	std::min(static_cast<int>((f[i] - min_) / bin_width), nbins - 1);
	bin = std::max(0, bin);
	++my_histogram[bin];
	}
	} // omp parallel
	} else {
	histogram_[0] += len;
	}
	}

	void Histogram::Finalize() {
	int nbins = histogram_.size();
	assert(per_thread_histogram_.size() % nbins == 0);
	int nthreads = per_thread_histogram_.size() / nbins + 1;

	if (nthreads > 1) {
	for (int bin = 0; bin < nbins; ++bin) {
	for (int i = 1; i < nthreads; ++i) {
	histogram_[bin] += per_thread_histogram_[(i - 1) * nbins + bin];
	}
	}
	}

	per_thread_histogram_.clear();
	}

	static const int OVER_BINNING_FACTOR = 4;

	DynamicHistogram::DynamicHistogram(int nbins)
	: nbins_(nbins),
	min_(numeric_limits<float>::max()),
	max_(numeric_limits<float>::lowest()) {
	assert(nbins_ > 0);
	}

	void DynamicHistogram::Add(float f) {
	min_ = std::min(min_, f);
	max_ = std::max(max_, f);

	if (histograms_.empty()) {
	histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, f, f);
	} else {
	Histogram& curr_hist = histograms_.back();
	if (f < curr_hist.Min() \|\| f > curr_hist.Max()) {
	float old_spread = curr_hist.Max() - curr_hist.Min();
	if (f < curr_hist.Min()) {
	float new_min;
	if (old_spread == 0) {
	new_min = f;
	} else {
	new_min = curr_hist.Min() -
	ceil((curr_hist.Min() - f) / old_spread) * old_spread;
	}
	histograms_.emplace_back(
	curr_hist.GetHistogram()->size(), new_min, curr_hist.Max());
	} else {
	float new_max;
	if (old_spread == 0) {
	new_max = f;
	} else {
	new_max = curr_hist.Max() +
	ceil((f - curr_hist.Max()) / old_spread) * old_spread;
	}
	histograms_.emplace_back(
	curr_hist.GetHistogram()->size(), curr_hist.Min(), new_max);
	}
	}
	}

	Histogram& new_hist = histograms_.back();
	new_hist.Add(f);
	}

	void DynamicHistogram::Add(const float* f, int len) {
	float minimum = min_, maximum = max_;
	#ifdef _OPENMP
	#pragma omp parallel for reduction(min : minimum) reduction(max : maximum)
	#endif
	for (int i = 0; i < len; ++i) {
	minimum = std::min(f[i], minimum);
	maximum = std::max(f[i], maximum);
	}
	min_ = minimum;
	max_ = maximum;

	if (histograms_.empty()) {
	histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, min_, max_);
	} else {
	Histogram& curr_hist = histograms_.back();
	if (min_ < curr_hist.Min() \|\| max_ > curr_hist.Max()) {
	float old_spread = curr_hist.Max() - curr_hist.Min();
	float new_min = curr_hist.Min(), new_max = curr_hist.Max();
	if (min_ < curr_hist.Min()) {
	if (old_spread == 0.0f) {
	new_min = min_;
	} else {
	new_min = curr_hist.Min() -
	ceil((curr_hist.Min() - min_) / old_spread) * old_spread;
	}
	}
	if (max_ > curr_hist.Max()) {
	old_spread = curr_hist.Max() - new_min;
	if (old_spread == 0.0f) {
	new_max = max_;
	} else {
	new_max = curr_hist.Max() +
	ceil((max_ - curr_hist.Max()) / old_spread) * old_spread;
	}
	}
	histograms_.emplace_back(
	curr_hist.GetHistogram()->size(), new_min, new_max);
	}
	}

	Histogram& new_hist = histograms_.back();
	new_hist.Add(f, len);
	}

	const Histogram* DynamicHistogram::Finalize() {
	if (final_histogram_.get()) {
	return final_histogram_.get();
	}

	final_histogram_.reset(new Histogram(nbins_, min_, max_));
	float dst_bin_width = (max_ - min_) / nbins_;

	for (Histogram& hist : histograms_) {
	hist.Finalize();

	const std::vector<uint64_t>& bins = *hist.GetHistogram();
	float src_bin_width = (hist.Max() - hist.Min()) / bins.size();

	for (int i = 0; i < bins.size(); ++i) {
	if (bins[i] == 0) {
	continue;
	}
	float src_bin_begin = hist.Min() + src_bin_width * i;
	float src_bin_end = src_bin_begin + src_bin_width;
	// dst_bin corresponds to the beginning of the src_bin
	// dst_bin2 corresponds to the end of the src_bin
	int dst_bin =
	dst_bin_width == 0 ? 0 : (src_bin_begin - min_) / dst_bin_width;
	float dst_bin_begin = min_ + dst_bin_width * dst_bin;
	float dst_bin_end = dst_bin_begin + dst_bin_width;
	int dst_bin2 =
	dst_bin_width == 0 ? 0 : (src_bin_end - min_) / dst_bin_width;
	// 1 src_bin is mapped to at most 2 dst bin
	assert(dst_bin2 <= dst_bin + 2);

	// dst_bin_cnt is the count from src_bin that should go to dst_bin
	// The remainder should go to dst_bin2
	// rint is the fastest way to round
	// (https://stackoverflow.com/questions/485525/round-for-float-in-c/5849630)
	uint64_t dst_bin_cnt = src_bin_width == 0
	? 0
	: std::min(
	static_cast<uint64_t>(rint(
	(dst_bin_end - src_bin_begin) / src_bin_width * bins[i])),
	bins[i]);

	final_histogram_->Add(dst_bin_begin + dst_bin_width / 2, dst_bin_cnt);
	if (dst_bin_cnt < bins[i]) {
	final_histogram_->Add(
	dst_bin_end + dst_bin_width / 2, bins[i] - dst_bin_cnt);
	}
	}
	} // for each histogram with different scales

	return final_histogram_.get();
	}

	} // namespace dnnlowp