col max hist observer
Summary:
Add OutputColumnMaxHistogramNetObserver and OutputColumnMaxHistogramObserver to dnnlowp observers.
Sample output histogram at /mnt/public/amyyang/test/col_max_test.log (generated for ctr_web_feed)
```
columns:
"op_index",
"output_idx",
"blob_name",
"col_idx",
"min",
"max",
"nbins"
```
Test Plan: Tested with ctr_web_feed
Reviewed By: csummersea
Differential Revision: D18194229
fbshipit-source-id: 1402fcdc174a1f52744c850f5e2cc3bdc73c3a45
diff --git a/caffe2/quantization/server/activation_distribution_observer.cc b/caffe2/quantization/server/activation_distribution_observer.cc
index da43212..5d9afc4 100644
--- a/caffe2/quantization/server/activation_distribution_observer.cc
+++ b/caffe2/quantization/server/activation_distribution_observer.cc
@@ -43,6 +43,40 @@
fbgemm::FindMinMax(temp.data(), min, max, len);
}
+/**
+ * Returns the contents of `tensor` as an array of floats.
+ *
+ * - float tensors: the tensor's own data pointer is returned (no copy).
+ * - int / long tensors: every element is converted to float into a
+ *   thread-local scratch buffer and a pointer to that buffer is returned. The
+ *   pointer stays valid only until the next call on the same thread, so
+ *   consume it before calling this function again.
+ * - any other element type, or a null data pointer: nullptr.
+ *
+ * The scratch buffer is deliberately not a plain local: a local vector is
+ * destroyed on return, which would hand the caller a dangling pointer.
+ */
+float* GetFloatTensorData(TensorCPU* tensor) {
+  if (tensor->IsType<float>()) {
+    // A null data pointer simply propagates as the "no data" result.
+    return tensor->template data<float>();
+  }
+
+  // Must outlive this call because the converted values are returned by
+  // pointer. Its capacity is kept between calls and reused.
+  static thread_local vector<float> data_temp;
+
+  if (tensor->IsType<int>()) {
+    const int* data_orig = tensor->data<int>();
+    if (!data_orig) {
+      return nullptr;
+    }
+    const size_t numel = static_cast<size_t>(tensor->numel());
+    data_temp.resize(numel);
+    for (size_t j = 0; j < numel; ++j) {
+      data_temp[j] = static_cast<float>(data_orig[j]);
+    }
+    return data_temp.data();
+  }
+
+  if (tensor->IsType<long>()) {
+    const long* data_orig = tensor->data<long>();
+    if (!data_orig) {
+      return nullptr;
+    }
+    const size_t numel = static_cast<size_t>(tensor->numel());
+    data_temp.resize(numel);
+    for (size_t j = 0; j < numel; ++j) {
+      data_temp[j] = static_cast<float>(data_orig[j]);
+    }
+    return data_temp.data();
+  }
+
+  // Unsupported element type.
+  return nullptr;
+}
+
template <>
void FindMinMax<float>(const float* data, float* min, float* max, int len) {
fbgemm::FindMinMax(data, min, max, len);
@@ -281,6 +315,71 @@
return;
}
+/**
+ * Binds the observer to `op` and resolves which output slot of `op` is named
+ * `col_max_blob_name`. A CAFFE_ENFORCE check fails when the operator has no
+ * output with that name.
+ */
+OutputColumnMaxHistogramObserver::OutputColumnMaxHistogramObserver(
+    OperatorBase* op,
+    const std::string& col_max_blob_name,
+    int nbins,
+    std::shared_ptr<HistogramObserver::Info> info)
+    : ObserverBase<OperatorBase>(op),
+      col_max_blob_name_(col_max_blob_name),
+      nbins_(nbins),
+      info_(info) {
+  const auto& outputs = op->debug_def().output();
+  const auto first = outputs.begin();
+  const auto last = outputs.end();
+  const auto match = std::find(first, last, col_max_blob_name);
+  CAFFE_ENFORCE(match != last, "Cannot find blob in operator output.");
+  // Position of the matching name within the operator's output list.
+  col_max_blob_idx_ = std::distance(first, match);
+}
+
+/**
+ * Invoked after the observed operator has run. For the watched output tensor
+ * (which must be a two-dimensional CPU tensor) this computes, for every
+ * column, the maximum absolute value over all rows and adds it to that
+ * column's histogram and total histogram in `info_`.
+ *
+ * Returns without recording anything when the output is not a CPU tensor, the
+ * tensor is empty (numel of 0 or -1), or its data cannot be obtained as
+ * floats. The latter case is logged once per observer.
+ *
+ * CAFFE_ENFORCE checks fail when the tensor is not two-dimensional or when the
+ * number of columns differs from the one seen on the first recorded run.
+ */
+void OutputColumnMaxHistogramObserver::Stop() {
+  if (!subject_->OutputIsTensorType(col_max_blob_idx_, CPU)) {
+    return;
+  }
+  Tensor* tensor = subject_->template Output<Tensor>(col_max_blob_idx_, CPU);
+  if (tensor->numel() == 0 || tensor->numel() == -1) {
+    return;
+  }
+
+  float* data = GetFloatTensorData(tensor);
+  if (data == nullptr) {
+    // Log only once, but bail out every time: falling through after the
+    // warning has already been printed would dereference a null pointer below.
+    if (!warning_printed_) {
+      LOG(INFO) << "Tensor " << col_max_blob_name_
+                << " has mismatching type, or unsupported type "
+                << tensor->meta().name() << " with size " << tensor->numel();
+      warning_printed_ = true;
+    }
+    return;
+  }
+
+  // determine number of columns
+  CAFFE_ENFORCE(
+      tensor->dim() == 2,
+      "Tensor " + col_max_blob_name_ +
+          " is not two-dimensional. Tensor.dim() = " +
+          caffe2::to_string(tensor->dim()));
+  const int num_columns = tensor->size_from_dim(1);
+  if (num_columns_ == -1) {
+    // First recorded run fixes the expected column count.
+    num_columns_ = num_columns;
+  }
+  CAFFE_ENFORCE(
+      num_columns_ == num_columns, "Observed inconsistent number of columns.");
+
+  const int num_rows = tensor->size_to_dim(1);
+  for (int col = 0; col < num_columns; ++col) {
+    // Max absolute value of this column; data is indexed row-major as
+    // r * num_columns + col. Row 0 seeds the running maximum.
+    auto col_max = std::abs(data[col]);
+    for (int r = 1; r < num_rows; ++r) {
+      col_max = max(col_max, std::abs(data[r * num_columns + col]));
+    }
+    // Histograms are created lazily, one per column, the first time a column
+    // index is seen.
+    if (info_->histograms.size() <= static_cast<size_t>(col)) {
+      info_->histograms.emplace_back(nbins_);
+      info_->total_histograms.emplace_back(nbins_);
+      info_->min_max_info.tensor_infos.emplace_back(col_max_blob_name_);
+    }
+    info_->histograms[col].Add(col_max);
+    info_->total_histograms[col].Add(col_max);
+  }
+}
+
HistogramNetObserver::HistogramNetObserver(
NetBase* subject,
const string& out_file_name,
@@ -407,6 +506,136 @@
return HasDNNLowPEngine_(op.debug_def());
}
+/**
+ * Attaches one OutputColumnMaxHistogramObserver to every (operator, output)
+ * pair of `subject` whose output blob name appears in
+ * `observe_column_max_for_blobs`, and stores the histogram storage shared with
+ * each of them in hist_infos_, keyed by operator index and then output index.
+ * When the blob list is empty only the members are initialized.
+ */
+OutputColumnMaxHistogramNetObserver::OutputColumnMaxHistogramNetObserver(
+    NetBase* subject,
+    const std::string& out_file_name,
+    const std::vector<std::string>& observe_column_max_for_blobs,
+    int nbins,
+    int dump_freq,
+    bool mul_nets)
+    : NetObserver(subject),
+      dump_freq_(dump_freq),
+      cnt_(0),
+      mul_nets_(mul_nets),
+      out_file_name_(out_file_name) {
+  if (observe_column_max_for_blobs.empty()) {
+    return;
+  }
+  col_max_blob_names_.insert(
+      observe_column_max_for_blobs.begin(), observe_column_max_for_blobs.end());
+
+  int op_idx = 0;
+  for (auto* op : subject->GetOperators()) {
+    const auto& blob_names = op->debug_def().output();
+    std::unordered_map<int, std::shared_ptr<HistogramObserver::Info>>
+        infos_by_output;
+    int output_idx = 0;
+    for (const auto& blob_name : blob_names) {
+      if (col_max_blob_names_.count(blob_name) != 0) {
+        // The per-column histograms inside `info` are added by the operator
+        // observer at runtime, once the tensor's column count is known.
+        auto info = std::make_shared<HistogramObserver::Info>();
+        info->min_max_info.type = op->debug_def().type();
+        op->AttachObserver(unique_ptr<OutputColumnMaxHistogramObserver>(
+            new OutputColumnMaxHistogramObserver(op, blob_name, nbins, info)));
+        infos_by_output[output_idx] = info;
+      }
+      ++output_idx;
+    }
+    // Only operators with at least one watched output get an entry.
+    if (!infos_by_output.empty()) {
+      hist_infos_[op_idx] = infos_by_output;
+    }
+    ++op_idx;
+  }
+}
+
+/**
+ * Writes one text line per (operator, output, column) histogram to
+ * `out_file_name`.
+ *
+ * When mul_nets_ is set, "." followed by this observer's address is appended
+ * to the file name.
+ *
+ * @param out_file_name path of the file to write
+ * @param print_total_min_max when true, the total histograms are dumped and
+ *     each line is also sent to LOG(INFO); when false, the per-interval
+ *     histograms are dumped and then replaced by fresh ones of the same size
+ */
+void OutputColumnMaxHistogramNetObserver::DumpAndReset_(
+ const std::string& out_file_name,
+ bool print_total_min_max) {
+ stringstream file_name;
+ file_name << out_file_name;
+ if (mul_nets_) {
+ file_name << ".";
+ file_name << this;
+ }
+ ofstream f(file_name.str());
+ // A failed open is only logged; there is no early return, so the loops below
+ // still run (and still log / reset) with `f` in a failed state.
+ if (!f) {
+ LOG(WARNING) << this << ": can't open " << file_name.str();
+ }
+ for (const auto& it : hist_infos_) {
+ // `auto` by value: this copies the inner map (of shared_ptrs) per operator.
+ auto output_idx_hists_map = it.second;
+ for (const auto& output_idx_hist : output_idx_hists_map) {
+ int output_idx = output_idx_hist.first;
+ HistogramObserver::Info* info = output_idx_hist.second.get();
+ if (!info) {
+ continue;
+ }
+ // One histogram per column; index i is the column index.
+ for (int i = 0; i < info->histograms.size(); ++i) {
+ // NOTE(review): `hist` presumably points into state owned by the selected
+ // histogram object -- confirm against Finalize(). It is not used after the
+ // reset at the end of this iteration.
+ const Histogram* hist =
+ (print_total_min_max ? info->total_histograms : info->histograms)[i]
+ .Finalize();
+ if (hist->Min() >= hist->Max()) {
+ LOG(WARNING) << "Histogram of "
+ << info->min_max_info.tensor_infos[i].name
+ << " has an empty range: min " << hist->Min()
+ << " and max " << hist->Max();
+ }
+ if (hist->GetHistogram()->empty()) {
+ LOG(WARNING) << "Histogram of "
+ << info->min_max_info.tensor_infos[i].name
+ << " is empty";
+ }
+ ostringstream ost;
+ // op_idx, output_idx, blob_name, col, min, max, nbins
+ ost << it.first << " " << output_idx << " "
+ << info->min_max_info.tensor_infos[i].name << " " << i << " "
+ << hist->Min() << " " << hist->Max() << " "
+ << hist->GetHistogram()->size();
+
+ // bins
+ for (uint64_t c : *hist->GetHistogram()) {
+ ost << " " << c;
+ }
+ if (print_total_min_max) {
+ LOG(INFO) << this << " " << ost.str();
+ }
+ f << ost.str() << endl;
+ // Periodic dumps start a new interval: replace the per-interval histogram
+ // with an empty one that has the same number of bins. total_histograms is
+ // never reset here.
+ if (!print_total_min_max) {
+ info->histograms[i] = DynamicHistogram(hist->GetHistogram()->size());
+ }
+ }
+ }
+ }
+ f.close();
+}
+
+/**
+ * Invoked after each run of the observed net. Counts the runs and, on every
+ * dump_freq_-th run, dumps and resets the per-interval histograms.
+ *
+ * The dump file name is out_file_name_ with "_<k>" added, where k is
+ * cnt_ / dump_freq_. The suffix is inserted before the extension when the last
+ * '.' of the name comes after the last '/'; otherwise it is appended.
+ */
+void OutputColumnMaxHistogramNetObserver::Stop() {
+  ++cnt_;
+  // -1 disables periodic dumps. 0 is treated the same way because the modulo
+  // and division below would otherwise divide by zero.
+  if (dump_freq_ == -1 || dump_freq_ == 0 || (cnt_ % dump_freq_) != 0) {
+    return;
+  }
+
+  const int dump_idx = cnt_ / dump_freq_;
+  const size_t last_dot = out_file_name_.rfind('.');
+  const size_t last_slash = out_file_name_.rfind('/');
+  // The '.' only marks an extension when it belongs to the final path
+  // component, i.e. there is no '/' after it.
+  const bool has_extension = last_dot != string::npos &&
+      (last_slash == string::npos || last_slash < last_dot);
+
+  ostringstream ost;
+  if (has_extension) {
+    ost << out_file_name_.substr(0, last_dot) << "_" << dump_idx
+        << out_file_name_.substr(last_dot);
+  } else {
+    ost << out_file_name_ << "_" << dump_idx;
+  }
+  DumpAndReset_(ost.str());
+}
+
+/**
+ * On destruction, dumps the total histograms to out_file_name_ (no numeric
+ * suffix). Passing true selects total_histograms, logs every line with
+ * LOG(INFO) and leaves the per-interval histograms untouched.
+ */
+OutputColumnMaxHistogramNetObserver::~OutputColumnMaxHistogramNetObserver() {
+ DumpAndReset_(out_file_name_, true);
+}
+
RegisterQuantizationParamsNetObserver::RegisterQuantizationParamsNetObserver(
NetBase* subject,
const string& min_max_file_name,
diff --git a/caffe2/quantization/server/activation_distribution_observer.h b/caffe2/quantization/server/activation_distribution_observer.h
index 72ecab3..56deb6e 100644
--- a/caffe2/quantization/server/activation_distribution_observer.h
+++ b/caffe2/quantization/server/activation_distribution_observer.h
@@ -98,6 +98,29 @@
bool warning_printed_ = false;
}; // class HistogramObserver
+/**
+ * Operator observer that, after each run of the operator, takes one named
+ * output tensor, computes the maximum absolute value of every column and adds
+ * it to a per-column histogram stored in a shared HistogramObserver::Info.
+ */
+class OutputColumnMaxHistogramObserver final
+ : public ObserverBase<OperatorBase> {
+ public:
+ /**
+ * @param op operator to observe
+ * @param col_max_blob_name name of the output blob of `op` to watch; must be
+ * one of the operator's outputs
+ * @param nbins number of bins used for each newly created histogram
+ * @param info shared storage the per-column histograms are written to
+ */
+ explicit OutputColumnMaxHistogramObserver(
+ OperatorBase* op,
+ const std::string& col_max_blob_name,
+ int nbins,
+ std::shared_ptr<HistogramObserver::Info> info);
+
+ private:
+ // Collects the column maxima of the watched output after the operator ran.
+ void Stop() override;
+
+ // Name of the watched output blob.
+ std::string col_max_blob_name_;
+ // Bin count passed to every histogram created by this observer.
+ int nbins_;
+ // Histogram storage shared with whoever created this observer.
+ std::shared_ptr<HistogramObserver::Info> info_;
+ // Set once the unsupported-type message has been logged.
+ bool warning_printed_ = false;
+ // Index of the watched blob within the operator's outputs; set in the
+ // constructor.
+ int col_max_blob_idx_ = -1;
+ // Column count seen on the first recorded run; -1 until then.
+ int num_columns_ = -1;
+}; // class OutputColumnMaxHistogramObserver
+
class HistogramNetObserver final : public NetObserver {
public:
/**
@@ -131,6 +154,34 @@
std::vector<std::shared_ptr<HistogramObserver::Info>> hist_infos_;
};
+/**
+ * Net observer that attaches an OutputColumnMaxHistogramObserver to every
+ * operator output whose blob name is in a given list, and writes the collected
+ * per-column histograms to a text file: periodically (every dump_freq runs)
+ * and once more, with the totals, on destruction.
+ */
+class OutputColumnMaxHistogramNetObserver final : public NetObserver {
+ public:
+ /**
+ * @param subject net to observe
+ * @param out_file_name base name of the output file(s)
+ * @param observe_column_max_for_blobs names of the output blobs to watch
+ * @param nbins number of bins of each histogram
+ * @param dump_freq dump every dump_freq net runs; -1 disables periodic dumps
+ * @param mul_nets when true, the observer's address is appended to the file
+ * name
+ */
+ explicit OutputColumnMaxHistogramNetObserver(
+ NetBase* subject,
+ const std::string& out_file_name,
+ const std::vector<std::string>& observe_column_max_for_blobs,
+ int nbins,
+ int dump_freq = -1,
+ bool mul_nets = false);
+ // Dumps the total histograms to out_file_name_.
+ ~OutputColumnMaxHistogramNetObserver();
+
+ private:
+ // Counts net runs and triggers the periodic dump.
+ void Stop() override;
+ // Writes the histograms to `out_file_name`; totals when print_total_min_max
+ // is true, otherwise the per-interval histograms, which are then reset.
+ void DumpAndReset_(
+ const std::string& out_file_name,
+ bool print_total_min_max = false);
+ // dump_freq_: dump period in net runs; cnt_: number of net runs seen so far.
+ int dump_freq_, cnt_;
+ bool mul_nets_;
+ const std::string out_file_name_;
+ // Names of the blobs whose column maxima are observed.
+ std::unordered_set<std::string> col_max_blob_names_;
+
+ // {op_idx: {output_index: col_hists}}
+ std::unordered_map<
+ int,
+ std::unordered_map<int, std::shared_ptr<HistogramObserver::Info>>>
+ hist_infos_;
+};
+
/**
* Set quantization parameters of operators based on min/max
* collected from OutputMinMaxObserver
diff --git a/caffe2/quantization/server/observer_test.py b/caffe2/quantization/server/observer_test.py
index be8770e..7fd6301 100644
--- a/caffe2/quantization/server/observer_test.py
+++ b/caffe2/quantization/server/observer_test.py
@@ -1,11 +1,10 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
+
import numpy as np
from caffe2.python import core, workspace
from caffe2.quantization.server import dnnlowp_pybind11
+
net = core.Net("test_net")
X = np.array([[1, 2], [3, 4]]).astype(np.float32)
@@ -32,3 +31,7 @@
dnnlowp_pybind11.ObserveHistogramOfOutput("test_net.hist", 1)
workspace.CreateNet(net)
workspace.RunNet(net)
+
+# Attach a column-max histogram observer to the net for blobs "X" and "W",
+# using "test_net._col_max_hist" as the output file name.
+# NOTE(review): this call comes after the RunNet call above and no further run
+# is visible in this hunk, so the observer may never see a net run here --
+# confirm whether a RunNet after this call is intended.
+dnnlowp_pybind11.AddOutputColumnMaxHistogramObserver(
+ net._net.name, "test_net._col_max_hist", ["X", "W"]
+)
diff --git a/caffe2/quantization/server/pybind.cc b/caffe2/quantization/server/pybind.cc
index 3c2cad0..9c4cce8 100644
--- a/caffe2/quantization/server/pybind.cc
+++ b/caffe2/quantization/server/pybind.cc
@@ -69,6 +69,40 @@
pybind11::arg("mul_nets") = false);
m.def(
+      "AddOutputColumnMaxHistogramObserver",
+      // Attaches an OutputColumnMaxHistogramNetObserver (2048 bins per
+      // histogram) to the net called `net_name` in the current workspace and
+      // returns the attached observer. Fails a CAFFE_ENFORCE check when there
+      // is no current workspace or no net with that name.
+      [](const string& net_name,
+         const string& out_file_name,
+         const std::vector<std::string>& observe_column_max_for_blobs,
+         int dump_freq,
+         bool mul_nets) {
+        Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();
+        CAFFE_ENFORCE(gWorkspace);
+        NetBase* net = gWorkspace->GetNet(net_name);
+        CAFFE_ENFORCE(net, "Can't find net ", net_name);
+
+        const Observable<NetBase>::Observer* observer = nullptr;
+        {
+          // Release the GIL only while the observer is being attached. It
+          // must be held again before pybind11::cast below, because creating
+          // a Python object without the GIL is not allowed.
+          pybind11::gil_scoped_release g;
+          observer = net->AttachObserver(
+              make_unique<OutputColumnMaxHistogramNetObserver>(
+                  net,
+                  out_file_name,
+                  observe_column_max_for_blobs,
+                  2048,
+                  dump_freq,
+                  mul_nets));
+        }
+
+        CAFFE_ENFORCE(observer != nullptr);
+        return pybind11::cast(observer);
+      },
+      pybind11::arg("net_name"),
+      pybind11::arg("out_file_name"),
+      pybind11::arg("observe_column_max_for_blobs"),
+      pybind11::arg("dump_freq") = -1,
+      pybind11::arg("mul_nets") = false);
+
+ m.def(
"ChooseQuantizationParams",
[](const std::string& blob_name) {
Workspace* gWorkspace = caffe2::python::GetCurrentWorkspace();