Dump data for DocNN visualization
Summary:
- Dump instance activations, some statistics about each neuron for model introspection visualization in flow
- It is a part of minsuk's summer intern project. See the following link for high-level details: https://www.dropbox.com/s/m89rwpoomqkc9jb/aml-talk-nnvis-minsuk.pptx?dl=0
- Will combine the following two visualizations: https://our.intern.facebook.com/intern/fblearner/c2graphvis/13795371/ and https://our.intern.facebook.com/intern/fblearner/model-introspection-nn/11910201/
Differential Revision: D4303679
fbshipit-source-id: eeac699891b17cea0b29324d584937460a8d7a25
diff --git a/caffe2/python/introspect_vis.py b/caffe2/python/introspect_vis.py
new file mode 100644
index 0000000..68b3668
--- /dev/null
+++ b/caffe2/python/introspect_vis.py
@@ -0,0 +1,104 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy as np
+import json
+
+
+class IntrospectVisData():
+
+ def __init__(self, model_name, first_outputs, meta_info, lab_arr):
+ self.model_name = model_name
+ self.max_num_instances = 1000
+ self.count = 0
+ self.instances = []
+ self.labels = [{"id": i, "name": c} for i, c in enumerate(lab_arr)]
+ self.is_multilabel = True if 'multi_label' in meta_info\
+ and meta_info['multi_label'] else False
+
+ self.neuron_groups = [{"idx": i, "name": b,
+ "size": len(first_outputs[2][i -
+ len(meta_info['output_names'])][0])}
+ for i, b in enumerate(meta_info['output_names'])]
+ self.summaries = map(lambda x: np.array([[0. for _ in range(x['size'])]
+ for _ in range(len(self.labels))]),
+ self.neuron_groups)
+
+ def getInstanceActivations(self, outputs):
+ outputs = outputs[(-1) * len(self.neuron_groups):]
+ return [[round(_val, 4) for _val in out[0]] for out in outputs]
+
+ def updateNeuronSummaries(self, activations, true_idxs):
+ self.count += 1
+ for out_idx in range(len(self.summaries)):
+ if self.is_multilabel:
+ for true_idx in true_idxs:
+ self.summaries[out_idx][true_idx] += activations[out_idx]
+ else:
+ self.summaries[out_idx][true_idxs] += activations[out_idx]
+
+ def appendInstance(self, instance):
+ self.instances.append(instance)
+
+ def processInstance(self, idx, labels, scores, outputs, model_specific):
+ activations = []
+ if self.model_name in ['DocNN']:
+ activations = self.getInstanceActivations(outputs)
+ self.updateNeuronSummaries(activations, labels)
+ if idx < self.max_num_instances:
+ if len(activations) == 0:
+ activations = self.getInstanceActivations(outputs),
+ instance = {
+ "id": idx,
+ "labels": labels,
+ "scores": scores,
+ "activations": activations,
+ }
+ for key, val in model_specific.items():
+ instance[key] = val
+ self.appendInstance(instance)
+
+ def updateArrangements(self):
+ if self.model_name in ['DocNN']:
+ # sort class scores based on score values
+ for instance in self.instances:
+ instance['scores'] =\
+ sorted([{"class_id": j, "score": round(_s, 3)}
+ for j, _s in enumerate(instance['scores'])],
+ key=lambda x: x['score'], reverse=True)
+ # instance positions based on scores
+ inst_sort_vals = [[] for _ in range(len(self.labels))]
+ for i, x in enumerate(self.instances):
+ sort_val = 1.0
+ # if multi_label, get the first label
+ label = x['labels'] if type(x['labels']) == int\
+ else x['labels'][0]
+ if label == x['scores'][0]['class_id']:
+ # How much score difference from that of rank 2 class
+ sort_val = x['scores'][0]['score'] - x['scores'][1]['score']
+ else:
+ # How much score difference from that of rank 1 class
+ sort_val = x['scores'][label]['score'] -\
+ x['scores'][0]['score']
+ inst_sort_vals[label].append({"inst_id": i, "val": sort_val})
+ for class_id, inst_vals in enumerate(inst_sort_vals):
+ for i, r in enumerate(sorted(inst_vals, key=lambda x: x['val'],
+ reverse=True)):
+ self.instances[r['inst_id']]['position'] = i
+
+ def postprocess(self, filepath):
+ self.neuron_summaries = [np.around((np.swapaxes(_s, 0, 1) /
+ float(self.count)), 4).tolist()
+ for _s in self.summaries] if self.count > 0 else None
+
+ self.updateArrangements()
+
+ with open(filepath, 'w') as vf:
+ json.dump({
+ "model_type": self.model_name,
+ "neuron_groups": self.neuron_groups,
+ "classes": self.labels,
+ "instances": self.instances,
+ "neuron_summaries": self.neuron_summaries,
+ }, vf)