Refactoring of the parameters step 0. Add simple tags and unify interface for params and computed_params.

Summary:
This diff is the first step in the effort to refactor all parameters. As a first step, I'm merging the concepts of params and computed_params, which will be based on tags instead (in this first version it still uses the old data structs to store all the BlobReferences).

Renaming computed_params to non-trainable/non-backprop params should be done in some other diff.

Reviewed By: salexspb

Differential Revision: D5171159

fbshipit-source-id: 68031ca779f053fb266a7c4a2e5b482a3bd9c832
diff --git a/caffe2/python/data_parallel_model.py b/caffe2/python/data_parallel_model.py
index 5dc1a3c..e980c49 100644
--- a/caffe2/python/data_parallel_model.py
+++ b/caffe2/python/data_parallel_model.py
@@ -129,7 +129,7 @@
 
     # computed params
     computed_params_grouped =\
-        _GroupByDevice(devices, model_helper_obj.computed_params, [])
+        _GroupByDevice(devices, model_helper_obj.GetComputedParams(''), [])
     model_helper_obj._device_grouped_blobs.update(computed_params_grouped)
 
     model_helper_obj._param_names =\
diff --git a/caffe2/python/helpers/conv.py b/caffe2/python/helpers/conv.py
index ba1f50a..acc1131 100644
--- a/caffe2/python/helpers/conv.py
+++ b/caffe2/python/helpers/conv.py
@@ -7,6 +7,7 @@
 
 from caffe2.python import core
 from caffe2.python.modeling import initializers
+from caffe2.python.modeling.parameter_info import ParameterTags
 
 def _ConvBase(
     model,
@@ -65,27 +66,24 @@
         weight = model.create_param(
             param_name=blob_out + '_w',
             shape=weight_shape,
-            initializer=weight_initializer)
+            initializer=weight_initializer,
+            tags=ParameterTags.WEIGHT
+        )
         if use_bias:
             bias = model.create_param(
                 param_name=blob_out + '_b',
                 shape=[dim_out, ],
-                initializer=bias_initializer)
+                initializer=bias_initializer,
+                tags=ParameterTags.BIAS
+            )
     else:
         weight = core.ScopedBlobReference(
             blob_out + '_w', model.param_init_net)
+        model.AddParameter(weight)
         if use_bias:
             bias = core.ScopedBlobReference(
                 blob_out + '_b', model.param_init_net)
-    if use_bias:
-        model.params.extend([weight, bias])
-    else:
-        model.params.extend([weight])
-
-    model.weights.append(weight)
-
-    if use_bias:
-        model.biases.append(bias)
+            model.AddParameter(bias, ParameterTags.BIAS)
 
     if use_bias:
         inputs = [blob_in, weight, bias]
@@ -206,9 +204,8 @@
             blob_out + '_w', model.param_init_net)
         bias = core.ScopedBlobReference(
             blob_out + '_b', model.param_init_net)
-    model.params.extend([weight, bias])
-    model.weights.append(weight)
-    model.biases.append(bias)
+    model.AddParameter(weight, ParameterTags.WEIGHT)
+    model.AddParameter(bias, ParameterTags.BIAS)
     if use_cudnn:
         kwargs['engine'] = 'CUDNN'
         kwargs['exhaustive_search'] = cudnn_exhaustive_search
@@ -314,13 +311,9 @@
             if use_bias:
                 bias = core.ScopedBlobReference(
                     blob_out + '_gconv_%d_b' % i, model.param_init_net)
+        model.AddParameter(weight, ParameterTags.WEIGHT)
         if use_bias:
-            model.params.extend([weight, bias])
-        else:
-            model.params.extend([weight])
-        model.weights.append(weight)
-        if use_bias:
-            model.biases.append(bias)
+            model.AddParameter(bias, ParameterTags.BIAS)
         if use_bias:
             inputs = [weight, bias]
         else:
diff --git a/caffe2/python/helpers/fc.py b/caffe2/python/helpers/fc.py
index d8dfe52..3c5453f 100644
--- a/caffe2/python/helpers/fc.py
+++ b/caffe2/python/helpers/fc.py
@@ -7,6 +7,7 @@
 
 from caffe2.python import core
 from caffe2.python.modeling import initializers
+from caffe2.python.modeling.parameter_info import ParameterTags
 
 
 def _FC_or_packed_FC(
@@ -22,16 +23,22 @@
     )
 
     blob_out = blob_out or model.net.NextName()
+    bias_tags = [ParameterTags.BIAS]
+    if 'freeze_bias' in kwargs:
+        bias_tags.append(ParameterTags.COMPUTED_PARAM)
+
     if model.init_params:
         weight = model.create_param(
             param_name=blob_out + '_w',
             shape=[dim_out, dim_in],
             initializer=WeightInitializer,
+            tags=ParameterTags.WEIGHT
         )
         bias = model.create_param(
             param_name=blob_out + '_b',
             shape=[dim_out, ],
             initializer=BiasInitializer,
+            tags=bias_tags
         )
     else:
         weight = core.ScopedBlobReference(
@@ -39,13 +46,9 @@
         bias = core.ScopedBlobReference(
             blob_out + '_b', model.param_init_net)
 
-    if 'freeze_bias' in kwargs:
-        model.params.extend([weight])
-    else:
-        model.params.extend([weight, bias])
+        model.AddParameter(weight, ParameterTags.WEIGHT)
+        model.AddParameter(bias, bias_tags)
 
-    model.weights.append(weight)
-    model.biases.append(bias)
     return op_call([blob_in, weight, bias], blob_out, **kwargs)
 
 
@@ -59,34 +62,34 @@
 
 def fc_decomp(
     model, blob_in, blob_out, dim_in, dim_out,
-    rank_approx=5, weight_init=None,
-    bias_init=None, **kwargs
+    rank_approx=5, weight_init=None, bias_init=None,
+    WeightInitializer=None, BiasInitializer=None, **kwargs
 ):
     """FC_Decomp version
     Here we assume that the rank of original input is bigger than 5.
     """
-    weight_init = weight_init if weight_init else ('XavierFill', {})
-    bias_init = bias_init if bias_init else ('ConstantFill', {})
+    WeightInitializer = initializers.update_initializer(
+        WeightInitializer, weight_init, ("XavierFill", {})
+    )
+    BiasInitializer = initializers.update_initializer(
+        BiasInitializer, bias_init, ("ConstantFill", {})
+    )
     blob_out = blob_out or model.net.NextName()
-    u = model.param_init_net.__getattr__(weight_init[0])(
-        [],
-        blob_out + '_u',
+    u = model.create_param(
+        param_name=blob_out + '_u',
         shape=[dim_out, rank_approx],
-        **weight_init[1]
+        initializer=WeightInitializer,
     )
-    v = model.param_init_net.__getattr__(weight_init[0])(
-        [],
-        blob_out + '_v',
+    v = model.create_param(
+        param_name=blob_out + '_v',
         shape=[dim_in, rank_approx],
-        **weight_init[1]
+        initializer=WeightInitializer,
     )
-    bias = model.param_init_net.__getattr__(bias_init[0])(
-        [],
-        blob_out + '_b',
+    bias = model.create_param(
+        param_name=blob_out + '_b',
         shape=[dim_out, ],
-        **bias_init[1]
+        initializer=BiasInitializer,
     )
-    model.params.extend([u, v, bias])
     return model.net.FC_Decomp([blob_in, u, v, bias], blob_out, **kwargs)
 
 
@@ -164,7 +167,8 @@
         thres = core.ScopedBlobReference(
             blob_out + '_thres', model.param_init_net)
 
-    model.params.extend([weight, bias])
+    model.AddParameter(weight)
+    model.AddParameter(bias)
     if need_compress_rate:
         return model.net.FC_Prune([blob_in, weight, mask, bias, ag_dw, mask_seq,
                                    thres, compress_lb],
@@ -183,6 +187,9 @@
     """FC_Sparse: Only takes in alocated weights"""
     if not (w_csr and iw and jw and bias):
         print("Warning...")
-    model.params.extend([w_csr, iw, jw, bias])
+    model.AddParameter(w_csr)
+    model.AddParameter(iw)
+    model.AddParameter(jw)
+    model.AddParameter(bias)
     return model.net.FC_Sparse([blob_in, w_csr, iw, jw, bias],
                                blob_out, **kwargs)
diff --git a/caffe2/python/helpers/nonlinearity.py b/caffe2/python/helpers/nonlinearity.py
index ba75acf..954efdf 100644
--- a/caffe2/python/helpers/nonlinearity.py
+++ b/caffe2/python/helpers/nonlinearity.py
@@ -24,7 +24,7 @@
         slope = core.ScopedBlobReference(
             blob_out + '_slope', model.param_init_net)
 
-    model.params.extend([slope])
+    model.AddParameter(slope)
 
     return model.net.PRelu([blob_in, slope], [blob_out])
 
diff --git a/caffe2/python/helpers/normalization.py b/caffe2/python/helpers/normalization.py
index 03a9a52..94dcc59 100644
--- a/caffe2/python/helpers/normalization.py
+++ b/caffe2/python/helpers/normalization.py
@@ -6,6 +6,7 @@
 from __future__ import unicode_literals
 
 from caffe2.python import core, scope
+from caffe2.python.modeling.parameter_info import ParameterTags
 from caffe2.proto import caffe2_pb2
 
 
@@ -54,9 +55,8 @@
             [], blob_out + "_" + suffix, shape=[dim_in], value=value)
     scale, bias = init_blob(1.0, "s"), init_blob(0.0, "b")
 
-    model.params.extend([scale, bias])
-    model.weights.append(scale)
-    model.biases.append(bias)
+    model.AddParameter(scale, ParameterTags.WEIGHT)
+    model.AddParameter(bias, ParameterTags.BIAS)
     blob_outs = [blob_out, blob_out + "_sm", blob_out + "_siv"]
     if 'is_test' in kwargs and kwargs['is_test']:
         blob_outputs = model.net.InstanceNorm(
@@ -99,10 +99,11 @@
         running_inv_var = core.ScopedBlobReference(
             blob_out + '_riv', model.param_init_net)
 
-    model.params.extend([scale, bias])
-    model.computed_params.extend([running_mean, running_inv_var])
-    model.weights.append(scale)
-    model.biases.append(bias)
+    model.AddParameter(running_mean, ParameterTags.COMPUTED_PARAM)
+    model.AddParameter(running_inv_var, ParameterTags.COMPUTED_PARAM)
+    model.AddParameter(scale, ParameterTags.WEIGHT)
+    model.AddParameter(bias, ParameterTags.BIAS)
+
     blob_outs = [blob_out, running_mean, running_inv_var,
                  blob_out + "_sm", blob_out + "_siv"]
     if 'is_test' in kwargs and kwargs['is_test']:
diff --git a/caffe2/python/model_helper.py b/caffe2/python/model_helper.py
index f2c10f1..3597544 100644
--- a/caffe2/python/model_helper.py
+++ b/caffe2/python/model_helper.py
@@ -7,10 +7,9 @@
 
 from caffe2.python import core, scope, workspace
 from caffe2.python.modeling import parameter_info
-
-
 import logging
 
+
 # _known_working_ops are operators that do not need special care.
 _known_working_ops = [
     "Accuracy",
@@ -83,12 +82,12 @@
             self.param_init_net = param_model.param_init_net
             self.param_to_grad = param_model.param_to_grad
             self.params = param_model.params
-            self.computed_params = param_model.computed_params
+            self._computed_params = param_model._computed_params
         else:
             self.param_init_net = core.Net(name + '_init')
             self.param_to_grad = {}
             self.params = []
-            self.computed_params = []
+            self._computed_params = []
 
         self._param_info_deprecated = []
         self._parameters_info = {}
@@ -128,7 +127,8 @@
         assert len(self._param_info_deprecated) <= len(self.params)
         for param in self.params[len(self._param_info_deprecated):]:
             if not isinstance(param, core.BlobReference):
-                raise ValueError("Param %s must be a BlobReference!" % str(param))
+                raise ValueError(
+                    "Param %s must be a BlobReference!" % str(param))
             self._param_info_deprecated.append(parameter_info.ParameterInfo(
                 param_id=len(self._param_info_deprecated),
                 param=param,
@@ -136,13 +136,16 @@
         for info in self._param_info_deprecated:
             info.grad = self.param_to_grad.get(info.name)
 
-    def create_param(self, param_name, shape, initializer):
+    def create_param(self, param_name, shape, initializer, tags=None):
         param_info = initializer.create_param(
             param_name=param_name,
             init_net=self.param_init_net,
             shape=shape,
         )
         self._parameters_info[param_info.blob] = param_info
+        # Add param to legacy structs as well, so all other functions for
+        # parameters are still working.
+        self.AddParameter(param_info.blob, tags)
         return param_info.blob
 
     def get_param_info(self, param):
@@ -155,11 +158,11 @@
     def add_param_DEPRECATED(self, param, key=None, shape=None, length=None):
         logging.warning("add_param method is DEPRECATED")
         self._update_param_info_deprecated()
+        self.AddParameter(param)
         if key is not None and self.net.input_record() is not None:
             idx = self.net.input_record().field_blobs().index(key)
             key = self.net.input_record().field_names()[idx]
         shape = shape if shape is not None else self._infer_param_shape(param)
-        self.params.append(param)
         if not isinstance(param, core.BlobReference):
             raise ValueError("Param %s must be a BlobReference!" % str(param))
         self._param_info_deprecated.append(parameter_info.ParameterInfo(
@@ -187,6 +190,22 @@
         else:
             return self._param_info_deprecated
 
+    def AddParameter(self, param, tags=None):
+        tags = tags or []
+        if isinstance(tags, list):
+            tags = set(tags)
+        else:
+            tags = set([tags])
+        if parameter_info.ParameterTags.COMPUTED_PARAM in tags:
+            self._computed_params.append(param)
+        else:
+            self.params.append(param)
+
+        if parameter_info.ParameterTags.WEIGHT in tags:
+            self.weights.append(param)
+        if parameter_info.ParameterTags.BIAS in tags:
+            self.biases.append(param)
+
     def GetParams(self, namescope=None, top_scope=False):
         '''
         Returns the params in current namescope
@@ -318,9 +337,9 @@
                 namescope += scope._NAMESCOPE_SEPARATOR
 
         if namescope == '':
-            return self.computed_params[:]
+            return self._computed_params[:]
         else:
-            return [p for p in self.computed_params
+            return [p for p in self._computed_params
                     if p.GetNameScope() == namescope]
 
     def GetAllParams(self, namescope=None):
diff --git a/caffe2/python/modeling/parameter_info.py b/caffe2/python/modeling/parameter_info.py
index 3ab7c7e..28c6c2a 100644
--- a/caffe2/python/modeling/parameter_info.py
+++ b/caffe2/python/modeling/parameter_info.py
@@ -7,12 +7,19 @@
 import numpy as np
 
 
+class ParameterTags(object):
+    BIAS = 'BIAS'
+    WEIGHT = 'WEIGHT'
+    COMPUTED_PARAM = 'COMPUTED_PARAM'
+
+
 class ParameterType(object):
     DENSE = 'dense'
     SPARSE = 'sparse'
 
 
 class ParameterInfo(object):
+
     def __init__(
             self, param_id, param, key=None, shape=None, length=None,
             grad=None, blob_copy=None):