Parameter refactoring, step 0: add simple tags and unify the interface for params and computed_params.
Summary:
This diff is the first step in an effort to refactor all parameters. It merges the concepts of params and computed_params into a single interface based on tags (in this first version the old data structures are still used to store all the BlobReferences).
Renaming computed_params to non-trainable/non-backprop params should be done in a separate diff.
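For illustration, a minimal sketch of the unified interface (assumes model is a ModelHelper and w, b, rm are existing BlobReferences; the comments note where each blob ends up):

    from caffe2.python.modeling.parameter_info import ParameterTags

    # Before this diff, call sites filled two parallel lists by hand:
    #   model.params.extend([w, b])
    #   model.computed_params.extend([rm])
    # After it, one entry point routes each blob based on its tags:
    model.AddParameter(w, ParameterTags.WEIGHT)           # -> params, weights
    model.AddParameter(b, ParameterTags.BIAS)             # -> params, biases
    model.AddParameter(rm, ParameterTags.COMPUTED_PARAM)  # -> _computed_params
    computed = model.GetComputedParams('')                # unified read path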
Reviewed By: salexspb
Differential Revision: D5171159
fbshipit-source-id: 68031ca779f053fb266a7c4a2e5b482a3bd9c832
diff --git a/caffe2/python/data_parallel_model.py b/caffe2/python/data_parallel_model.py
index 5dc1a3c..e980c49 100644
--- a/caffe2/python/data_parallel_model.py
+++ b/caffe2/python/data_parallel_model.py
@@ -129,7 +129,7 @@
# computed params
computed_params_grouped =\
- _GroupByDevice(devices, model_helper_obj.computed_params, [])
+ _GroupByDevice(devices, model_helper_obj.GetComputedParams(''), [])
model_helper_obj._device_grouped_blobs.update(computed_params_grouped)
model_helper_obj._param_names =\
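A note on the call above: GetComputedParams('') with an empty namescope returns a copy of the whole private _computed_params list, so the device grouping sees every scope's blobs. A scoped query (scope name hypothetical) filters instead:

    # All computed params, regardless of namescope:
    all_computed = model_helper_obj.GetComputedParams('')
    # Only the ones created under a given (hypothetical) scope:
    gpu0_computed = model_helper_obj.GetComputedParams('gpu_0')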
diff --git a/caffe2/python/helpers/conv.py b/caffe2/python/helpers/conv.py
index ba1f50a..acc1131 100644
--- a/caffe2/python/helpers/conv.py
+++ b/caffe2/python/helpers/conv.py
@@ -7,6 +7,7 @@
from caffe2.python import core
from caffe2.python.modeling import initializers
+from caffe2.python.modeling.parameter_info import ParameterTags
def _ConvBase(
model,
@@ -65,27 +66,24 @@
weight = model.create_param(
param_name=blob_out + '_w',
shape=weight_shape,
- initializer=weight_initializer)
+ initializer=weight_initializer,
+ tags=ParameterTags.WEIGHT
+ )
if use_bias:
bias = model.create_param(
param_name=blob_out + '_b',
shape=[dim_out, ],
- initializer=bias_initializer)
+ initializer=bias_initializer,
+ tags=ParameterTags.BIAS
+ )
else:
weight = core.ScopedBlobReference(
blob_out + '_w', model.param_init_net)
+ model.AddParameter(weight, ParameterTags.WEIGHT)
if use_bias:
bias = core.ScopedBlobReference(
blob_out + '_b', model.param_init_net)
- if use_bias:
- model.params.extend([weight, bias])
- else:
- model.params.extend([weight])
-
- model.weights.append(weight)
-
- if use_bias:
- model.biases.append(bias)
+ model.AddParameter(bias, ParameterTags.BIAS)
if use_bias:
inputs = [blob_in, weight, bias]
@@ -206,9 +204,8 @@
blob_out + '_w', model.param_init_net)
bias = core.ScopedBlobReference(
blob_out + '_b', model.param_init_net)
- model.params.extend([weight, bias])
- model.weights.append(weight)
- model.biases.append(bias)
+ model.AddParameter(weight, ParameterTags.WEIGHT)
+ model.AddParameter(bias, ParameterTags.BIAS)
if use_cudnn:
kwargs['engine'] = 'CUDNN'
kwargs['exhaustive_search'] = cudnn_exhaustive_search
@@ -314,13 +311,9 @@
if use_bias:
bias = core.ScopedBlobReference(
blob_out + '_gconv_%d_b' % i, model.param_init_net)
+ model.AddParameter(weight, ParameterTags.WEIGHT)
if use_bias:
- model.params.extend([weight, bias])
- else:
- model.params.extend([weight])
- model.weights.append(weight)
- if use_bias:
- model.biases.append(bias)
+ model.AddParameter(bias, ParameterTags.BIAS)
if use_bias:
inputs = [weight, bias]
else:
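The conv changes above replace manual list bookkeeping with tagged registration; for the init_params=False branch the two forms are equivalent (illustrative sketch):

    # Before:
    #   model.params.extend([weight, bias])
    #   model.weights.append(weight)
    #   model.biases.append(bias)
    # After:
    model.AddParameter(weight, ParameterTags.WEIGHT)
    model.AddParameter(bias, ParameterTags.BIAS)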
diff --git a/caffe2/python/helpers/fc.py b/caffe2/python/helpers/fc.py
index d8dfe52..3c5453f 100644
--- a/caffe2/python/helpers/fc.py
+++ b/caffe2/python/helpers/fc.py
@@ -7,6 +7,7 @@
from caffe2.python import core
from caffe2.python.modeling import initializers
+from caffe2.python.modeling.parameter_info import ParameterTags
def _FC_or_packed_FC(
@@ -22,16 +23,22 @@
)
blob_out = blob_out or model.net.NextName()
+ bias_tags = [ParameterTags.BIAS]
+ if 'freeze_bias' in kwargs:
+ bias_tags.append(ParameterTags.COMPUTED_PARAM)
+
if model.init_params:
weight = model.create_param(
param_name=blob_out + '_w',
shape=[dim_out, dim_in],
initializer=WeightInitializer,
+ tags=ParameterTags.WEIGHT
)
bias = model.create_param(
param_name=blob_out + '_b',
shape=[dim_out, ],
initializer=BiasInitializer,
+ tags=bias_tags
)
else:
weight = core.ScopedBlobReference(
@@ -39,13 +46,9 @@
bias = core.ScopedBlobReference(
blob_out + '_b', model.param_init_net)
- if 'freeze_bias' in kwargs:
- model.params.extend([weight])
- else:
- model.params.extend([weight, bias])
+ model.AddParameter(weight, ParameterTags.WEIGHT)
+ model.AddParameter(bias, bias_tags)
- model.weights.append(weight)
- model.biases.append(bias)
return op_call([blob_in, weight, bias], blob_out, **kwargs)
@@ -59,34 +62,34 @@
def fc_decomp(
model, blob_in, blob_out, dim_in, dim_out,
- rank_approx=5, weight_init=None,
- bias_init=None, **kwargs
+ rank_approx=5, weight_init=None, bias_init=None,
+ WeightInitializer=None, BiasInitializer=None, **kwargs
):
"""FC_Decomp version
Here we assume that the rank of original input is bigger than 5.
"""
- weight_init = weight_init if weight_init else ('XavierFill', {})
- bias_init = bias_init if bias_init else ('ConstantFill', {})
+ WeightInitializer = initializers.update_initializer(
+ WeightInitializer, weight_init, ("XavierFill", {})
+ )
+ BiasInitializer = initializers.update_initializer(
+ BiasInitializer, bias_init, ("ConstantFill", {})
+ )
blob_out = blob_out or model.net.NextName()
- u = model.param_init_net.__getattr__(weight_init[0])(
- [],
- blob_out + '_u',
+ u = model.create_param(
+ param_name=blob_out + '_u',
shape=[dim_out, rank_approx],
- **weight_init[1]
+ initializer=WeightInitializer,
)
- v = model.param_init_net.__getattr__(weight_init[0])(
- [],
- blob_out + '_v',
+ v = model.create_param(
+ param_name=blob_out + '_v',
shape=[dim_in, rank_approx],
- **weight_init[1]
+ initializer=WeightInitializer,
)
- bias = model.param_init_net.__getattr__(bias_init[0])(
- [],
- blob_out + '_b',
+ bias = model.create_param(
+ param_name=blob_out + '_b',
shape=[dim_out, ],
- **bias_init[1]
+ initializer=BiasInitializer,
)
- model.params.extend([u, v, bias])
return model.net.FC_Decomp([blob_in, u, v, bias], blob_out, **kwargs)
@@ -164,7 +167,8 @@
thres = core.ScopedBlobReference(
blob_out + '_thres', model.param_init_net)
- model.params.extend([weight, bias])
+ model.AddParameter(weight)
+ model.AddParameter(bias)
if need_compress_rate:
return model.net.FC_Prune([blob_in, weight, mask, bias, ag_dw, mask_seq,
thres, compress_lb],
@@ -183,6 +187,9 @@
"""FC_Sparse: Only takes in alocated weights"""
if not (w_csr and iw and jw and bias):
print("Warning...")
- model.params.extend([w_csr, iw, jw, bias])
+ model.AddParameter(w_csr)
+ model.AddParameter(iw)
+ model.AddParameter(jw)
+ model.AddParameter(bias)
return model.net.FC_Sparse([blob_in, w_csr, iw, jw, bias],
blob_out, **kwargs)
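The freeze_bias path shows how tags compose: a frozen bias carries both BIAS and COMPUTED_PARAM, and AddParameter then keeps it out of model.params so it receives no gradient (sketch; kwargs handling as in the helper above):

    bias_tags = [ParameterTags.BIAS]
    if 'freeze_bias' in kwargs:
        # COMPUTED_PARAM takes priority in AddParameter: the blob goes to
        # _computed_params instead of params, so it is not backpropped.
        bias_tags.append(ParameterTags.COMPUTED_PARAM)
    model.AddParameter(bias, bias_tags)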
diff --git a/caffe2/python/helpers/nonlinearity.py b/caffe2/python/helpers/nonlinearity.py
index ba75acf..954efdf 100644
--- a/caffe2/python/helpers/nonlinearity.py
+++ b/caffe2/python/helpers/nonlinearity.py
@@ -24,7 +24,7 @@
slope = core.ScopedBlobReference(
blob_out + '_slope', model.param_init_net)
- model.params.extend([slope])
+ model.AddParameter(slope)
return model.net.PRelu([blob_in, slope], [blob_out])
diff --git a/caffe2/python/helpers/normalization.py b/caffe2/python/helpers/normalization.py
index 03a9a52..94dcc59 100644
--- a/caffe2/python/helpers/normalization.py
+++ b/caffe2/python/helpers/normalization.py
@@ -6,6 +6,7 @@
from __future__ import unicode_literals
from caffe2.python import core, scope
+from caffe2.python.modeling.parameter_info import ParameterTags
from caffe2.proto import caffe2_pb2
@@ -54,9 +55,8 @@
[], blob_out + "_" + suffix, shape=[dim_in], value=value)
scale, bias = init_blob(1.0, "s"), init_blob(0.0, "b")
- model.params.extend([scale, bias])
- model.weights.append(scale)
- model.biases.append(bias)
+ model.AddParameter(scale, ParameterTags.WEIGHT)
+ model.AddParameter(bias, ParameterTags.BIAS)
blob_outs = [blob_out, blob_out + "_sm", blob_out + "_siv"]
if 'is_test' in kwargs and kwargs['is_test']:
blob_outputs = model.net.InstanceNorm(
@@ -99,10 +99,11 @@
running_inv_var = core.ScopedBlobReference(
blob_out + '_riv', model.param_init_net)
- model.params.extend([scale, bias])
- model.computed_params.extend([running_mean, running_inv_var])
- model.weights.append(scale)
- model.biases.append(bias)
+ model.AddParameter(running_mean, ParameterTags.COMPUTED_PARAM)
+ model.AddParameter(running_inv_var, ParameterTags.COMPUTED_PARAM)
+ model.AddParameter(scale, ParameterTags.WEIGHT)
+ model.AddParameter(bias, ParameterTags.BIAS)
+
blob_outs = [blob_out, running_mean, running_inv_var,
blob_out + "_sm", blob_out + "_siv"]
if 'is_test' in kwargs and kwargs['is_test']:
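For spatial BN above, the running statistics are updated by the operator itself rather than by backprop, which is exactly what COMPUTED_PARAM expresses; the consumer side reads both kinds through one interface (sketch):

    trainable = model.GetParams()            # scale, bias, ...
    computed = model.GetComputedParams('')   # running_mean, running_inv_var
    everything = model.GetAllParams()        # union of the two lists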
diff --git a/caffe2/python/model_helper.py b/caffe2/python/model_helper.py
index f2c10f1..3597544 100644
--- a/caffe2/python/model_helper.py
+++ b/caffe2/python/model_helper.py
@@ -7,10 +7,9 @@
from caffe2.python import core, scope, workspace
from caffe2.python.modeling import parameter_info
-
-
import logging
+
# _known_working_ops are operators that do not need special care.
_known_working_ops = [
"Accuracy",
@@ -83,12 +82,12 @@
self.param_init_net = param_model.param_init_net
self.param_to_grad = param_model.param_to_grad
self.params = param_model.params
- self.computed_params = param_model.computed_params
+ self._computed_params = param_model._computed_params
else:
self.param_init_net = core.Net(name + '_init')
self.param_to_grad = {}
self.params = []
- self.computed_params = []
+ self._computed_params = []
self._param_info_deprecated = []
self._parameters_info = {}
@@ -128,7 +127,8 @@
assert len(self._param_info_deprecated) <= len(self.params)
for param in self.params[len(self._param_info_deprecated):]:
if not isinstance(param, core.BlobReference):
- raise ValueError("Param %s must be a BlobReference!" % str(param))
+ raise ValueError(
+ "Param %s must be a BlobReference!" % str(param))
self._param_info_deprecated.append(parameter_info.ParameterInfo(
param_id=len(self._param_info_deprecated),
param=param,
@@ -136,13 +136,16 @@
for info in self._param_info_deprecated:
info.grad = self.param_to_grad.get(info.name)
- def create_param(self, param_name, shape, initializer):
+ def create_param(self, param_name, shape, initializer, tags=None):
param_info = initializer.create_param(
param_name=param_name,
init_net=self.param_init_net,
shape=shape,
)
self._parameters_info[param_info.blob] = param_info
+ # Also add the param to the legacy structs so that all other
+ # parameter-handling functions keep working.
+ self.AddParameter(param_info.blob, tags)
return param_info.blob
def get_param_info(self, param):
@@ -155,11 +158,11 @@
def add_param_DEPRECATED(self, param, key=None, shape=None, length=None):
logging.warning("add_param method is DEPRECATED")
self._update_param_info_deprecated()
+ self.AddParameter(param)
if key is not None and self.net.input_record() is not None:
idx = self.net.input_record().field_blobs().index(key)
key = self.net.input_record().field_names()[idx]
shape = shape if shape is not None else self._infer_param_shape(param)
- self.params.append(param)
if not isinstance(param, core.BlobReference):
raise ValueError("Param %s must be a BlobReference!" % str(param))
self._param_info_deprecated.append(parameter_info.ParameterInfo(
@@ -187,6 +190,22 @@
else:
return self._param_info_deprecated
+ def AddParameter(self, param, tags=None):
+ tags = tags or []
+ if isinstance(tags, list):
+ tags = set(tags)
+ else:
+ tags = set([tags])
+ if parameter_info.ParameterTags.COMPUTED_PARAM in tags:
+ self._computed_params.append(param)
+ else:
+ self.params.append(param)
+
+ if parameter_info.ParameterTags.WEIGHT in tags:
+ self.weights.append(param)
+ if parameter_info.ParameterTags.BIAS in tags:
+ self.biases.append(param)
+
def GetParams(self, namescope=None, top_scope=False):
'''
Returns the params in current namescope
@@ -318,9 +337,9 @@
namescope += scope._NAMESCOPE_SEPARATOR
if namescope == '':
- return self.computed_params[:]
+ return self._computed_params[:]
else:
- return [p for p in self.computed_params
+ return [p for p in self._computed_params
if p.GetNameScope() == namescope]
def GetAllParams(self, namescope=None):
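The routing logic in AddParameter can be exercised directly; a minimal sketch, assuming ModelHelper also defines the weights/biases lists the method appends to (blob names illustrative):

    from caffe2.python import core, model_helper
    from caffe2.python.modeling.parameter_info import ParameterTags

    model = model_helper.ModelHelper(name="tags_demo")

    w = core.ScopedBlobReference('w', model.param_init_net)
    model.AddParameter(w, ParameterTags.WEIGHT)
    assert w in model.params and w in model.weights

    rm = core.ScopedBlobReference('rm', model.param_init_net)
    model.AddParameter(rm, ParameterTags.COMPUTED_PARAM)
    assert rm not in model.params
    assert rm in model.GetComputedParams('')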
diff --git a/caffe2/python/modeling/parameter_info.py b/caffe2/python/modeling/parameter_info.py
index 3ab7c7e..28c6c2a 100644
--- a/caffe2/python/modeling/parameter_info.py
+++ b/caffe2/python/modeling/parameter_info.py
@@ -7,12 +7,19 @@
import numpy as np
+class ParameterTags(object):
+ BIAS = 'BIAS'
+ WEIGHT = 'WEIGHT'
+ COMPUTED_PARAM = 'COMPUTED_PARAM'
+
+
class ParameterType(object):
DENSE = 'dense'
SPARSE = 'sparse'
class ParameterInfo(object):
+
def __init__(
self, param_id, param, key=None, shape=None, length=None,
grad=None, blob_copy=None):