from caffe2.python import schema
from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
import numpy as np


class SemiRandomFeatures(ArcCosineFeatureMap):
    """
    Implementation of the semi-random kernel feature map.

    Applies H(x_rand) * x_rand^s * x_learned, where
        H is the Heaviside step function,
        x_rand is the input after applying FC with randomized parameters,
        and x_learned is the input after applying FC with learnable parameters.

    If using a multilayer model with semi-random layers, then the input and
    output records should each have a 'full' and a 'random' Scalar. The
    'random' Scalar is passed as input to process the random features.

    For more information, see the original paper:
    https://arxiv.org/pdf/1702.08882.pdf
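
    As a rough numpy sketch of the computation (illustrative only; the layer
    itself runs Caffe2 operators on blobs, and the W/b names here are
    hypothetical):

        x_rand = x.dot(W_rand.T) + b_rand            # FC, random params
        x_learned = x.dot(W_learned.T) + b_learned   # FC, learned params
        output = np.heaviside(x_rand, 0) * (x_rand ** s) * x_learned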

    Inputs:
        output_dims -- dimensions of the output vector
        s -- if s == 0, will obtain linear semi-random features;
             else if s == 1, will obtain squared semi-random features;
             else if s >= 2, will obtain higher order semi-random features
        scale_random -- amount to scale the standard deviation
                        (for random parameter initialization when weight_init
                        or bias_init hasn't been specified)
        scale_learned -- amount to scale the standard deviation
                         (for learned parameter initialization when weight_init
                         or bias_init hasn't been specified)
        weight_init_random -- initialization distribution for random weight
                              parameter (if None, will use Gaussian distribution)
        bias_init_random -- initialization distribution for random bias
                            parameter (if None, will use Uniform distribution)
        weight_init_learned -- initialization distribution for learned weight
                               parameter (if None, will use Gaussian distribution)
        bias_init_learned -- initialization distribution for learned bias
                             parameter (if None, will use Uniform distribution)
        weight_optim -- optimizer for weight params for learned features
        bias_optim -- optimizer for bias param for learned features
        set_weight_as_global_constant -- if True, initialized random parameters
                                         will be constant across all distributed
                                         instances of the layer
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=1,
            scale_random=1.0,
            scale_learned=1.0,
            weight_init_random=None,
            bias_init_random=None,
            weight_init_learned=None,
            bias_init_learned=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            name='semi_random_features',
            **kwargs):

        if isinstance(input_record, schema.Struct):
            assert schema.is_schema_subset(
                schema.Struct(
                    ('full', schema.Scalar()),
                    ('random', schema.Scalar()),
                ),
                input_record
            ), "Struct input must contain 'full' and 'random' Scalars"
            self.input_record_full = input_record.full
            self.input_record_random = input_record.random

        elif isinstance(input_record, schema.Scalar):
            self.input_record_full = input_record
            self.input_record_random = input_record

        else:
            raise ValueError(
                "Expected input_record to be a schema.Struct or "
                "schema.Scalar, got %s" % type(input_record)
            )

        super(SemiRandomFeatures, self).__init__(
            model,
            self.input_record_full,
            output_dims,
            s=s,
            scale=scale_random,  # to initialize the random parameters
            weight_init=weight_init_random,
            bias_init=bias_init_random,
            weight_optim=None,  # random parameters are fixed, not trained
            bias_optim=None,
            set_weight_as_global_constant=set_weight_as_global_constant,
            initialize_output_schema=False,
            name=name,
            **kwargs)

        self.output_schema = schema.Struct(
            ('full', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_full_output')
            )),
            ('random', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_random_output')
            )),
        )

        # To initialize the learnable parameters
        assert (scale_learned > 0.0), \
            "Expected scale (learned) > 0, got %s" % scale_learned
        self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)

        # Learned parameters
        (self.learned_w, self.learned_b) = self._initialize_params(
            'learned_w',
            'learned_b',
            w_init=weight_init_learned,
            b_init=bias_init_learned,
            w_optim=weight_optim,
            b_optim=bias_optim
        )

    def add_ops(self, net):
        # Learned features: wx + b
        learned_features = net.FC(self.input_record_full.field_blobs() +
                                  [self.learned_w, self.learned_b],
                                  net.NextScopedBlob('learned_features'))
        # Random features: wx + b
        random_features = net.FC(self.input_record_random.field_blobs() +
                                 [self.random_w, self.random_b],
                                 net.NextScopedBlob('random_features'))
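        # H(x_rand) * x_rand^s, written into the 'random' field of the
        # output schema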
        processed_random_features = self._heaviside_with_power(
            net,
            random_features,
            self.output_schema.random.field_blobs(),
            self.s
        )
        # Semi-random output: H(x_rand) * x_rand^s * x_learned, written into
        # the 'full' field of the output schema
        net.Mul([processed_random_features, learned_features],
                self.output_schema.full.field_blobs())
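

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only; not part of the layer). Assumes a
# LayerModelHelper instance `model` whose input record carries a dense float
# feature; the record name below is hypothetical.
#
#     srf = SemiRandomFeatures(
#         model,
#         model.input_feature_schema.float_features,  # a schema.Scalar
#         output_dims=128,
#         s=1,
#     )
#     srf.add_ops(model.net)
#     # srf.output_schema.full holds the semi-random features;
#     # srf.output_schema.random holds H(x_rand) * x_rand^s.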