caffe2/operators/bisect_percentile_op.cc - platform/external/pytorch - Git at Google

 #include "caffe2/operators/bisect_percentile_op.h"

 namespace caffe2 {

 // Registers BisectPercentileOp<CPUContext> as the CPU operator named
 // BisectPercentile.
 REGISTER_CPU_OPERATOR(BisectPercentile, BisectPercentileOp<CPUContext>);

 // Schema for BisectPercentile: exactly one input tensor (raw feature values),
 // exactly one output tensor (percentiles), and five arguments that carry the
 // concatenated per-feature raw-value-to-percentile mapping tables.
 //
 // NOTE(review): the doc text below says interpolation uses (R[t], R[t+1])
 // together with U[t] and L[t]. The kernel is declared in
 // bisect_percentile_op.h, which is not visible from this file -- confirm the
 // exact interpolation endpoints against it before relying on this wording.
 OPERATOR_SCHEMA(BisectPercentile)
     .NumInputs(1)
     .NumOutputs(1)
     .SetDoc(R"DOC(
     This operator maps raw feature values into their percentile
     representations using bisection, for one or more features.

     The input is the batch of input feature values, with the size of (batch_size,
     num_feature), where num_feature = F (F >= 1).

     For each feature, we also need additional information regarding the feature
     value distribution.
     There are several vectors to keep data to percentile mapping information
     as arguments (context):
     1. feature raw values (R)
     2. feature percentile mapping (P)
     3. feature percentile lower bound (L)
     4. feature percentile upper bound (U)

     A toy example:
     Suppose the sampled data distribution is as follows:
     1, 1, 2, 2, 2, 2, 2, 2, 3, 4
     We have the mapping vectors as follows:
     R = [1, 2, 3, 4]
     P = [0.15, 0.55, 0.9, 1.0]
     L = [0.1, 0.3, 0.9, 1.0]
     U = [0.2, 0.8, 0.9, 1.0]
     Where P is computed as (L + U) / 2.

     For a given list of feature values, X = [x_0, x_1, ..., x_i, ...], for each
     feature value (x_i) we first apply bisection to find the right index (t),
     such that R[t] <= x_i < R[t+1].
     If x_i = R[t], P[t] is returned;
     otherwise, the interpolation is applied using (R[t], R[t+1]) and (U[t] and L[t]).

     As there are F features (F >= 1), we concatenate all the R_f, P_f, L_f, and
     U_f for each feature f and use an additional argument, lengths, to keep
     track of the number of points for each set of raw feature value to
     percentile mapping.
     For example, there are two features:
     R_1 = [0.1, 0.4, 0.5];
     R_2 = [0.3, 1.2];
     We will build R = [0.1, 0.4, 0.5, 0.3, 1.2]; besides, we have
     lengths = [3, 2]
     to indicate the boundaries of the percentile information.

 )DOC")
     // Mapping tables: each is the concatenation of the per-feature vectors.
     .Arg(
         "percentile_raw",
         "1D tensor, which is the concatenation of all sorted raw feature "
         "values for all features.")
     .Arg(
         "percentile_mapping",
         "1D tensor. There is a one-to-one mapping between percentile_mapping "
         "and percentile_raw such that each element in percentile_mapping "
         "corresponds to the percentile value of the corresponding raw feature "
         "value.")
     .Arg(
         "percentile_lower",
         "1D tensor. There is a one-to-one mapping between percentile_lower "
         "and percentile_raw such that each element in percentile_lower "
         "corresponds to the percentile lower bound of the corresponding raw "
         "feature value.")
     .Arg(
         "percentile_upper",
         "1D tensor. There is a one-to-one mapping between percentile_upper "
         "and percentile_raw such that each element in percentile_upper "
         "corresponds to the percentile upper bound of the corresponding raw "
         "feature value.")
     // Per-feature segment sizes used to split the concatenated tables above.
     .Arg(
         "lengths",
         "1D tensor with one element per feature. Each element is the number "
         "of points in that feature's raw value to percentile mapping, which "
         "marks the feature boundaries inside the concatenated "
         "percentile_raw, percentile_mapping, percentile_lower and "
         "percentile_upper tensors.")
     .Input(
         0,
         "raw_values",
         "Input 2D tensor of floats of size (N, D), where N is the batch size "
         "and D is the feature dimension.")
     .Output(
         0,
         "percentile",
         "2D tensor of output with the same dimensions as the input raw_values.");

 // Declares that BisectPercentile has no gradient operator.
 NO_GRADIENT(BisectPercentile);

 } // namespace caffe2
	#include "caffe2/operators/bisect_percentile_op.h"

	namespace caffe2 {

	// Registers BisectPercentileOp<CPUContext> as the CPU operator named
	// BisectPercentile.
	REGISTER_CPU_OPERATOR(BisectPercentile, BisectPercentileOp<CPUContext>);

	// Schema for BisectPercentile: exactly one input tensor (raw feature values),
	// exactly one output tensor (percentiles), and five arguments that carry the
	// concatenated per-feature raw-value-to-percentile mapping tables.
	//
	// NOTE(review): the doc text below says interpolation uses (R[t], R[t+1])
	// together with U[t] and L[t]. The kernel is declared in
	// bisect_percentile_op.h, which is not visible from this file -- confirm the
	// exact interpolation endpoints against it before relying on this wording.
	OPERATOR_SCHEMA(BisectPercentile)
	    .NumInputs(1)
	    .NumOutputs(1)
	    .SetDoc(R"DOC(
	This operator maps raw feature values into their percentile
	representations using bisection, for one or more features.

	The input is the batch of input feature values, with the size of (batch_size,
	num_feature), where num_feature = F (F >= 1).

	For each feature, we also need additional information regarding the feature
	value distribution.
	There are several vectors to keep data to percentile mapping information
	as arguments (context):
	1. feature raw values (R)
	2. feature percentile mapping (P)
	3. feature percentile lower bound (L)
	4. feature percentile upper bound (U)

	A toy example:
	Suppose the sampled data distribution is as follows:
	1, 1, 2, 2, 2, 2, 2, 2, 3, 4
	We have the mapping vectors as follows:
	R = [1, 2, 3, 4]
	P = [0.15, 0.55, 0.9, 1.0]
	L = [0.1, 0.3, 0.9, 1.0]
	U = [0.2, 0.8, 0.9, 1.0]
	Where P is computed as (L + U) / 2.

	For a given list of feature values, X = [x_0, x_1, ..., x_i, ...], for each
	feature value (x_i) we first apply bisection to find the right index (t),
	such that R[t] <= x_i < R[t+1].
	If x_i = R[t], P[t] is returned;
	otherwise, the interpolation is applied using (R[t], R[t+1]) and (U[t] and L[t]).

	As there are F features (F >= 1), we concatenate all the R_f, P_f, L_f, and
	U_f for each feature f and use an additional argument, lengths, to keep
	track of the number of points for each set of raw feature value to
	percentile mapping.
	For example, there are two features:
	R_1 = [0.1, 0.4, 0.5];
	R_2 = [0.3, 1.2];
	We will build R = [0.1, 0.4, 0.5, 0.3, 1.2]; besides, we have
	lengths = [3, 2]
	to indicate the boundaries of the percentile information.

	)DOC")
	    // Mapping tables: each is the concatenation of the per-feature vectors.
	    .Arg(
	        "percentile_raw",
	        "1D tensor, which is the concatenation of all sorted raw feature "
	        "values for all features.")
	    .Arg(
	        "percentile_mapping",
	        "1D tensor. There is a one-to-one mapping between percentile_mapping "
	        "and percentile_raw such that each element in percentile_mapping "
	        "corresponds to the percentile value of the corresponding raw feature "
	        "value.")
	    .Arg(
	        "percentile_lower",
	        "1D tensor. There is a one-to-one mapping between percentile_lower "
	        "and percentile_raw such that each element in percentile_lower "
	        "corresponds to the percentile lower bound of the corresponding raw "
	        "feature value.")
	    .Arg(
	        "percentile_upper",
	        "1D tensor. There is a one-to-one mapping between percentile_upper "
	        "and percentile_raw such that each element in percentile_upper "
	        "corresponds to the percentile upper bound of the corresponding raw "
	        "feature value.")
	    // Per-feature segment sizes used to split the concatenated tables above.
	    .Arg(
	        "lengths",
	        "1D tensor with one element per feature. Each element is the number "
	        "of points in that feature's raw value to percentile mapping, which "
	        "marks the feature boundaries inside the concatenated "
	        "percentile_raw, percentile_mapping, percentile_lower and "
	        "percentile_upper tensors.")
	    .Input(
	        0,
	        "raw_values",
	        "Input 2D tensor of floats of size (N, D), where N is the batch size "
	        "and D is the feature dimension.")
	    .Output(
	        0,
	        "percentile",
	        "2D tensor of output with the same dimensions as the input raw_values.");

	// Declares that BisectPercentile has no gradient operator.
	NO_GRADIENT(BisectPercentile);

	} // namespace caffe2