|  | #include "caffe2/operators/bucketize_op.h" | 
|  |  | 
|  | #include "caffe2/core/operator.h" | 
|  | #include "caffe2/core/tensor.h" | 
|  |  | 
|  | namespace caffe2 { | 
|  |  | 
|  | template <> | 
|  | bool BucketizeOp<CPUContext>::RunOnDevice() { | 
|  | auto& input = Input(X); | 
|  | CAFFE_ENFORCE_GE(input.dim(), 1); | 
|  |  | 
|  | auto N = input.numel(); | 
|  | auto* output = Output(INDICES, input.sizes(), at::dtype<int32_t>()); | 
|  | const auto* input_data = input.template data<float>(); | 
|  | auto* output_data = output->template mutable_data<int32_t>(); | 
|  |  | 
|  | math::Set<int32_t, CPUContext>(output->numel(), 0.0, output_data, &context_); | 
|  |  | 
|  | for (int64_t pos = 0; pos < N; pos++) { | 
|  | // here we assume the boundary values for each feature are sorted | 
|  | auto bucket_idx = | 
|  | std::lower_bound( | 
|  | boundaries_.begin(), boundaries_.end(), input_data[pos]) - | 
|  | boundaries_.begin(); | 
|  | output_data[pos] = bucket_idx; | 
|  | } | 
|  |  | 
|  | return true; | 
|  | }; | 
|  | REGISTER_CPU_OPERATOR(Bucketize, BucketizeOp<CPUContext>); | 
|  |  | 
|  | OPERATOR_SCHEMA(Bucketize) | 
|  | .NumInputs(1) | 
|  | .NumOutputs(1) | 
|  | .SetDoc(R"DOC( | 
|  | This operator works as bucketize in tensorflow and digitize | 
|  | in numpy. It bucketizes the input 'X' based on argument 'boundaries'. | 
|  | For each value x in input 'data', the operator returns index i given | 
|  | boundaries[i-1] < x <= boundaries[i]. | 
|  | If values in 'data' are beyond the bounds of boundaries, 0 or | 
|  | len(boundaries) is returned as appropriate. | 
|  | The boundaries need to be monotonically increasing. | 
|  | For example | 
|  |  | 
|  | If data = [2, 4, 1] and boundaries = [0.1, 2.5], then | 
|  |  | 
|  | output = [1, 2, 1] | 
|  |  | 
|  | If data = [[2, 3], [4, 1], [2, 5]] and boundaries = [0.1, 2.5], then | 
|  |  | 
|  | output = [[1, 2], [2, 1], [1, 2]] | 
|  |  | 
|  | )DOC") | 
|  | .Input(0, "data", "input tensor") | 
|  | .Output( | 
|  | 0, | 
|  | "output", | 
|  | "indices of bins given by boundaries to which each value" | 
|  | "in data belongs") | 
|  | .TensorInferenceFunction([](const OperatorDef& /* def */, | 
|  | const vector<TensorShape>& in) { | 
|  | vector<TensorShape> out(in); | 
|  | out[0].set_data_type(TensorProto::INT32); | 
|  | return out; | 
|  | }) | 
|  | .Arg("boundaries", "bucketization boundaries"); | 
|  |  | 
|  | NO_GRADIENT(BucketizeOp); | 
|  | } // namespace caffe2 | 
|  |  | 
|  | using BucketizeInt = caffe2::BucketizeOp<caffe2::CPUContext>; | 
|  |  | 
|  | C10_EXPORT_CAFFE2_OP_TO_C10_CPU( | 
|  | Bucketize, | 
|  | "_caffe2::Bucketize(Tensor data, float[] boundaries) -> Tensor output", | 
|  | BucketizeInt); |