|  | #include "caffe2/image/image_input_op.h" | 
|  |  | 
|  | #ifdef CAFFE2_USE_MKLDNN | 
|  | #include <caffe2/ideep/operators/operator_fallback_ideep.h> | 
|  | #include <caffe2/ideep/utils/ideep_operator.h> | 
|  | #endif | 
|  |  | 
|  | namespace caffe2 { | 
|  |  | 
|  | template <> | 
|  | bool ImageInputOp<CPUContext>::ApplyTransformOnGPU( | 
|  | const std::vector<std::int64_t>&, | 
|  | const c10::Device&) { | 
|  | return false; | 
|  | } | 
|  |  | 
// Registers ImageInputOp<CPUContext> as the CPU implementation of the
// "ImageInput" operator.
|  | REGISTER_CPU_OPERATOR(ImageInput, ImageInputOp<CPUContext>); | 
|  |  | 
|  | OPERATOR_SCHEMA(ImageInput) | 
|  | .NumInputs(0, 1) | 
|  | .NumOutputs(2, INT_MAX) | 
|  | .TensorInferenceFunction([](const OperatorDef& def, | 
|  | const vector<TensorShape>& /* unused */) { | 
|  | vector<TensorShape> out(2); | 
|  | ArgumentHelper helper(def); | 
|  | int batch_size = helper.GetSingleArgument<int>("batch_size", 0); | 
|  | int crop = helper.GetSingleArgument<int>("crop", -1); | 
|  | int color = helper.GetSingleArgument<int>("color", 1); | 
|  | CHECK_GT(crop, 0); | 
|  | out[0] = CreateTensorShape( | 
|  | vector<int>{batch_size, crop, crop, color ? 3 : 1}, | 
|  | TensorProto::FLOAT); | 
|  | out[1] = | 
|  | CreateTensorShape(vector<int>{1, batch_size}, TensorProto::INT32); | 
|  | return out; | 
|  | }) | 
|  | .SetDoc(R"DOC( | 
|  | Imports and processes images from a database. For each run of the operator, | 
|  | batch_size images will be processed. GPUs can optionally be used for | 
|  | part of the processing. | 
|  |  | 
|  | The following transformations are applied to the image | 
|  | - A bounding box is applied to the initial image (optional) | 
|  | - The image is rescaled either up or down (with the scale argument) or | 
|  | just up (with the minsize argument) | 
|  | - The image is randomly cropped (crop size is passed as an argument but | 
|  | the location of the crop is random except if is_test is passed in which case | 
|  | the image in cropped at the center) | 
|  | - The image is normalized. Each of its color channels can have separate | 
|  | normalization values | 
|  |  | 
|  | The dimension of the output image will always be cropxcrop | 
|  | )DOC") | 
|  | .Arg( | 
|  | "batch_size", | 
|  | "Number of images to output for each run of the operator" | 
|  | ". Must be 1 or greater") | 
|  | .Arg("color", "Number of color channels (1 or 3). Defaults to 1") | 
|  | .Arg("color_jitter", "Whether or not to do color jitter. Defaults to 0") | 
|  | .Arg( | 
|  | "img_saturation", | 
|  | "Image saturation scale used in color jittering. " | 
|  | "Defaults to 0.4") | 
|  | .Arg( | 
|  | "img_brightness", | 
|  | "Image brightness scale used in color jittering. " | 
|  | "Defaults to 0.4") | 
|  | .Arg( | 
|  | "img_contrast", | 
|  | "Image contrast scale used in color jittering. " | 
|  | "Defaults to 0.4") | 
|  | .Arg( | 
|  | "color_lighting", | 
|  | "Whether or not to do color lighting." | 
|  | " Defaults to 0") | 
|  | .Arg( | 
|  | "color_lighting_std", | 
|  | "Std of normal distribution where color lighting" | 
|  | " scaling factor is sampled. Defaults to 0.1") | 
|  | .Arg( | 
|  | "scale_jitter_type", | 
|  | "Type 0: No scale jittering " | 
|  | "Type 1: Inception-style scale jittering") | 
|  | .Arg( | 
|  | "label_type", | 
|  | "Type 0: single integer label for multi-class " | 
|  | "classification. Type 1: sparse active label indices for multi-label " | 
|  | "classification. Type 2: dense label embedding vector for label " | 
|  | "embedding regression") | 
|  | .Arg( | 
|  | "scale", | 
|  | "Scale the size of the smallest dimension of the image to" | 
|  | " this. Scale and minsize are mutually exclusive." | 
|  | " Must be larger than crop") | 
|  | .Arg( | 
|  | "minsize", | 
|  | "Scale the size of the smallest dimension of the image to" | 
|  | " this only if the size is initially smaller. Scale and minsize are" | 
|  | " mutually exclusive. Must be larger than crop.") | 
|  | .Arg( | 
|  | "warp", | 
|  | "If 1, both dimensions of the image will be set to minsize or" | 
|  | " scale; otherwise, the other dimension is proportionally scaled." | 
|  | " Defaults to 0") | 
|  | .Arg("crop", "Size to crop the image to. Must be provided") | 
|  | .Arg("mirror", "Whether or not to mirror the image. Defaults to 0") | 
|  | .Arg( | 
|  | "mean", | 
|  | "Mean by which to normalize color channels." | 
|  | " Defaults to 0.") | 
|  | .Arg( | 
|  | "mean_per_channel", | 
|  | "Vector of means per color channel " | 
|  | " (1 or 3 elements). Defaults to mean argument. Channel order BGR") | 
|  | .Arg( | 
|  | "std", | 
|  | "Standard deviation by which to normalize color channels." | 
|  | " Defaults to 1.") | 
|  | .Arg( | 
|  | "std_per_channel", | 
|  | "Vector of standard dev. per color channel " | 
|  | " (1 or 3 elements). Defaults to std argument. Channel order is BGR") | 
|  | .Arg("bounding_ymin", "Bounding box coordinate. Defaults to -1 (none)") | 
|  | .Arg("bounding_xmin", "Bounding box coordinate. Defaults to -1 (none)") | 
|  | .Arg("bounding_height", "Bounding box coordinate. Defaults to -1 (none)") | 
|  | .Arg("bounding_width", "Bounding box coordinate. Defaults to -1 (none)") | 
|  | .ArgIsTest("Set to 1 to do deterministic cropping. Defaults to 0") | 
|  | .Arg("use_caffe_datum", "1 if the input is in Caffe format. Defaults to 0") | 
|  | .Arg( | 
|  | "use_gpu_transform", | 
|  | "1 if GPU acceleration should be used." | 
|  | " Defaults to 0. Can only be 1 in a CUDAContext") | 
|  | .Arg( | 
|  | "decode_threads", | 
|  | "Number of CPU decode/transform threads." | 
|  | " Defaults to 4") | 
|  | .Arg("output_type", "If gpu_transform, can set to FLOAT or FLOAT16.") | 
|  | .Arg("db", "Name of the database (if not passed as input)") | 
|  | .Arg( | 
|  | "db_type", | 
|  | "Type of database (if not passed as input)." | 
|  | " Defaults to leveldb") | 
|  | .Arg( | 
|  | "output_sizes", | 
|  | "The sizes of any outputs besides the data and label " | 
|  | "(should have a number of elements equal to the number of additional " | 
|  | "outputs)") | 
|  | .Arg( | 
|  | "random_scale", | 
|  | "[min, max] shortest-side desired for image resize. " | 
|  | "Defaults to [-1, -1] or no random resize desired.") | 
|  | .Input(0, "reader", "The input reader (a db::DBReader)") | 
|  | .Output(0, "data", "Tensor containing the images") | 
|  | .Output(1, "label", "Tensor containing the labels") | 
|  | .Output( | 
|  | 2, | 
|  | "additional outputs", | 
|  | "Any outputs after the first 2 will be " | 
|  | "Tensors read from the input TensorProtos"); | 
|  |  | 
// Marks the ImageInput operator as having no gradient.
|  | NO_GRADIENT(ImageInput); | 
|  |  | 
// Only when built with CAFFE2_USE_MKLDNN: also register ImageInput as an
// IDEEP operator, using IDEEPFallbackOp wrapped around the CPU implementation.
// NOTE(review): presumably the fallback wrapper just runs the CPU operator --
// confirm in operator_fallback_ideep.h.
|  | #ifdef CAFFE2_USE_MKLDNN | 
|  | REGISTER_IDEEP_OPERATOR(ImageInput, IDEEPFallbackOp<ImageInputOp<CPUContext>>); | 
|  | #endif | 
|  |  | 
|  | } // namespace caffe2 |