syntax = "proto2";

package caffe2;

// A few notes about Caffe2's protobuf conventions:
// (1) Most objects are registered by their types, such as operators and nets.
//     For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extensions because they used to create quite some
//     conflicts in Caffe's protobuf design.
// (3) We have not used any proto3 specific features, such as Any or Map. This
//     is mainly for backward compatibility purposes but we may consider using
//     those in the future.

 | // TensorProto stores serialized Tensor objects. | 
 | message TensorProto { | 
 |   // The dimensions in the tensor. | 
 |   repeated int64 dims = 1; | 
 |  | 
 |   // Data type | 
 |   enum DataType { | 
 |     UNDEFINED = 0; | 
 |  | 
 |     // Basic types | 
 |     FLOAT = 1;     // float | 
 |     INT32 = 2;     // int | 
 |     BYTE = 3;      // byte, when deserialized, is going to be restored as uint8 | 
 |     STRING = 4;    // string | 
 |  | 
 |     // Less-commonly used data types | 
 |     BOOL = 5;      // bool | 
 |     UINT8 = 6;     // uint8_t | 
 |     INT8 = 7;      // int8_t | 
 |     UINT16 = 8;    // uint16_t | 
 |     INT16 = 9;     // int16_t | 
 |     INT64 = 10;    // int64_t | 
 |     FLOAT16 = 12;  // at::Half | 
 |     DOUBLE = 13;   // double | 
 |  | 
 |     ZERO_COLLISION_HASH = 14;  // zero-collision hash state | 
 |     REBATCHING_BUFFER= 15;     // rebatching buffer | 
 |   } | 
 |   // The type of the deserialized tensor data | 
 |   optional DataType data_type = 2 [default = FLOAT]; | 
 |  | 
 |   // The format of the serialized data. | 
 |   enum SerializationFormat { | 
 |     // FMT_PROTOBUF is the existing serialization format from before the | 
 |     // data_format field was introduced. Most data types are serialized using | 
 |     // the protobuf typed fields, although in some cases raw little endian data | 
 |     // is stored in the byte_data field instead. | 
 |     FMT_PROTOBUF = 0; | 
 |     // bfloat16 data stored in the raw_data field. | 
 |     FMT_BFLOAT16 = 1; | 
 |   } | 
 |   // data_format is a SerializationFormat enum value. | 
 |   // However, we intentionally store it as an integer value so we can | 
 |   // distinguish between old messages that do not have a data_format value vs | 
 |   // new messages that have a SerializationFormat value that we don't | 
 |   // understand.  If we stored this as an enum then protobuf would deserialize | 
 |   // both of these cases the same way. | 
 |   optional uint32 data_format = 15 [default = 0]; | 
 |  | 
 |   // For float | 
 |   repeated float float_data = 3 [packed = true]; | 
 |   // For int32, uint8, int8, uint16, int16, bool, and float16 | 
 |   // Note about float16: in storage we will basically convert float16 byte-wise | 
 |   // to unsigned short and then store them in the int32_data field. | 
 |   // Note: storing int8 and uint8 values in this field unfortunately results in | 
 |   // larger serialized data than necessary, as protobuf's varint encoding | 
 |   // scheme requires 2 bytes to represent int8 and uint8 values that have the | 
 |   // MSB set. | 
 |   repeated int32 int32_data = 4 [packed = true]; | 
 |   // For bytes | 
 |   optional bytes byte_data = 5; | 
 |   // For strings | 
 |   repeated bytes string_data = 6; | 
 |   // For double | 
 |   repeated double double_data = 9 [packed = true]; | 
 |   // For int64 | 
 |   repeated int64 int64_data = 10 [packed = true]; | 
 |   // store the raw data, contents are serialized as little-endian | 
 |   optional bytes raw_data = 13; | 
 |  | 
 |   // Optionally, a name for the tensor. | 
 |   optional string name = 7; | 
 |  | 
 |   // Optionally, a TensorProto can contain the details about the device that | 
 |   // it was serialized from. This is useful in cases like snapshotting a whole | 
 |   // workspace in a multi-GPU environment. | 
 |   optional DeviceOption device_detail = 8; | 
 |  | 
 |   // When loading from chunks this is going to indicate where to put data in the | 
 |   // full array. When not used full data have to be present | 
 |   message Segment { | 
 |     required int64 begin = 1; | 
 |     required int64 end = 2; | 
 |   } | 
 |   optional Segment segment = 11; | 
 |  | 
 |   // Field numbers 12 and 14 were previously used for now-deprecated fields. | 
 |   // reserved 12, 14; | 
 | } | 
 |  | 
 | message QTensorProto { | 
 |   repeated int64 dims = 1; | 
 |   required int32 precision = 2; | 
 |   required double scale = 3; | 
 |   required double bias = 4; | 
 |   required bool is_signed = 5; | 
 |   repeated int32 data = 6 [packed = true]; | 
 |   optional string name = 7; | 
 |   optional TensorProto.DataType data_type = 8 [default = INT32]; | 
 |  | 
 |   // Multi-group quantization params | 
 |   repeated double scales = 9; | 
 |   repeated double biases = 10; | 
 |  | 
 |   // Multi-group quantization needed, indicates in which dimension | 
 |   // we do the "group wise quantization" | 
 |   optional int32 axis = 11; | 
 |  | 
 |   // It should be true if it is a multi-group quantization proto | 
 |   optional bool is_multiparam = 12 [default = false]; | 
 | } | 
 |  | 
 | // TensorProtos stores multiple TensorProto objects in one single proto. This | 
 | // is useful for small tensors; For anything big, consider using a DB for | 
 | // storage. | 
 | message TensorProtos { | 
 |   repeated TensorProto protos = 1; | 
 | } | 
 |  | 
 | message TensorShape { | 
 |   repeated int64 dims = 1; | 
 |   optional TensorProto.DataType data_type = 2 [default = FLOAT]; | 
 |   repeated int32 unknown_dims = 3; | 
 |   optional bool unknown_shape = 4 [default = false]; | 
 |   optional string name = 5; | 
 | } | 
 |  | 
 | message TensorShapes { | 
 |   repeated TensorShape shapes = 1; | 
 | } | 
 |  | 
 | // TensorBoundShape is used to save bound shape inference result for a tensor. | 
 | // TensorBoundShape.shape is inferred shape for this tensor. | 
 | // TensorBoundShape.dimType contains dim_type for every dimension. | 
 | // eg: for dimension i, shape.dims[i] is the inferred shape and | 
 | // dim_type[i] is corresponding dim_type. | 
 | message TensorBoundShape { | 
 |   optional TensorShape shape = 1; | 
 |   enum DimType { | 
 |     UNKNOWN = 0;   // unknown | 
 |     CONSTANT  = 1; // constant | 
 |     // batch, corresponding dimension is batch_size | 
 |     BATCH = 2; | 
 |     // batch_of_feature_max, | 
 |     // corresponding shape is inferred_feature_length * batch_size | 
 |     BATCH_OF_FEATURE_MAX = 3; | 
 |     // batch_of_feature_max_default | 
 |     // corresponding shape is default_feature_length * batch_size | 
 |     BATCH_OF_FEATURE_MAX_DEFAULT = 4; | 
 |     // feature_max, corresponding shape is inferred_feature_length | 
 |     FEATURE_MAX = 5; | 
 |     // feature_max_default, corresponding shape is default_feature_length | 
 |     FEATURE_MAX_DEFAULT = 6; | 
 |  | 
 |   } | 
 |   repeated DimType dim_type = 2;  // dim_type.size() == shape.dims.size() | 
 |   optional string name = 3; | 
 |   // a flag to indicate whether the shape is final and cannot be changed | 
 |   // eg: input/output of in-place ops | 
 |   optional bool shape_is_final = 4; | 
 | } | 
 |  | 
 | message TensorBoundShapes { | 
 |   repeated TensorBoundShape shapes = 1; | 
 |   optional int64 max_batch_size = 2; | 
 |   optional int64 max_feature_len = 3; | 
 | } | 
 |  | 
 | message AOTConfig { | 
 |   required int64 max_batch_size = 1; | 
 |   required int64 max_seq_size = 2; | 
 |   required bool in_batch_broadcast = 3; | 
 |   optional string onnxifi_blacklist_ops = 4; | 
 |   optional int32 onnxifi_min_ops = 5; | 
 | } | 
 |  | 
 | // A named argument containing either singular float, integer and string | 
 | // values, or repeated float, int and string arrays. | 
 | message Argument { | 
 |   optional string name = 1; | 
 |  | 
 |   optional float f = 2; | 
 |   optional int64 i = 3; | 
 |   optional bytes s = 4; | 
 |   optional TensorProto t = 10; | 
 |   optional NetDef n = 8; | 
 |  | 
 |   repeated float floats = 5; | 
 |   repeated int64 ints = 6; | 
 |   repeated bytes strings = 7; | 
 |   repeated TensorProto tensors = 11; | 
 |   repeated NetDef nets = 9; | 
 |   repeated QTensorProto qtensors = 12; | 
 | } | 
 |  | 
 | // DeviceType that Caffe2 currently supports. | 
 | // Note: if you add a device type, make sure you add the corresponding device | 
 | // line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc | 
 | // and update c10/core/DeviceType.h | 
 | enum DeviceTypeProto { | 
 |   PROTO_CPU = 0;                    // In default, we will use CPU. | 
 |   PROTO_CUDA = 1;                   // CUDA. | 
 |   PROTO_MKLDNN = 2;                 // Reserved for explicit MKLDNN | 
 |   PROTO_OPENGL = 3;                 // OpenGL | 
 |   PROTO_OPENCL = 4;                 // OpenCL | 
 |   PROTO_IDEEP = 5;                  // IDEEP. | 
 |   PROTO_HIP = 6;                    // AMD HIP | 
 |   PROTO_FPGA = 7;                   // FPGA | 
 |   PROTO_MSNPU = 8;                  // MSNPU | 
 |   PROTO_XLA = 9;                    // XLA / TPU | 
 |   PROTO_MLC = 10;                   // ML Compute | 
 |   // Change the following number if you add more devices in the code. | 
 |   PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 11; | 
 | } | 
 |  | 
 | // Device-specific options. We do not distinguish DeviceOption protos for | 
 | // different DeviceTypes, so currently all devices share the same DeviceOption | 
 | // proto. Fields that are specific to a device type is ignored if the type does | 
 | // not match. | 
 | // Note: if you add fields to the DeviceOption, make sure you add the | 
 | // corresponding changes to IsSameDevice() function in utils/proto_utils.{h,cc}. | 
 | message DeviceOption { | 
 |   // [general] Options that need to be carried out before running the execution. | 
 |   // optional DeviceType device_type = 1 [ default = CPU ]; | 
 |   optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU. | 
 |   // [general] Used together with device_type to identify the exact device | 
 |   optional int32 device_id = 2; | 
 |   // [general] The random seed to start the device random number generator with. | 
 |   optional uint32 random_seed = 3; | 
 |   // [general] What node this op should execute on. | 
 |   // Used for net transformation purposes. Must be empty at execution time. | 
 |   optional string node_name = 4; | 
 |   // [CPU and Linux specific] NUMA node id | 
 |   optional int32 numa_node_id = 5; | 
 |   // [general] Extra information passed, not used at execution time currently. | 
 |   repeated string extra_info = 6; | 
 | } | 
 |  | 
 | // Operator Definition. | 
 | message OperatorDef { | 
 |   repeated string input = 1; // the name of the input blobs | 
 |   repeated string output = 2; // the name of output top blobs | 
 |   optional string name = 3; // the operator name. This is optional. | 
 |   // the operator type. This is needed to create the object from the operator | 
 |   // registry. | 
 |   optional string type = 4; | 
 |   // arg is for the argument defined in operator schema | 
 |   repeated Argument arg = 5; | 
 |  | 
 |   // The device option that the operator should run under. | 
 |   optional DeviceOption device_option = 6; | 
 |  | 
 |   // Optionally, one can specify an engine when there are multiple | 
 |   // implementations available simultaneously for one device type. | 
 |   // If one specifies an engine but that engine does not exist in the compiled | 
 |   // Caffe2 binary, Caffe2 will fall back to the default engine of that device | 
 |   // type. | 
 |   optional string engine = 7; | 
 |  | 
 |  | 
 |   // Additional 'fake' inputs used for expressing control dependencies | 
 |   // in the operator graph. This can be used to ensure that an | 
 |   // operator does not run until another operator is ready, for e.g. | 
 |   // scheduling control. These are not passed as actual inputs to the | 
 |   // Operator implementation, and are only used by the Net class for | 
 |   // scheduling purposes. | 
 |   repeated string control_input = 8; | 
 |  | 
 |   // is_gradient_op argument is only used as a hint in shape inference | 
 |   // and has no runtime significance | 
 |   optional bool is_gradient_op = 9 [default = false]; | 
 |  | 
 |   // debug information associated with the construction of the operator. | 
 |   // This is an optional string with no assumed characteristics as | 
 |   // operators can be constructed in any language. | 
 |   optional string debug_info = 10; | 
 |  | 
 |   // the domain of the operator to help runtime distinguish which operator | 
 |   // library this OperatorDef refers to. For example, both caffe2 and aten | 
 |   // has `Add` operator, with domain, we can easily decide which operator | 
 |   // to execute. to support multiple operator libs, we use domain to | 
 |   // distinguish which operator lib we refer to: | 
 |   //   - "caffe2" means this uses Caffe2 operator library | 
 |   //   - "aten" means this uses ATen operator library | 
 |   //   - "c10" is for the fused library | 
 |   //   - if the domain is missing or empty, we use "caffe2", this is for | 
 |   //     legacy models, new serializer should always export an OperatorDef | 
 |   //     with domain and op_version | 
 |   optional string domain = 11; | 
 |   // each operator is has its own version number. | 
 |   // operator version information | 
 |   // each time, we change the API or semantics of the operator, | 
 |   // we bump the version for the operator. | 
 |   // the runtime system should check the op_version of each OperatorDef | 
 |   // and decide it should reject or accept the model | 
 |   optional int64 op_version = 12; | 
 | } | 
 |  | 
 | // MapFieldEntry follows the pattern for cross-proto-version maps. | 
 | // See https://developers.google.com/protocol-buffers/docs/proto3#maps | 
 | message MapFieldEntry { | 
 |   required string key = 1; | 
 |   required string val = 2; | 
 | }; | 
 |  | 
 | // Used to hold backend-specific options. | 
 | message BackendOptions { | 
 |   // Name of the backend that the specified options apply to. | 
 |   required string backend_name = 1; | 
 |   // Flexible map for passing in the options. | 
 |   repeated MapFieldEntry option = 2; | 
 | }; | 
 |  | 
 | // Partition definition. | 
 | message PartitionInfo { | 
 |   // Name of the partition. | 
 |   required string name = 1; | 
 |  | 
 |   // A list of logic device ID, indicating which devices this partition | 
 |   // can be executed on. If empty, it means the partition won't run on | 
 |   // device but on host CPU instead. | 
 |   repeated int32 device_id = 2; | 
 |  | 
 |   // Extra debug info. | 
 |   optional string extra_info = 3; | 
 |  | 
 |   // Flexible map for passing options specific to a backend. | 
 |   repeated BackendOptions backend_options = 4; | 
 | } | 
 |  | 
 | // Network definition. | 
 | message NetDef { | 
 |   optional string name = 1; // the network's name | 
 |   // Operators that the network contains. | 
 |   // Note: this is not named "operator" because that is a reserved word in C++. | 
 |   repeated OperatorDef op = 2; | 
 |  | 
 |   // The type of network that the net should be run with. This routes the | 
 |   // network instantiation to different execution modes. The default mode, | 
 |   // "simple", runs the operators in a sequential way as the original Caffe | 
 |   // implementation does. | 
 |   optional string type = 3; | 
 |  | 
 |   // the number of workers, if the operators in the network is to be carried out | 
 |   // in parallel. | 
 |   // Note: This is to be deprecated. Using the arg field with "num_workers" as | 
 |   // key. | 
 |   // Note 2: The old uses of this were never actually cleaned up | 
 |   optional int32 num_workers = 4; | 
 |  | 
 |   // The device option for the network. If a network has a specific device | 
 |   // option and one of its operators does not have it set, we will copy over the | 
 |   // device option to the operator. This allows us to basically avoid putting | 
 |   // device options at every operator. | 
 |   optional DeviceOption device_option = 5; | 
 |  | 
 |   repeated Argument arg = 6; | 
 |  | 
 |   // Two optional fields to declare external input and output of a net. | 
 |   // If these two are set, when a net is created, we will sanity check for | 
 |   // every op whether its input is declared (either as an external input, | 
 |   // or as an intermediate blob created by one of the ops), and sanity check | 
 |   // if all blobs in external_output are produced. | 
 |   // | 
 |   // In cases of memory optimization, declaring external_input and | 
 |   // external_output also ensures that storage of these blobs are persistent: | 
 |   // for any blob in external_input and external_output, after a network run | 
 |   // finishes, their content are actually the right content. Any intermediate | 
 |   // blobs' contents may be overwritten. | 
 |   repeated string external_input = 7; | 
 |   repeated string external_output = 8; | 
 |  | 
 |   // Partitioning info, indexed by partition names. | 
 |   repeated PartitionInfo partition_info = 9; | 
 | } | 
 |  | 
 |  | 
 | // ExecutionStep is actually a sort-of-hacky way we simulate iteration right | 
 | // now. | 
 | message ExecutionStep { | 
 |   // ExecutionStep should either contain a set of substeps, or a set of | 
 |   // network names to run in this execution step. They should NOT both be set | 
 |   // at the same time. | 
 |   optional string name = 1; | 
 |   // An execution step could be recursive, in which it involves a set of | 
 |   // substeps. | 
 |   repeated ExecutionStep substep = 2; | 
 |   // Alternatively, an execution step could involve one or more networks. | 
 |   // Note that you cannot have both substeps and networks. Choose one. | 
 |   // Note that an execution step refers networks by their name. The actual | 
 |   // network definition of the same name should be included in the network field | 
 |   // of the plan. The reason is that a network object might hold internal states | 
 |   // (think of a data layer), so we want to have the same network object that | 
 |   // multiple steps could ask to run. | 
 |   repeated string network = 3; | 
 |   // Number of iterations to run this step. The substeps or the networks | 
 |   // specified will be run sequentially, and one sequential run is considered | 
 |   // one iteration. If this is not set, the number of iterations is assumed to | 
 |   // be 1. | 
 |   optional int64 num_iter = 4; | 
 |  | 
 |   // Criteria network specifies a single output (TensorCPU<bool>) of | 
 |   // size (1), is run on every iteration by the executor, and | 
 |   // execution terminates when the output[0] is `false`. | 
 |   optional string criteria_network = 5 [deprecated=true]; | 
 |  | 
 |   // DEPRECATED. Use `run_every_ms`. | 
 |   optional string report_net = 7; | 
 |   optional int32 report_interval = 8; | 
 |  | 
 |   // If provided, execute this step at every time interval (in millisecs) | 
 |   // while its sibiling execution steps execute in parallel. This step is | 
 |   // guaranteed to run at least once after all non-interval siblings finished. | 
 |   optional int64 run_every_ms = 11; | 
 |  | 
 |   // If false or not set, execute sub-steps serially. | 
 |   // If true, execute all substeps concurrently, each one in a separate thread. | 
 |   optional bool concurrent_substeps = 6; | 
 |  | 
 |   // Name of a scalar boolean tensor. | 
 |   // ES checks this blob AFTER every substeps/subnets. | 
 |   // If specified, and the value is true, then ES will skip the rest and return | 
 |   // immediately. | 
 |   // This means that the report_net and the first step will always be called. | 
 |   // Use cases: | 
 |   // 1) the first substep stops the rest if data condition not met | 
 |   // 2) the first substep decide which of the rest of the steps should be run. | 
 |   // 3) external control | 
 |   // | 
 |   // ** It is the user's responsibility to not to put this blob in race conditions. | 
 |   // ** For example when setting this blob in concurrent substeps | 
 |   optional string should_stop_blob = 9; | 
 |  | 
 |   // if only_once is true, this step will only be executed once. this ONLY takes | 
 |   // effect when using should_stop_blob | 
 |   optional bool only_once = 10; | 
 |  | 
 |   // Whether to create a child workspace for this step. | 
 |   // If yes, the workflow and nets are re-created every time this step is run. | 
 |   optional bool create_workspace = 12; | 
 |  | 
 |   // How many copies of the children execution steps to run concurrently. | 
 |   optional int32 num_concurrent_instances = 13; | 
 | } | 
 |  | 
 | message PlanDef { | 
 |   // All the networks that are used in this execution. Note that networks should | 
 |   // be ordered in the way they are executed, i.e. for a layer in a network, all | 
 |   // its input blobs should already have been initialized by the layers or | 
 |   // networks defined before it. | 
 |   optional string name = 1; | 
 |   // The networks that are going to be used in this plan. | 
 |   repeated NetDef network = 2; | 
 |   repeated ExecutionStep execution_step = 3; | 
 | } | 
 |  | 
 | // Protobuf format for blobs that are not Tensors. We use a key to store the | 
 | // type of the blob. For example for a serialized DBProto, the type should | 
 | // be "DBReader" and the content should be a serialized DBProto object. | 
 | message BlobProto { | 
 |   optional string name = 1; | 
 |   optional string type = 2; | 
 |   optional TensorProto tensor = 3; | 
 |   optional bytes content = 4; | 
 |   optional QTensorProto qtensor = 5; | 
 |   // If blob is not Tensor and is divided into chunks, content_num_chunks | 
 |   // contains number of chunks, into which blob was divided. | 
 |   optional int32 content_num_chunks = 6; | 
 |   optional int32 content_chunk_id = 7; | 
 | } | 
 |  | 
 | // Protobuf format to serialize DBReader. | 
 | message DBReaderProto { | 
 |   // The name for the DB object in the workspace. | 
 |   optional string name = 1; | 
 |   // The source of the DB | 
 |   optional string source = 2; | 
 |   // The type of the DB | 
 |   optional string db_type = 3; | 
 |   // The current key of the DB if the DB supports seeking. | 
 |   optional string key = 4; | 
 | } | 
 |  | 
 | message BlobSerializationOptions { | 
 |   // This set of options will only apply to blobs whose name matches this | 
 |   // pattern.  If the blob_name_pattern is empty then it will be treated as | 
 |   // matching all blobs. | 
 |   optional string blob_name_regex = 1; | 
 |  | 
 |   // Note: | 
 |   // - a chunk_size of 0 means "use the default chunk size".  The default chunk | 
 |   //   size is controlled by the --caffe2_tensor_chunk_size command line flag. | 
 |   // - a chunk size of -1 means to disable chunking, and serialize the blob in | 
 |   //   a single chunk. | 
 |   optional int64 chunk_size = 2; | 
 |  | 
 |   enum FloatFormat { | 
 |     // Use the current default serialization format, as chosen by the | 
 |     // current version of the code.  (At the time of writing this is PROTOBUF) | 
 |     FLOAT_DEFAULT = 0; | 
 |     // Store the data in the TensorProto's float_data field | 
 |     FLOAT_PROTOBUF = 1; | 
 |     // Serialize float values as bfloat16.  Note that this conversion is lossy. | 
 |     FLOAT_BFLOAT16 = 2; | 
 |   } | 
 |  | 
 |   // Settings for how to serialize tensors containing float values | 
 |   optional FloatFormat float_format = 3; | 
 | } | 
 |  | 
 | message SerializationOptions { | 
 |   // A set of options to use when serialializing blobs. | 
 |   // This is a list, sorted from highest to lowest precedence.  When | 
 |   // serializing a blob, the first entry whose blob_name_pattern matches the | 
 |   // blob name will be used. | 
 |   repeated BlobSerializationOptions options = 1; | 
 | } |