| syntax = "proto2"; |
| |
| package caffe2; |
| |
// A few notes about Caffe2's protobuffer convention:
// (1) Most objects are registered by their types, such as operators and nets.
//     For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extensions because they used to create quite a few
//     conflicts in Caffe's protobuf design.
// (3) We have not used any proto3-specific features, such as Any or Map. This
//     is mainly for backward compatibility purposes, but we may consider using
//     those in the future.
| |
| // ExternalDataProto stores the pointer to the content of TensorProto |
| // the content are stored in the raw format as little endian |
| message ExternalDataProto { |
| // type of the external storage type, can be the following: |
| enum SourceType { |
| // the container defined in torch/csrc/jit/serialization.h is used, |
| // and record_id is the tag to help the runtime identify the data |
| // this type of storage is set as DEFAULT and recommended for external |
| // data storage |
| INLINE_CONTAINER = 0; |
| // use external file to store the data, and record_id is the POSIX relative path |
| // to the file. this (simple) file is only for the data, and the data is stored |
| // as little endian in the file |
| SIMPLE_FILE = 1; |
| } |
| optional SourceType source_type = 1 [default = INLINE_CONTAINER]; |
| // used together with type |
| optional string record_id = 2; |
| // the size of the entire record (in bytes) |
| optional uint64 record_size = 5; |
| // the offset of the starting point, the content may be shared between |
| // multiple tensors |
| optional int64 offset = 3 [default = 0]; |
| // the strides of the content |
| repeated int64 strides = 4; |
| } |
| |
| // TensorProto stores serialized Tensor objects. |
| message TensorProto { |
| // The dimensions in the tensor. |
| repeated int64 dims = 1; |
| |
| // Data type |
| enum DataType { |
| UNDEFINED = 0; |
| |
| // Basic types |
| FLOAT = 1; // float |
| INT32 = 2; // int |
| BYTE = 3; // byte, when deserialized, is going to be restored as uint8 |
| STRING = 4; // string |
| |
| // Less-commonly used data types |
| BOOL = 5; // bool |
| UINT8 = 6; // uint8_t |
| INT8 = 7; // int8_t |
| UINT16 = 8; // uint16_t |
| INT16 = 9; // int16_t |
| INT64 = 10; // int64_t |
| FLOAT16 = 12; // at::Half |
| DOUBLE = 13; // double |
| } |
| optional DataType data_type = 2 [default = FLOAT]; |
| |
| // data storage |
| enum StorageType { |
| // the content is stored in typed field, for example, if the data_type is |
| // FLOAT, float_data is used to store the content. |
| TYPED = 1; |
| // the content is serialized in field raw_data as little-endian |
| RAW = 2; |
| // the pointer to the content is stored in field external_data |
| // the content is serialized as little-endian |
| EXTERNAL = 3; |
| // When StorageType is NO_CONTENT, we use TensorProto to store only type |
| // and shape information. Reuse TensorProto to store type and shape |
| // because we can just have one proto, not having another ValueInfoProto |
| NO_CONTENT = 4; |
| } |
| optional StorageType storage_type = 12 [default = TYPED]; |
| // For float |
| repeated float float_data = 3 [packed = true]; |
| // For int32, uint8, int8, uint16, int16, bool, and float16 |
| // Note about float16: in storage we will basically convert float16 byte-wise |
| // to unsigned short and then store them in the int32_data field. |
| repeated int32 int32_data = 4 [packed = true]; |
| // For bytes |
| optional bytes byte_data = 5; |
| // For strings |
| repeated bytes string_data = 6; |
| // For double |
| repeated double double_data = 9 [packed = true]; |
| // For int64 |
| repeated int64 int64_data = 10 [packed = true]; |
| // store the raw data, contents are serialized as little-endian |
| optional bytes raw_data = 13; |
| // store the pointer to the data |
| optional ExternalDataProto external_data = 14; |
| |
| // Optionally, a name for the tensor. |
| optional string name = 7; |
| |
| // Optionally, a TensorProto can contain the details about the device that |
| // it was serialized from. This is useful in cases like snapshotting a whole |
| // workspace in a multi-GPU environment. |
| optional DeviceOption device_detail = 8; |
| |
| // When loading from chunks this is going to indicate where to put data in the |
| // full array. When not used full data have to be present |
| message Segment { |
| required int64 begin = 1; |
| required int64 end = 2; |
| } |
| optional Segment segment = 11; |
| } |
| |
| message QTensorProto { |
| repeated int64 dims = 1; |
| required int32 precision = 2; |
| required double scale = 3; |
| required double bias = 4; |
| required bool is_signed = 5; |
| repeated int32 data = 6 [packed = true]; |
| optional string name = 7; |
| optional TensorProto.DataType data_type = 8 [default = INT32]; |
| |
| // Multi-group quantization params |
| repeated double scales = 9; |
| repeated double biases = 10; |
| |
| // Multi-group quantization needed, indicates in which dimension |
| // we do the "group wise quantization" |
| optional int32 axis = 11; |
| |
| // It should be true if it is a multi-group quantization proto |
| optional bool is_multiparam = 12 [default = false]; |
| } |
| |
| // TensorProtos stores multiple TensorProto objects in one single proto. This |
| // is useful for small tensors; For anything big, consider using a DB for |
| // storage. |
| message TensorProtos { |
| repeated TensorProto protos = 1; |
| } |
| |
| message TensorShape { |
| repeated int64 dims = 1; |
| optional TensorProto.DataType data_type = 2 [default = FLOAT]; |
| repeated int32 unknown_dims = 3; |
| optional bool unknown_shape = 4 [default = false]; |
| optional string name = 5; |
| } |
| |
| message TensorShapes { |
| repeated TensorShape shapes = 1; |
| } |
| |
| // A named argument containing either singular float, integer and string |
| // values, or repeated float, int and string arrays. |
| message Argument { |
| optional string name = 1; |
| |
| optional float f = 2; |
| optional int64 i = 3; |
| optional bytes s = 4; |
| optional TensorProto t = 10; |
| optional NetDef n = 8; |
| |
| repeated float floats = 5; |
| repeated int64 ints = 6; |
| repeated bytes strings = 7; |
| repeated TensorProto tensors = 11; |
| repeated NetDef nets = 9; |
| repeated QTensorProto qtensors = 12; |
| } |
| |
| // DeviceType that Caffe2 currently supports. |
| // Note: if you add a device type, make sure you add the corresponding device |
| // line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc |
| // and update c10/core/DeviceType.h |
| enum DeviceTypeProto { |
| PROTO_CPU = 0; // In default, we will use CPU. |
| PROTO_CUDA = 1; // CUDA. |
| PROTO_MKLDNN = 2; // Reserved for explicit MKLDNN |
| PROTO_OPENGL = 3; // OpenGL |
| PROTO_OPENCL = 4; // OpenCL |
| PROTO_IDEEP = 5; // IDEEP. |
| PROTO_HIP = 6; // AMD HIP |
| PROTO_FPGA = 7; // FPGA |
| PROTO_MSNPU = 8; // MSNPU |
| PROTO_XLA = 9; // XLA / TPU |
| // Change the following number if you add more devices in the code. |
| PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 10; |
| PROTO_ONLY_FOR_TEST = 20901; // This device type is only for test. |
| } |
| |
| // Device-specific options. We do not distinguish DeviceOption protos for |
| // different DeviceTypes, so currently all devices share the same DeviceOption |
| // proto. Fields that are specific to a device type is ignored if the type does |
| // not match. |
| // Note: if you add fields to the DeviceOption, make sure you add the |
| // corresponding changes to IsSameDevice() function in utils/proto_utils.{h,cc}. |
| message DeviceOption { |
| // [general] Options that need to be carried out before running the execution. |
| // optional DeviceType device_type = 1 [ default = CPU ]; |
| optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU. |
| // [general] Used together with device_type to identify the exact device |
| optional int32 device_id = 2; |
| // [general] The random seed to start the device random number generator with. |
| optional uint32 random_seed = 3; |
| // [general] What node this op should execute on. |
| // Used for net transformation purposes. Must be empty at execution time. |
| optional string node_name = 4; |
| // [CPU and Linux specific] NUMA node id |
| optional int32 numa_node_id = 5; |
| // [general] Extra information passed, not used at execution time currently. |
| repeated string extra_info = 6; |
| } |
| |
| // Operator Definition. |
| message OperatorDef { |
| repeated string input = 1; // the name of the input blobs |
| repeated string output = 2; // the name of output top blobs |
| optional string name = 3; // the operator name. This is optional. |
| // the operator type. This is needed to create the object from the operator |
| // registry. |
| optional string type = 4; |
| // arg is for the argument defined in operator schema |
| repeated Argument arg = 5; |
| |
| // The device option that the operator should run under. |
| optional DeviceOption device_option = 6; |
| |
| // Optionally, one can specify an engine when there are multiple |
| // implementations available simultaneously for one device type. |
| // If one specifies an engine but that engine does not exist in the compiled |
| // Caffe2 binary, Caffe2 will fall back to the default engine of that device |
| // type. |
| optional string engine = 7; |
| |
| |
| // Additional 'fake' inputs used for expressing control dependencies |
| // in the operator graph. This can be used to ensure that an |
| // operator does not run until another operator is ready, for e.g. |
| // scheduling control. These are not passed as actual inputs to the |
| // Operator implementation, and are only used by the Net class for |
| // scheduling purposes. |
| repeated string control_input = 8; |
| |
| // is_gradient_op argument is only used as a hint in shape inference |
| // and has no runtime significance |
| optional bool is_gradient_op = 9 [default = false]; |
| |
| // debug information associated with the construction of the operator. |
| // This is an optional string with no assumed characteristics as |
| // operators can be constructed in any language. |
| optional string debug_info = 10; |
| |
| // the domain of the operator to help runtime distinguish which operator |
| // library this OperatorDef refers to. For example, both caffe2 and aten |
| // has `Add` operator, with domain, we can easily decide which operator |
| // to execute. to support multiple operator libs, we use domain to |
| // distinguish which operator lib we refer to: |
| // - "caffe2" means this uses Caffe2 operator library |
| // - "aten" means this uses ATen operator library |
| // - "c10" is for the fused library |
| // - if the domain is missing or empty, we use "caffe2", this is for |
| // legacy models, new serializer should always export an OperatorDef |
| // with domain and op_version |
| optional string domain = 11; |
| // each operator is has its own version number. |
| // operator version information |
| // each time, we change the API or semantics of the operator, |
| // we bump the version for the operator. |
| // the runtime system should check the op_version of each OperatorDef |
| // and decide it should reject or accept the model |
| optional int64 op_version = 12; |
| } |
| |
| // Network definition. |
| message NetDef { |
| optional string name = 1; // the network's name |
| // Operators that the network contains. |
| // Note: this is not named "operator" because that is a reserved word in C++. |
| repeated OperatorDef op = 2; |
| |
| // The type of network that the net should be run with. This routes the |
| // network instantiation to different execution modes. The default mode, |
| // "simple", runs the operators in a sequential way as the original Caffe |
| // implementation does. |
| optional string type = 3; |
| |
| // the number of workers, if the operators in the network is to be carried out |
| // in parallel. |
| // Note: This is to be deprecated. Using the arg field with "num_workers" as |
| // key. |
| optional int32 num_workers = 4 [deprecated=true]; |
| |
| // The device option for the network. If a network has a specific device |
| // option and one of its operators does not have it set, we will copy over the |
| // device option to the operator. This allows us to basically avoid putting |
| // device options at every operator. |
| optional DeviceOption device_option = 5; |
| |
| repeated Argument arg = 6; |
| |
| // Two optional fields to declare external input and output of a net. |
| // If these two are set, when a net is created, we will sanity check for |
| // every op whether its input is declared (either as an external input, |
| // or as an intermediate blob created by one of the ops), and sanity check |
| // if all blobs in external_output are produced. |
| // |
| // In cases of memory optimization, declaring external_input and |
| // external_output also ensures that storage of these blobs are persistent: |
| // for any blob in external_input and external_output, after a network run |
| // finishes, their content are actually the right content. Any intermediate |
| // blobs' contents may be overwritten. |
| repeated string external_input = 7; |
| repeated string external_output = 8; |
| } |
| |
| // ExecutionStep is actually a sort-of-hacky way we simulate iteration right |
| // now. |
| message ExecutionStep { |
| // ExecutionStep should either contain a set of substeps, or a set of |
| // network names to run in this execution step. They should NOT both be set |
| // at the same time. |
| optional string name = 1; |
| // An execution step could be recursive, in which it involves a set of |
| // substeps. |
| repeated ExecutionStep substep = 2; |
| // Alternatively, an execution step could involve one or more networks. |
| // Note that you cannot have both substeps and networks. Choose one. |
| // Note that an execution step refers networks by their name. The actual |
| // network definition of the same name should be included in the network field |
| // of the plan. The reason is that a network object might hold internal states |
| // (think of a data layer), so we want to have the same network object that |
| // multiple steps could ask to run. |
| repeated string network = 3; |
| // Number of iterations to run this step. The substeps or the networks |
| // specified will be run sequentially, and one sequential run is considered |
| // one iteration. If this is not set, the number of iterations is assumed to |
| // be 1. |
| optional int64 num_iter = 4; |
| |
| // Criteria network specifies a single output (TensorCPU<bool>) of |
| // size (1), is run on every iteration by the executor, and |
| // execution terminates when the output[0] is `false`. |
| optional string criteria_network = 5 [deprecated=true]; |
| |
| // DEPRECATED. Use `run_every_ms`. |
| optional string report_net = 7; |
| optional int32 report_interval = 8; |
| |
| // If provided, execute this step at every time interval (in millisecs) |
| // while its sibiling execution steps execute in parallel. This step is |
| // guaranteed to run at least once after all non-interval siblings finished. |
| optional int64 run_every_ms = 11; |
| |
| // If false or not set, execute sub-steps serially. |
| // If true, execute all substeps concurrently, each one in a separte thread. |
| optional bool concurrent_substeps = 6; |
| |
| // Name of a scalar boolean tensor. |
| // ES checks this blob AFTER every substeps/subnets. |
| // If specified, and the value is true, then ES will skip the rest and return |
| // immediately. |
| // This means that the report_net and the first step will always be called. |
| // Use cases: |
| // 1) the first substep stops the rest if data condition not met |
| // 2) the first substep decide which of the rest of the steps should be run. |
| // 3) external control |
| // |
| // ** It is the user's responsibility to not to put this blob in race conditions. |
| // ** For example when setting this blob in concurrent substeps |
| optional string should_stop_blob = 9; |
| |
| // if only_once is true, this step will only be executed once. this ONLY takes |
| // effect when using should_stop_blob |
| optional bool only_once = 10; |
| |
| // Whether to create a child workspace for this step. |
| // If yes, the workflow and nets are re-created every time this step is run. |
| optional bool create_workspace = 12; |
| |
| // How many copies of the children execution steps to run concurrently. |
| optional int32 num_concurrent_instances = 13; |
| } |
| |
| message PlanDef { |
| // All the networks that are used in this execution. Note that networks should |
| // be ordered in the way they are executed, i.e. for a layer in a network, all |
| // its input blobs should already have been initialized by the layers or |
| // networks defined before it. |
| optional string name = 1; |
| // The networks that are going to be used in this plan. |
| repeated NetDef network = 2; |
| repeated ExecutionStep execution_step = 3; |
| } |
| |
| // Protobuf format for blobs that are not Tensors. We use a key to store the |
| // type of the blob. For example for a serialized DBProto, the type should |
| // be "DBReader" and the content should be a serialized DBProto object. |
| message BlobProto { |
| optional string name = 1; |
| optional string type = 2; |
| optional TensorProto tensor = 3; |
| optional bytes content = 4; |
| optional QTensorProto qtensor = 5; |
| // If blob is not Tensor and is divided into chunks, content_num_chunks |
| // contains number of chunks, into which blob was divided. |
| optional int32 content_num_chunks = 6; |
| optional int32 content_chunk_id = 7; |
| } |
| |
| // Protobuf format to serialize DBReader. |
| message DBReaderProto { |
| // The name for the DB object in the workspace. |
| optional string name = 1; |
| // The source of the DB |
| optional string source = 2; |
| // The type of the DB |
| optional string db_type = 3; |
| // The current key of the DB if the DB supports seeking. |
| optional string key = 4; |
| } |