// caffe2/proto/caffe2.proto - platform/external/pytorch - Git at Google

 syntax = "proto2";

 package caffe2;

 // A few notes about Caffe2's protobuf conventions:
 // (1) Most objects are registered by their types, such as operators and nets.
 //     For these, we have a string-type field "type" for registration purposes.
 // (2) We do not use extension because that used to create quite some conflicts
 //     in Caffe's protobuf design.
 // (3) We have not used any proto3 specific features, such as Any or Map. This
 //     is mainly for backward compatibility purposes but we may consider using
 //     those in the future.

 // TensorProto stores serialized Tensor objects.
 message TensorProto {
   // The dimensions in the tensor.
   repeated int64 dims = 1;

   // Element type of the tensor. The numeric values are part of the
   // serialized format; note that 11 is not assigned to any type.
   enum DataType {
     UNDEFINED = 0;

     // Basic types
     FLOAT = 1;     // float
     INT32 = 2;     // int
     BYTE = 3;      // byte, when deserialized, is going to be restored as uint8
     STRING = 4;    // string

     // Less-commonly used data types
     BOOL = 5;      // bool
     UINT8 = 6;     // uint8_t
     INT8 = 7;      // int8_t
     UINT16 = 8;    // uint16_t
     INT16 = 9;     // int16_t
     INT64 = 10;    // int64_t
     FLOAT16 = 12;  // at::Half
     DOUBLE = 13;   // double

     ZERO_COLLISION_HASH = 14;  // zero-collision hash state
     REBATCHING_BUFFER = 15;    // rebatching buffer
   }
   // The type of the deserialized tensor data. Reads as FLOAT when unset.
   optional DataType data_type = 2 [default = FLOAT];

   // The format of the serialized data.
   enum SerializationFormat {
     // FMT_PROTOBUF is the existing serialization format from before the
     // data_format field was introduced. Most data types are serialized using
     // the protobuf typed fields, although in some cases raw little endian data
     // is stored in the byte_data field instead.
     FMT_PROTOBUF = 0;
     // bfloat16 data stored in the raw_data field.
     FMT_BFLOAT16 = 1;
   }
   // data_format is a SerializationFormat enum value.
   // However, we intentionally store it as an integer value so we can
   // distinguish between old messages that do not have a data_format value vs
   // new messages that have a SerializationFormat value that we don't
   // understand.  If we stored this as an enum then protobuf would deserialize
   // both of these cases the same way.
   optional uint32 data_format = 15 [default = 0];

   // For float
   repeated float float_data = 3 [packed = true];
   // For int32, uint8, int8, uint16, int16, bool, and float16
   // Note about float16: in storage we will basically convert float16 byte-wise
   // to unsigned short and then store them in the int32_data field.
   // Note: storing int8 and uint8 values in this field unfortunately results in
   // larger serialized data than necessary, as protobuf's varint encoding
   // scheme requires 2 bytes to represent int8 and uint8 values that have the
   // MSB set.
   repeated int32 int32_data = 4 [packed = true];
   // For bytes
   optional bytes byte_data = 5;
   // For strings
   repeated bytes string_data = 6;
   // For double
   repeated double double_data = 9 [packed = true];
   // For int64
   repeated int64 int64_data = 10 [packed = true];
   // Stores the raw data; contents are serialized as little-endian.
   optional bytes raw_data = 13;

   // Optionally, a name for the tensor.
   optional string name = 7;

   // Optionally, a TensorProto can contain the details about the device that
   // it was serialized from. This is useful in cases like snapshotting a whole
   // workspace in a multi-GPU environment.
   optional DeviceOption device_detail = 8;

   // When loading from chunks, a Segment indicates where this chunk's data
   // belongs in the full array. When `segment` is not set, the full data has
   // to be present in this message.
   // NOTE(review): presumably begin/end are element offsets forming a
   // half-open range -- confirm against the tensor (de)serializer.
   message Segment {
     required int64 begin = 1;
     required int64 end = 2;
   }
   optional Segment segment = 11;

   // Field numbers 12 and 14 were previously used for now-deprecated fields.
   // Reserving them makes protoc reject any new field that reuses either
   // number, which would otherwise misread data written by old producers.
   reserved 12, 14;
 }

 // QTensorProto stores a serialized quantized tensor: the integer payload in
 // `data` together with the quantization parameters that describe it.
 message QTensorProto {
   // The dimensions in the tensor.
   repeated int64 dims = 1;
   // NOTE(review): presumably the number of bits per quantized value --
   // confirm against the (de)serializer.
   required int32 precision = 2;
   // Quantization scale and bias.
   // NOTE(review): presumably the single-group parameters, with
   // `scales`/`biases` used instead when is_multiparam is true -- confirm.
   required double scale = 3;
   required double bias = 4;
   // Whether the quantized values are signed.
   required bool is_signed = 5;
   // The quantized values.
   repeated int32 data = 6 [packed = true];
   // Optionally, a name for the tensor.
   optional string name = 7;
   // Data type of the tensor. Reads as INT32 when unset.
   optional TensorProto.DataType data_type = 8 [default = INT32];

   // Multi-group quantization params
   repeated double scales = 9;
   repeated double biases = 10;

   // Multi-group quantization needed, indicates in which dimension
   // we do the "group wise quantization"
   optional int32 axis = 11;

   // It should be true if it is a multi-group quantization proto
   optional bool is_multiparam = 12 [default = false];
 }

 // TensorProtos stores multiple TensorProto objects in one single proto. This
 // is useful for small tensors; For anything big, consider using a DB for
 // storage.
 message TensorProtos {
   // The contained tensors, each a complete TensorProto.
   repeated TensorProto protos = 1;
 }

 // TensorShape describes a tensor's dimensions and element type; it has no
 // field for tensor data.
 message TensorShape {
   // The dimensions of the tensor.
   repeated int64 dims = 1;
   // Element type. Reads as FLOAT when unset.
   optional TensorProto.DataType data_type = 2 [default = FLOAT];
   // NOTE(review): presumably the indices into `dims` whose sizes are not
   // known -- confirm against shape inference.
   repeated int32 unknown_dims = 3;
   // Reads as false when unset.
   // NOTE(review): presumably set when the shape as a whole could not be
   // determined -- confirm against shape inference.
   optional bool unknown_shape = 4 [default = false];
   // Optional name associated with this shape.
   optional string name = 5;
 }

 // TensorShapes is a list of TensorShape messages.
 message TensorShapes {
   repeated TensorShape shapes = 1;
 }

 // TensorBoundShape is used to save bound shape inference result for a tensor.
 // TensorBoundShape.shape is inferred shape for this tensor.
 // TensorBoundShape.dim_type contains the DimType for every dimension.
 // eg: for dimension i, shape.dims[i] is the inferred shape and
 // dim_type[i] is corresponding dim_type.
 message TensorBoundShape {
   // The inferred shape for this tensor.
   optional TensorShape shape = 1;
   // Describes how a single dimension of `shape` is bound.
   enum DimType {
     UNKNOWN = 0;   // unknown
     CONSTANT  = 1; // constant
     // batch, corresponding dimension is batch_size
     BATCH = 2;
     // batch_of_feature_max,
     // corresponding shape is inferred_feature_length * batch_size
     BATCH_OF_FEATURE_MAX = 3;
     // batch_of_feature_max_default
     // corresponding shape is default_feature_length * batch_size
     BATCH_OF_FEATURE_MAX_DEFAULT = 4;
     // feature_max, corresponding shape is inferred_feature_length
     FEATURE_MAX = 5;
     // feature_max_default, corresponding shape is default_feature_length
     FEATURE_MAX_DEFAULT = 6;

   }
   // One entry per dimension: dim_type[i] describes shape.dims[i].
   repeated DimType dim_type = 2;  // dim_type.size() == shape.dims.size()
   // Optional name associated with this bound shape.
   optional string name = 3;
   // a flag to indicate whether the shape is final and cannot be changed
   // eg: input/output of in-place ops
   optional bool shape_is_final = 4;
 }

 // TensorBoundShapes is a list of TensorBoundShape results plus two optional
 // limits.
 message TensorBoundShapes {
   repeated TensorBoundShape shapes = 1;
   // NOTE(review): presumably the batch size and feature length bounds that
   // the shapes above were inferred with -- confirm against the producer.
   optional int64 max_batch_size = 2;
   optional int64 max_feature_len = 3;
 }

 // AOTConfig carries three required settings and two optional onnxifi
 // settings.
 // NOTE(review): "AOT" presumably stands for ahead-of-time compilation --
 // confirm with the consumer of this message.
 message AOTConfig {
   // Required: a message missing any of these three fields is not
   // considered initialized by protobuf.
   required int64 max_batch_size = 1;
   required int64 max_seq_size = 2;
   required bool in_batch_broadcast = 3;
   // NOTE(review): a single string; the separator used to list several ops
   // is not visible from this file -- confirm with the consumer.
   optional string onnxifi_blacklist_ops = 4;
   optional int32 onnxifi_min_ops = 5;
 }

 // A named argument containing either singular float, integer and string
 // values, or repeated float, int and string arrays.
 // Note: the schema does not use a oneof, so nothing here prevents several
 // of the value fields from being set at once.
 message Argument {
   // The argument's name.
   optional string name = 1;

   // Singular values.
   optional float f = 2;
   optional int64 i = 3;
   optional bytes s = 4;  // string value, declared as bytes
   optional TensorProto t = 10;
   optional NetDef n = 8;

   // Repeated values.
   repeated float floats = 5;
   repeated int64 ints = 6;
   repeated bytes strings = 7;  // string values, declared as bytes
   repeated TensorProto tensors = 11;
   repeated NetDef nets = 9;
   repeated QTensorProto qtensors = 12;
 }

 // DeviceType that Caffe2 currently supports.
 // Note: if you add a device type, make sure you add the corresponding device
 // line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc
 // and update c10/core/DeviceType.h
 enum DeviceTypeProto {
   PROTO_CPU = 0;                    // In default, we will use CPU.
   PROTO_CUDA = 1;                   // CUDA.
   PROTO_MKLDNN = 2;                 // Reserved for explicit MKLDNN
   PROTO_OPENGL = 3;                 // OpenGL
   PROTO_OPENCL = 4;                 // OpenCL
   PROTO_IDEEP = 5;                  // IDEEP.
   PROTO_HIP = 6;                    // AMD HIP
   PROTO_FPGA = 7;                   // FPGA
   PROTO_MSNPU = 8;                  // MSNPU
   PROTO_XLA = 9;                    // XLA / TPU
   PROTO_MLC = 10;                   // ML Compute
   // Not a device: this is the count of device types listed above (one past
   // the largest device value).
   // Change the following number if you add more devices in the code.
   PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 11;
 }

 // Device-specific options. We do not distinguish DeviceOption protos for
 // different DeviceTypes, so currently all devices share the same DeviceOption
 // proto. Fields that are specific to a device type are ignored if the type
 // does not match.
 // Note: if you add fields to the DeviceOption, make sure you add the
 // corresponding changes to IsSameDevice() function in utils/proto_utils.{h,cc}.
 message DeviceOption {
   // [general] Options that need to be carried out before running the execution.
   // The field is declared as a plain int32 rather than as an enum; the line
   // below is the earlier enum-typed declaration, kept for reference.
   // NOTE(review): values presumably mirror DeviceTypeProto (PROTO_CPU = 0)
   // -- confirm against proto_utils.
   // optional DeviceType device_type = 1 [ default = CPU ];
   optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU.
   // [general] Used together with device_type to identify the exact device
   optional int32 device_id = 2;
   // [general] The random seed to start the device random number generator with.
   optional uint32 random_seed = 3;
   // [general] What node this op should execute on.
   // Used for net transformation purposes. Must be empty at execution time.
   optional string node_name = 4;
   // [CPU and Linux specific] NUMA node id
   optional int32 numa_node_id = 5;
   // [general] Extra information passed, not used at execution time currently.
   repeated string extra_info = 6;
 }

 // Operator Definition.
 message OperatorDef {
   repeated string input = 1; // the names of the input blobs
   repeated string output = 2; // the names of the output (top) blobs
   optional string name = 3; // the operator name. This is optional.
   // the operator type. This is needed to create the object from the operator
   // registry.
   optional string type = 4;
   // arg is for the arguments defined in the operator schema
   repeated Argument arg = 5;

   // The device option that the operator should run under.
   optional DeviceOption device_option = 6;

   // Optionally, one can specify an engine when there are multiple
   // implementations available simultaneously for one device type.
   // If one specifies an engine but that engine does not exist in the compiled
   // Caffe2 binary, Caffe2 will fall back to the default engine of that device
   // type.
   optional string engine = 7;


   // Additional 'fake' inputs used for expressing control dependencies
   // in the operator graph. This can be used to ensure that an
   // operator does not run until another operator is ready, e.g. for
   // scheduling control. These are not passed as actual inputs to the
   // Operator implementation, and are only used by the Net class for
   // scheduling purposes.
   repeated string control_input = 8;

   // is_gradient_op argument is only used as a hint in shape inference
   // and has no runtime significance
   optional bool is_gradient_op = 9 [default = false];

   // debug information associated with the construction of the operator.
   // This is an optional string with no assumed characteristics as
   // operators can be constructed in any language.
   optional string debug_info = 10;

   // the domain of the operator, to help the runtime distinguish which
   // operator library this OperatorDef refers to. For example, both caffe2
   // and aten have an `Add` operator; with domain, we can easily decide which
   // operator to execute. To support multiple operator libs, we use domain to
   // distinguish which operator lib we refer to:
   //   - "caffe2" means this uses Caffe2 operator library
   //   - "aten" means this uses ATen operator library
   //   - "c10" is for the fused library
   //   - if the domain is missing or empty, we use "caffe2", this is for
   //     legacy models, new serializer should always export an OperatorDef
   //     with domain and op_version
   optional string domain = 11;
   // operator version information: each operator has its own version number.
   // each time we change the API or semantics of the operator,
   // we bump the version for the operator.
   // the runtime system should check the op_version of each OperatorDef
   // and decide whether it should reject or accept the model
   optional int64 op_version = 12;
 }

 // MapFieldEntry follows the pattern for cross-proto-version maps.
 // See https://developers.google.com/protocol-buffers/docs/proto3#maps
 // One string-to-string entry of a map that is expressed as a repeated field.
 // Both parts are required.
 message MapFieldEntry {
   // The entry's key.
   required string key = 1;
   // The value stored under `key`.
   required string val = 2;
 }

 // Used to hold backend-specific options.
 // Options addressed to one named backend.
 message BackendOptions {
   // Name of the backend that the specified options apply to.
   required string backend_name = 1;
   // Flexible map for passing in the options, one MapFieldEntry per
   // key/value pair.
   repeated MapFieldEntry option = 2;
 }

 // Partition definition.
 message PartitionInfo {
   // Name of the partition. Required.
   required string name = 1;

   // A list of logical device IDs, indicating which devices this partition
   // can be executed on. If empty, it means the partition won't run on
   // device but on host CPU instead.
   repeated int32 device_id = 2;

   // Extra debug info.
   optional string extra_info = 3;

   // Flexible map for passing options specific to a backend; one
   // BackendOptions entry per backend name.
   repeated BackendOptions backend_options = 4;
 }

 // Network definition.
 message NetDef {
   optional string name = 1; // the network's name
   // Operators that the network contains.
   // Note: this is not named "operator" because that is a reserved word in C++.
   repeated OperatorDef op = 2;

   // The type of network that the net should be run with. This routes the
   // network instantiation to different execution modes. The default mode,
   // "simple", runs the operators in a sequential way as the original Caffe
   // implementation does.
   optional string type = 3;

   // The number of workers, if the operators in the network are to be
   // carried out in parallel.
   // Note: This is to be deprecated. Use the arg field with "num_workers" as
   // key instead.
   // Note 2: The old uses of this were never actually cleaned up
   optional int32 num_workers = 4;

   // The device option for the network. If a network has a specific device
   // option and one of its operators does not have it set, we will copy over the
   // device option to the operator. This allows us to basically avoid putting
   // device options at every operator.
   optional DeviceOption device_option = 5;

   // Net-level arguments (for example the "num_workers" key mentioned above).
   repeated Argument arg = 6;

   // Two optional fields to declare external input and output of a net.
   // If these two are set, when a net is created, we will sanity check for
   // every op whether its input is declared (either as an external input,
   // or as an intermediate blob created by one of the ops), and sanity check
   // if all blobs in external_output are produced.
   //
   // In cases of memory optimization, declaring external_input and
   // external_output also ensures that storage of these blobs is persistent:
   // for any blob in external_input and external_output, after a network run
   // finishes, its content is actually the right content. Any intermediate
   // blobs' contents may be overwritten.
   repeated string external_input = 7;
   repeated string external_output = 8;

   // Partitioning info, indexed by partition names.
   repeated PartitionInfo partition_info = 9;
 }


 // ExecutionStep is actually a sort-of-hacky way we simulate iteration right
 // now.
 message ExecutionStep {
   // ExecutionStep should either contain a set of substeps, or a set of
   // network names to run in this execution step. They should NOT both be set
   // at the same time.
   optional string name = 1;
   // An execution step could be recursive, in which it involves a set of
   // substeps.
   repeated ExecutionStep substep = 2;
   // Alternatively, an execution step could involve one or more networks.
   // Note that you cannot have both substeps and networks. Choose one.
   // Note that an execution step refers to networks by their name. The actual
   // network definition of the same name should be included in the network field
   // of the plan. The reason is that a network object might hold internal states
   // (think of a data layer), so we want to have the same network object that
   // multiple steps could ask to run.
   repeated string network = 3;
   // Number of iterations to run this step. The substeps or the networks
   // specified will be run sequentially, and one sequential run is considered
   // one iteration. If this is not set, the number of iterations is assumed to
   // be 1.
   optional int64 num_iter = 4;

   // Criteria network specifies a single output (TensorCPU<bool>) of
   // size (1), is run on every iteration by the executor, and
   // execution terminates when the output[0] is `false`.
   optional string criteria_network = 5 [deprecated=true];

   // DEPRECATED. Use `run_every_ms`.
   // NOTE(review): unlike criteria_network, neither field below carries
   // [deprecated=true], and it is not clear from this file whether the
   // notice is meant to cover report_interval as well -- confirm.
   optional string report_net = 7;
   optional int32 report_interval = 8;

   // If provided, execute this step at every time interval (in millisecs)
   // while its sibling execution steps execute in parallel. This step is
   // guaranteed to run at least once after all non-interval siblings finished.
   optional int64 run_every_ms = 11;

   // If false or not set, execute sub-steps serially.
   // If true, execute all substeps concurrently, each one in a separate thread.
   optional bool concurrent_substeps = 6;

   // Name of a scalar boolean tensor.
   // ES checks this blob AFTER every substep/subnet.
   // If specified, and the value is true, then ES will skip the rest and return
   // immediately.
   // This means that the report_net and the first step will always be called.
   // Use cases:
   // 1) the first substep stops the rest if a data condition is not met
   // 2) the first substep decides which of the rest of the steps should be run.
   // 3) external control
   //
   // ** It is the user's responsibility not to put this blob in race
   // ** conditions, for example when setting this blob in concurrent substeps.
   optional string should_stop_blob = 9;

   // If only_once is true, this step will only be executed once. This ONLY
   // takes effect when using should_stop_blob.
   optional bool only_once = 10;

   // Whether to create a child workspace for this step.
   // If yes, the workflow and nets are re-created every time this step is run.
   optional bool create_workspace = 12;

   // How many copies of the children execution steps to run concurrently.
   optional int32 num_concurrent_instances = 13;
 }

 // PlanDef bundles the networks used by an execution together with the
 // execution steps that run them.
 message PlanDef {
   // The plan's name.
   optional string name = 1;
   // All the networks that are used in this execution. Note that networks should
   // be ordered in the way they are executed, i.e. for a layer in a network, all
   // its input blobs should already have been initialized by the layers or
   // networks defined before it.
   repeated NetDef network = 2;
   // The steps to execute; each step refers to the networks above by name
   // (see ExecutionStep.network).
   repeated ExecutionStep execution_step = 3;
 }

 // Protobuf format for blobs that are not Tensors. We use a key to store the
 // type of the blob. For example for a serialized DBProto, the type should
 // be "DBReader" and the content should be a serialized DBProto object.
 message BlobProto {
   // The blob's name.
   optional string name = 1;
   // Key identifying the type of the blob, e.g. "DBReader" for a serialized
   // DBProto.
   optional string type = 2;
   // Tensor payload.
   // NOTE(review): presumably set only when the blob is a Tensor -- confirm.
   optional TensorProto tensor = 3;
   // Serialized payload for blobs that are not Tensors.
   optional bytes content = 4;
   // Quantized tensor payload.
   optional QTensorProto qtensor = 5;
   // If blob is not Tensor and is divided into chunks, content_num_chunks
   // contains number of chunks, into which blob was divided.
   optional int32 content_num_chunks = 6;
   // NOTE(review): presumably the index of this chunk among
   // content_num_chunks -- confirm against the blob serializer.
   optional int32 content_chunk_id = 7;
 }

 // Protobuf format to serialize DBReader.
 // All fields are optional strings.
 message DBReaderProto {
   // The name for the DB object in the workspace.
   optional string name = 1;
   // The source of the DB
   optional string source = 2;
   // The type of the DB
   optional string db_type = 3;
   // The current key of the DB if the DB supports seeking.
   optional string key = 4;
 }

 // Options controlling how blobs whose names match a pattern are serialized.
 message BlobSerializationOptions {
   // This set of options will only apply to blobs whose name matches this
   // pattern.  If blob_name_regex is empty then it will be treated as
   // matching all blobs.
   optional string blob_name_regex = 1;

   // Note:
   // - a chunk_size of 0 means "use the default chunk size".  The default chunk
   //   size is controlled by the --caffe2_tensor_chunk_size command line flag.
   // - a chunk size of -1 means to disable chunking, and serialize the blob in
   //   a single chunk.
   optional int64 chunk_size = 2;

   // How float tensors are encoded when serialized.
   enum FloatFormat {
     // Use the current default serialization format, as chosen by the
     // current version of the code.  (At the time of writing this is PROTOBUF)
     FLOAT_DEFAULT = 0;
     // Store the data in the TensorProto's float_data field
     FLOAT_PROTOBUF = 1;
     // Serialize float values as bfloat16.  Note that this conversion is lossy.
     FLOAT_BFLOAT16 = 2;
   }

   // Settings for how to serialize tensors containing float values
   optional FloatFormat float_format = 3;
 }

 // Top-level container for per-blob serialization options.
 message SerializationOptions {
   // A set of options to use when serializing blobs.
   // This is a list, sorted from highest to lowest precedence.  When
   // serializing a blob, the first entry whose blob_name_regex matches the
   // blob name will be used.
   repeated BlobSerializationOptions options = 1;
 }
	syntax = "proto2";

	package caffe2;

	// A few notes about Caffe2's protobuf conventions:
	// (1) Most objects are registered by their types, such as operators and nets.
	// For these, we have a string-type field "type" for registration purposes.
	// (2) We do not use extension because that used to create quite some conflicts
	// in Caffe's protobuf design.
	// (3) We have not used any proto3 specific features, such as Any or Map. This
	// is mainly for backward compatibility purposes but we may consider using
	// those in the future.

	// TensorProto stores serialized Tensor objects.
	message TensorProto {
	  // The dimensions in the tensor.
	  repeated int64 dims = 1;

	  // Element type of the tensor. The numeric values are part of the
	  // serialized format; note that 11 is not assigned to any type.
	  enum DataType {
	    UNDEFINED = 0;

	    // Basic types
	    FLOAT = 1;     // float
	    INT32 = 2;     // int
	    BYTE = 3;      // byte, when deserialized, is going to be restored as uint8
	    STRING = 4;    // string

	    // Less-commonly used data types
	    BOOL = 5;      // bool
	    UINT8 = 6;     // uint8_t
	    INT8 = 7;      // int8_t
	    UINT16 = 8;    // uint16_t
	    INT16 = 9;     // int16_t
	    INT64 = 10;    // int64_t
	    FLOAT16 = 12;  // at::Half
	    DOUBLE = 13;   // double

	    ZERO_COLLISION_HASH = 14;  // zero-collision hash state
	    REBATCHING_BUFFER = 15;    // rebatching buffer
	  }
	  // The type of the deserialized tensor data. Reads as FLOAT when unset.
	  optional DataType data_type = 2 [default = FLOAT];

	  // The format of the serialized data.
	  enum SerializationFormat {
	    // FMT_PROTOBUF is the existing serialization format from before the
	    // data_format field was introduced. Most data types are serialized using
	    // the protobuf typed fields, although in some cases raw little endian
	    // data is stored in the byte_data field instead.
	    FMT_PROTOBUF = 0;
	    // bfloat16 data stored in the raw_data field.
	    FMT_BFLOAT16 = 1;
	  }
	  // data_format is a SerializationFormat enum value.
	  // However, we intentionally store it as an integer value so we can
	  // distinguish between old messages that do not have a data_format value vs
	  // new messages that have a SerializationFormat value that we don't
	  // understand. If we stored this as an enum then protobuf would deserialize
	  // both of these cases the same way.
	  optional uint32 data_format = 15 [default = 0];

	  // For float
	  repeated float float_data = 3 [packed = true];
	  // For int32, uint8, int8, uint16, int16, bool, and float16
	  // Note about float16: in storage we will basically convert float16
	  // byte-wise to unsigned short and then store them in the int32_data field.
	  // Note: storing int8 and uint8 values in this field unfortunately results
	  // in larger serialized data than necessary, as protobuf's varint encoding
	  // scheme requires 2 bytes to represent int8 and uint8 values that have the
	  // MSB set.
	  repeated int32 int32_data = 4 [packed = true];
	  // For bytes
	  optional bytes byte_data = 5;
	  // For strings
	  repeated bytes string_data = 6;
	  // For double
	  repeated double double_data = 9 [packed = true];
	  // For int64
	  repeated int64 int64_data = 10 [packed = true];
	  // Stores the raw data; contents are serialized as little-endian.
	  optional bytes raw_data = 13;

	  // Optionally, a name for the tensor.
	  optional string name = 7;

	  // Optionally, a TensorProto can contain the details about the device that
	  // it was serialized from. This is useful in cases like snapshotting a
	  // whole workspace in a multi-GPU environment.
	  optional DeviceOption device_detail = 8;

	  // When loading from chunks, a Segment indicates where this chunk's data
	  // belongs in the full array. When `segment` is not set, the full data has
	  // to be present in this message.
	  // NOTE(review): presumably begin/end are element offsets forming a
	  // half-open range -- confirm against the tensor (de)serializer.
	  message Segment {
	    required int64 begin = 1;
	    required int64 end = 2;
	  }
	  optional Segment segment = 11;

	  // Field numbers 12 and 14 were previously used for now-deprecated fields.
	  // Reserving them makes protoc reject any new field that reuses either
	  // number, which would otherwise misread data written by old producers.
	  reserved 12, 14;
	}

	// QTensorProto stores a serialized quantized tensor: the integer payload in
	// `data` together with the quantization parameters that describe it.
	message QTensorProto {
	  // The dimensions in the tensor.
	  repeated int64 dims = 1;
	  // NOTE(review): presumably the number of bits per quantized value --
	  // confirm against the (de)serializer.
	  required int32 precision = 2;
	  // Quantization scale and bias.
	  // NOTE(review): presumably the single-group parameters, with
	  // `scales`/`biases` used instead when is_multiparam is true -- confirm.
	  required double scale = 3;
	  required double bias = 4;
	  // Whether the quantized values are signed.
	  required bool is_signed = 5;
	  // The quantized values.
	  repeated int32 data = 6 [packed = true];
	  // Optionally, a name for the tensor.
	  optional string name = 7;
	  // Data type of the tensor. Reads as INT32 when unset.
	  optional TensorProto.DataType data_type = 8 [default = INT32];

	  // Multi-group quantization params
	  repeated double scales = 9;
	  repeated double biases = 10;

	  // Multi-group quantization needed, indicates in which dimension
	  // we do the "group wise quantization"
	  optional int32 axis = 11;

	  // It should be true if it is a multi-group quantization proto
	  optional bool is_multiparam = 12 [default = false];
	}

	// TensorProtos stores multiple TensorProto objects in one single proto. This
	// is useful for small tensors; For anything big, consider using a DB for
	// storage.
	message TensorProtos {
	  // The contained tensors, each a complete TensorProto.
	  repeated TensorProto protos = 1;
	}

	// TensorShape describes a tensor's dimensions and element type; it has no
	// field for tensor data.
	message TensorShape {
	  // The dimensions of the tensor.
	  repeated int64 dims = 1;
	  // Element type. Reads as FLOAT when unset.
	  optional TensorProto.DataType data_type = 2 [default = FLOAT];
	  // NOTE(review): presumably the indices into `dims` whose sizes are not
	  // known -- confirm against shape inference.
	  repeated int32 unknown_dims = 3;
	  // Reads as false when unset.
	  // NOTE(review): presumably set when the shape as a whole could not be
	  // determined -- confirm against shape inference.
	  optional bool unknown_shape = 4 [default = false];
	  // Optional name associated with this shape.
	  optional string name = 5;
	}

	// TensorShapes is a list of TensorShape messages.
	message TensorShapes {
	  repeated TensorShape shapes = 1;
	}

	// TensorBoundShape is used to save bound shape inference result for a tensor.
	// TensorBoundShape.shape is inferred shape for this tensor.
	// TensorBoundShape.dim_type contains the DimType for every dimension.
	// eg: for dimension i, shape.dims[i] is the inferred shape and
	// dim_type[i] is corresponding dim_type.
	message TensorBoundShape {
	  // The inferred shape for this tensor.
	  optional TensorShape shape = 1;
	  // Describes how a single dimension of `shape` is bound.
	  enum DimType {
	    UNKNOWN = 0;   // unknown
	    CONSTANT = 1;  // constant
	    // batch, corresponding dimension is batch_size
	    BATCH = 2;
	    // batch_of_feature_max,
	    // corresponding shape is inferred_feature_length * batch_size
	    BATCH_OF_FEATURE_MAX = 3;
	    // batch_of_feature_max_default
	    // corresponding shape is default_feature_length * batch_size
	    BATCH_OF_FEATURE_MAX_DEFAULT = 4;
	    // feature_max, corresponding shape is inferred_feature_length
	    FEATURE_MAX = 5;
	    // feature_max_default, corresponding shape is default_feature_length
	    FEATURE_MAX_DEFAULT = 6;
	  }
	  // One entry per dimension: dim_type[i] describes shape.dims[i].
	  repeated DimType dim_type = 2;  // dim_type.size() == shape.dims.size()
	  // Optional name associated with this bound shape.
	  optional string name = 3;
	  // a flag to indicate whether the shape is final and cannot be changed
	  // eg: input/output of in-place ops
	  optional bool shape_is_final = 4;
	}

	// TensorBoundShapes is a list of TensorBoundShape results plus two optional
	// limits.
	message TensorBoundShapes {
	  repeated TensorBoundShape shapes = 1;
	  // NOTE(review): presumably the batch size and feature length bounds that
	  // the shapes above were inferred with -- confirm against the producer.
	  optional int64 max_batch_size = 2;
	  optional int64 max_feature_len = 3;
	}

	// AOTConfig carries three required settings and two optional onnxifi
	// settings.
	// NOTE(review): "AOT" presumably stands for ahead-of-time compilation --
	// confirm with the consumer of this message.
	message AOTConfig {
	  // Required: a message missing any of these three fields is not
	  // considered initialized by protobuf.
	  required int64 max_batch_size = 1;
	  required int64 max_seq_size = 2;
	  required bool in_batch_broadcast = 3;
	  // NOTE(review): a single string; the separator used to list several ops
	  // is not visible from this file -- confirm with the consumer.
	  optional string onnxifi_blacklist_ops = 4;
	  optional int32 onnxifi_min_ops = 5;
	}

	// A named argument containing either singular float, integer and string
	// values, or repeated float, int and string arrays.
	// Note: the schema does not use a oneof, so nothing here prevents several
	// of the value fields from being set at once.
	message Argument {
	  // The argument's name.
	  optional string name = 1;

	  // Singular values.
	  optional float f = 2;
	  optional int64 i = 3;
	  optional bytes s = 4;  // string value, declared as bytes
	  optional TensorProto t = 10;
	  optional NetDef n = 8;

	  // Repeated values.
	  repeated float floats = 5;
	  repeated int64 ints = 6;
	  repeated bytes strings = 7;  // string values, declared as bytes
	  repeated TensorProto tensors = 11;
	  repeated NetDef nets = 9;
	  repeated QTensorProto qtensors = 12;
	}

	// DeviceType that Caffe2 currently supports.
	// Note: if you add a device type, make sure you add the corresponding device
	// line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc
	// and update c10/core/DeviceType.h
	enum DeviceTypeProto {
	  PROTO_CPU = 0;     // In default, we will use CPU.
	  PROTO_CUDA = 1;    // CUDA.
	  PROTO_MKLDNN = 2;  // Reserved for explicit MKLDNN
	  PROTO_OPENGL = 3;  // OpenGL
	  PROTO_OPENCL = 4;  // OpenCL
	  PROTO_IDEEP = 5;   // IDEEP.
	  PROTO_HIP = 6;     // AMD HIP
	  PROTO_FPGA = 7;    // FPGA
	  PROTO_MSNPU = 8;   // MSNPU
	  PROTO_XLA = 9;     // XLA / TPU
	  PROTO_MLC = 10;    // ML Compute
	  // Not a device: this is the count of device types listed above (one past
	  // the largest device value).
	  // Change the following number if you add more devices in the code.
	  PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 11;
	}

	// Device-specific options. We do not distinguish DeviceOption protos for
	// different DeviceTypes, so currently all devices share the same DeviceOption
	// proto. Fields that are specific to a device type are ignored if the type
	// does not match.
	// Note: if you add fields to the DeviceOption, make sure you add the
	// corresponding changes to IsSameDevice() function in utils/proto_utils.{h,cc}.
	message DeviceOption {
	  // [general] Options that need to be carried out before running the
	  // execution.
	  // The field is declared as a plain int32 rather than as an enum; the line
	  // below is the earlier enum-typed declaration, kept for reference.
	  // NOTE(review): values presumably mirror DeviceTypeProto (PROTO_CPU = 0)
	  // -- confirm against proto_utils.
	  // optional DeviceType device_type = 1 [ default = CPU ];
	  optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU.
	  // [general] Used together with device_type to identify the exact device
	  optional int32 device_id = 2;
	  // [general] The random seed to start the device random number generator
	  // with.
	  optional uint32 random_seed = 3;
	  // [general] What node this op should execute on.
	  // Used for net transformation purposes. Must be empty at execution time.
	  optional string node_name = 4;
	  // [CPU and Linux specific] NUMA node id
	  optional int32 numa_node_id = 5;
	  // [general] Extra information passed, not used at execution time
	  // currently.
	  repeated string extra_info = 6;
	}

	// Operator Definition.
	message OperatorDef {
	  repeated string input = 1; // the names of the input blobs
	  repeated string output = 2; // the names of the output (top) blobs
	  optional string name = 3; // the operator name. This is optional.
	  // the operator type. This is needed to create the object from the operator
	  // registry.
	  optional string type = 4;
	  // arg is for the arguments defined in the operator schema
	  repeated Argument arg = 5;

	  // The device option that the operator should run under.
	  optional DeviceOption device_option = 6;

	  // Optionally, one can specify an engine when there are multiple
	  // implementations available simultaneously for one device type.
	  // If one specifies an engine but that engine does not exist in the
	  // compiled Caffe2 binary, Caffe2 will fall back to the default engine of
	  // that device type.
	  optional string engine = 7;

	  // Additional 'fake' inputs used for expressing control dependencies
	  // in the operator graph. This can be used to ensure that an
	  // operator does not run until another operator is ready, e.g. for
	  // scheduling control. These are not passed as actual inputs to the
	  // Operator implementation, and are only used by the Net class for
	  // scheduling purposes.
	  repeated string control_input = 8;

	  // is_gradient_op argument is only used as a hint in shape inference
	  // and has no runtime significance
	  optional bool is_gradient_op = 9 [default = false];

	  // debug information associated with the construction of the operator.
	  // This is an optional string with no assumed characteristics as
	  // operators can be constructed in any language.
	  optional string debug_info = 10;

	  // the domain of the operator, to help the runtime distinguish which
	  // operator library this OperatorDef refers to. For example, both caffe2
	  // and aten have an `Add` operator; with domain, we can easily decide which
	  // operator to execute. To support multiple operator libs, we use domain to
	  // distinguish which operator lib we refer to:
	  //   - "caffe2" means this uses Caffe2 operator library
	  //   - "aten" means this uses ATen operator library
	  //   - "c10" is for the fused library
	  //   - if the domain is missing or empty, we use "caffe2", this is for
	  //     legacy models, new serializer should always export an OperatorDef
	  //     with domain and op_version
	  optional string domain = 11;
	  // operator version information: each operator has its own version number.
	  // each time we change the API or semantics of the operator,
	  // we bump the version for the operator.
	  // the runtime system should check the op_version of each OperatorDef
	  // and decide whether it should reject or accept the model
	  optional int64 op_version = 12;
	}

	// MapFieldEntry follows the pattern for cross-proto-version maps.
	// See https://developers.google.com/protocol-buffers/docs/proto3#maps
	message MapFieldEntry {
	  // Key of the entry. Declared `required`, so it must always be set.
	  required string key = 1;
	  // Value stored under `key`. Declared `required`, so it must always be set.
	  required string val = 2;
	}

	// Used to hold backend-specific options.
	message BackendOptions {
	  // Name of the backend these options apply to. Declared `required`.
	  required string backend_name = 1;
	  // The options themselves, as a flexible list of key/value entries.
	  repeated MapFieldEntry option = 2;
	}

	// Partition definition.
	message PartitionInfo {
	  // Name of the partition. Declared `required`.
	  required string name = 1;

	  // Logical device IDs this partition can be executed on. An empty list
	  // means the partition does not run on a device; it runs on the host CPU
	  // instead.
	  repeated int32 device_id = 2;

	  // Extra debug information.
	  optional string extra_info = 3;

	  // Backend-specific options, one BackendOptions entry per backend.
	  repeated BackendOptions backend_options = 4;
	}

	// Network definition.
	message NetDef {
	  // Name of the network.
	  optional string name = 1;

	  // Operators contained in the network.
	  // Note: the field is not called "operator" because that is a reserved
	  // word in C++.
	  repeated OperatorDef op = 2;

	  // Type of network the net should be run with. It routes network
	  // instantiation to different execution modes. The default mode,
	  // "simple", runs the operators sequentially, the way the original Caffe
	  // implementation does.
	  optional string type = 3;

	  // Number of workers to use when the operators of the network are to be
	  // carried out in parallel.
	  // Note: this is to be deprecated; use the arg field with "num_workers" as
	  // the key instead.
	  // Note 2: the old uses of this field were never actually cleaned up.
	  optional int32 num_workers = 4;

	  // Device option for the whole network. When the network has a device
	  // option and one of its operators does not set its own, the network's
	  // option is copied over to that operator, which avoids repeating device
	  // options on every operator.
	  optional DeviceOption device_option = 5;

	  // Arguments attached to the network (for instance the "num_workers" key
	  // mentioned on the num_workers field).
	  repeated Argument arg = 6;

	  // Two optional fields declaring the external inputs and outputs of a net.
	  // When both are set, net creation sanity-checks that every op input is
	  // declared (either as an external input or as an intermediate blob
	  // created by one of the ops) and that every blob in external_output is
	  // produced.
	  //
	  // Under memory optimization, declaring them also keeps the storage of
	  // these blobs persistent: after a network run finishes, every blob listed
	  // in external_input or external_output holds the right content, whereas
	  // the contents of intermediate blobs may have been overwritten.
	  repeated string external_input = 7;
	  repeated string external_output = 8;

	  // Partitioning information, indexed by partition name.
	  repeated PartitionInfo partition_info = 9;
	}


	// ExecutionStep is actually a sort-of-hacky way we simulate iteration right
	// now.
	message ExecutionStep {
	  // An ExecutionStep holds either a set of substeps or a set of network
	  // names to run in this step. The two must NOT be set at the same time.
	  optional string name = 1;

	  // Substeps, for a recursive execution step.
	  repeated ExecutionStep substep = 2;

	  // Networks to run, as the alternative to substeps (choose one of the two;
	  // a step cannot have both). Networks are referred to by name, and the
	  // definition carrying that name should be included in the network field
	  // of the plan. The reason is that a network object might hold internal
	  // state (think of a data layer), so several steps must be able to ask for
	  // the same network object to run.
	  repeated string network = 3;

	  // Number of iterations to run this step. The substeps or networks are run
	  // sequentially, and one sequential pass counts as one iteration. When
	  // unset, the number of iterations is assumed to be 1.
	  optional int64 num_iter = 4;

	  // Criteria network: specifies a single output (TensorCPU<bool>) of
	  // size (1), is run on every iteration by the executor, and execution
	  // terminates when output[0] is `false`.
	  optional string criteria_network = 5 [deprecated = true];

	  // DEPRECATED. Use `run_every_ms`.
	  optional string report_net = 7;
	  optional int32 report_interval = 8;

	  // If provided, execute this step at every time interval (in
	  // milliseconds) while its sibling execution steps execute in parallel.
	  // The step is guaranteed to run at least once after all non-interval
	  // siblings have finished.
	  optional int64 run_every_ms = 11;

	  // False or unset: substeps are executed serially.
	  // True: all substeps are executed concurrently, each in its own thread.
	  optional bool concurrent_substeps = 6;

	  // Name of a scalar boolean tensor.
	  // The execution step (ES) checks this blob AFTER every substep/subnet.
	  // If it is specified and its value is true, the ES skips the rest and
	  // returns immediately. Consequently the report_net and the first step
	  // are always called.
	  // Use cases:
	  //   1) the first substep stops the rest if a data condition is not met
	  //   2) the first substep decides which of the remaining steps should run
	  //   3) external control
	  //
	  // ** It is the user's responsibility not to expose this blob to race
	  // ** conditions, for example when setting it from concurrent substeps.
	  optional string should_stop_blob = 9;

	  // When true, this step is executed only once. This ONLY takes effect when
	  // should_stop_blob is used.
	  optional bool only_once = 10;

	  // Whether to create a child workspace for this step. If so, the workflow
	  // and nets are re-created every time the step is run.
	  optional bool create_workspace = 12;

	  // Number of copies of the children execution steps to run concurrently.
	  optional int32 num_concurrent_instances = 13;
	}

	message PlanDef {
	  // Name of the plan.
	  optional string name = 1;

	  // All the networks used in this execution. They should be ordered the way
	  // they are executed: for a layer in a network, all of its input blobs
	  // should already have been initialized by the layers or networks defined
	  // before it.
	  repeated NetDef network = 2;

	  // Execution steps of the plan.
	  repeated ExecutionStep execution_step = 3;
	}

	// Protobuf format for blobs that are not Tensors. We use a key to store the
	// type of the blob. For example for a serialized DBProto, the type should
	// be "DBReader" and the content should be a serialized DBProto object.
	message BlobProto {
	  // Name of the blob.
	  optional string name = 1;
	  // Key identifying the type of the blob, e.g. "DBReader".
	  optional string type = 2;
	  // Tensor payload. NOTE(review): presumably set when the blob is a Tensor;
	  // confirm against the serializer.
	  optional TensorProto tensor = 3;
	  // Serialized content of the blob, e.g. a serialized DBProto object.
	  optional bytes content = 4;
	  // Quantized tensor payload. NOTE(review): presumably set when the blob is
	  // a QTensor; confirm against the serializer.
	  optional QTensorProto qtensor = 5;

	  // When the blob is not a Tensor and has been divided into chunks,
	  // content_num_chunks holds the number of chunks it was divided into.
	  optional int32 content_num_chunks = 6;
	  // NOTE(review): presumably the index of the chunk stored in this message;
	  // confirm against the serializer.
	  optional int32 content_chunk_id = 7;
	}

	// Protobuf format to serialize DBReader.
	message DBReaderProto {
	  // Name of the DB object in the workspace.
	  optional string name = 1;

	  // Source of the DB.
	  optional string source = 2;

	  // Type of the DB.
	  optional string db_type = 3;

	  // Current key of the DB, for DBs that support seeking.
	  optional string key = 4;
	}

	message BlobSerializationOptions {
	  // These options apply only to blobs whose name matches this pattern.
	  // An empty blob_name_regex is treated as matching all blobs.
	  optional string blob_name_regex = 1;

	  // Chunk size used when serializing the blob.
	  //   -  0: use the default chunk size, which is controlled by the
	  //         --caffe2_tensor_chunk_size command line flag.
	  //   - -1: disable chunking and serialize the blob in a single chunk.
	  optional int64 chunk_size = 2;

	  enum FloatFormat {
	    // Use the current default serialization format, as chosen by the
	    // current version of the code (PROTOBUF at the time of writing).
	    FLOAT_DEFAULT = 0;
	    // Store the data in the float_data field of the TensorProto.
	    FLOAT_PROTOBUF = 1;
	    // Serialize float values as bfloat16. This conversion is lossy.
	    FLOAT_BFLOAT16 = 2;
	  }

	  // How to serialize tensors that contain float values.
	  optional FloatFormat float_format = 3;
	}

	message SerializationOptions {
	  // Options to use when serializing blobs, listed from highest to lowest
	  // precedence. For a given blob, the first entry whose blob_name_regex
	  // matches the blob name is the one that is used.
	  repeated BlobSerializationOptions options = 1;
	}