| syntax = "proto2"; |
| |
| package caffe2; |
| |
// A few notes about Caffe2's protobuffer convention:
// (1) Most objects are registered by their types, such as operators and nets.
//     For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extensions because they used to create quite a few
//     conflicts in Caffe's protobuf design.
// (3) We have not used any proto3-specific features, such as Any or Map. This
//     is mainly for backward compatibility purposes, but we may consider using
//     those in the future.
| |
| // ExternalDataProto stores the pointer to the content of TensorProto |
| // the content are stored in the raw format as little endian |
| message ExternalDataProto { |
| // type of the external storage type, can be the following: |
| enum SourceType { |
| // the container defined in torch/csrc/jit/serialization.h is used, |
| // and record_id is the tag to help the runtime identify the data |
| // this type of storage is set as DEFAULT and recommended for external |
| // data storage |
| INLINE_CONTAINER = 0; |
| // use external file to store the data, and record_id is the POSIX relative path |
| // to the file. this (simple) file is only for the data, and the data is stored |
| // as little endian in the file |
| SIMPLE_FILE = 1; |
| } |
| optional SourceType source_type = 1 [default = INLINE_CONTAINER]; |
| // used together with type |
| optional string record_id = 2; |
| // the size of the entire record (in bytes) |
| optional uint64 record_size = 5; |
| // the offset of the starting point, the content may be shared between |
| // multiple tensors |
| optional int64 offset = 3 [default = 0]; |
| // the strides of the content |
| repeated int64 strides = 4; |
| } |
| |
| // TensorProto stores serialized Tensor objects. |
| message TensorProto { |
| // The dimensions in the tensor. |
| repeated int64 dims = 1; |
| |
| // Data type |
| enum DataType { |
| UNDEFINED = 0; |
| |
| // Basic types |
| FLOAT = 1; // float |
| INT32 = 2; // int |
| BYTE = 3; // byte, when deserialized, is going to be restored as uint8 |
| STRING = 4; // string |
| |
| // Less-commonly used data types |
| BOOL = 5; // bool |
| UINT8 = 6; // uint8_t |
| INT8 = 7; // int8_t |
| UINT16 = 8; // uint16_t |
| INT16 = 9; // int16_t |
| INT64 = 10; // int64_t |
| FLOAT16 = 12; // at::Half |
| DOUBLE = 13; // double |
| } |
| optional DataType data_type = 2 [default = FLOAT]; |
| |
| // data storage |
| enum StorageType { |
| // the content is stored in typed field, for example, if the data_type is |
| // FLOAT, float_data is used to store the content. |
| TYPED = 1; |
| // the content is serialized in field raw_data as little-endian |
| RAW = 2; |
| // the pointer to the content is stored in field external_data |
| // the content is serialized as little-endian |
| EXTERNAL = 3; |
| // When StorageType is NO_CONTENT, we use TensorProto to store only type |
| // and shape information. Reuse TensorProto to store type and shape |
| // because we can just have one proto, not having another ValueInfoProto |
| NO_CONTENT = 4; |
| } |
| optional StorageType storage_type = 12 [default = TYPED]; |
| // For float |
| repeated float float_data = 3 [packed = true]; |
| // For int32, uint8, int8, uint16, int16, bool, and float16 |
| // Note about float16: in storage we will basically convert float16 byte-wise |
| // to unsigned short and then store them in the int32_data field. |
| repeated int32 int32_data = 4 [packed = true]; |
| // For bytes |
| optional bytes byte_data = 5; |
| // For strings |
| repeated bytes string_data = 6; |
| // For double |
| repeated double double_data = 9 [packed = true]; |
| // For int64 |
| repeated int64 int64_data = 10 [packed = true]; |
| // store the raw data, contents are serialized as little-endian |
| optional bytes raw_data = 13; |
| // store the pointer to the data |
| optional ExternalDataProto external_data = 14; |
| |
| // Optionally, a name for the tensor. |
| optional string name = 7; |
| |
| // Optionally, a TensorProto can contain the details about the device that |
| // it was serialized from. This is useful in cases like snapshotting a whole |
| // workspace in a multi-GPU environment. |
| optional DeviceOption device_detail = 8; |
| |
| // When loading from chunks this is going to indicate where to put data in the |
| // full array. When not used full data have to be present |
| message Segment { |
| required int64 begin = 1; |
| required int64 end = 2; |
| } |
| optional Segment segment = 11; |
| } |
| |
| message QTensorProto { |
| repeated int64 dims = 1; |
| required int32 precision = 2; |
| required double scale = 3; |
| required double bias = 4; |
| required bool is_signed = 5; |
| repeated int32 data = 6 [packed = true]; |
| optional string name = 7; |
| optional TensorProto.DataType data_type = 8 [default = INT32]; |
| |
| // Multi-group quantization params |
| repeated double scales = 9; |
| repeated double biases = 10; |
| |
| // Multi-group quantization needed, indicates in which dimension |
| // we do the "group wise quantization" |
| optional int32 axis = 11; |
| |
| // It should be true if it is a multi-group quantization proto |
| optional bool is_multiparam = 12 [default = false]; |
| } |
| |
| // TensorProtos stores multiple TensorProto objects in one single proto. This |
| // is useful for small tensors; For anything big, consider using a DB for |
| // storage. |
| message TensorProtos { |
| repeated TensorProto protos = 1; |
| } |
| |
| message TensorShape { |
| repeated int64 dims = 1; |
| optional TensorProto.DataType data_type = 2 [default = FLOAT]; |
| repeated int32 unknown_dims = 3; |
| optional bool unknown_shape = 4 [default = false]; |
| optional string name = 5; |
| } |
| |
| message TensorShapes { |
| repeated TensorShape shapes = 1; |
| } |
| |
| // A named argument containing either singular float, integer and string |
| // values, or repeated float, int and string arrays. |
| message Argument { |
| optional string name = 1; |
| |
| optional float f = 2; |
| optional int64 i = 3; |
| optional bytes s = 4; |
| optional TensorProto t = 10; |
| optional NetDef n = 8; |
| |
| repeated float floats = 5; |
| repeated int64 ints = 6; |
| repeated bytes strings = 7; |
| repeated TensorProto tensors = 11; |
| repeated NetDef nets = 9; |
| repeated QTensorProto qtensors = 12; |
| } |
| |
| // DeviceType that Caffe2 currently supports. |
| // Note: if you add a device type, make sure you add the corresponding device |
| // line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc |
| // and update c10/core/DeviceType.h |
| enum DeviceTypeProto { |
| PROTO_CPU = 0; // In default, we will use CPU. |
| PROTO_CUDA = 1; // CUDA. |
| PROTO_MKLDNN = 2; // Reserved for explicit MKLDNN |
| PROTO_OPENGL = 3; // OpenGL |
| PROTO_OPENCL = 4; // OpenCL |
| PROTO_IDEEP = 5; // IDEEP. |
| PROTO_HIP = 6; // AMD HIP |
| PROTO_FPGA = 7; // FPGA |
| PROTO_MSNPU = 8; // MSNPU |
| PROTO_XLA = 9; // XLA / TPU |
| // Change the following number if you add more devices in the code. |
| PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 10; |
| PROTO_ONLY_FOR_TEST = 20901; // This device type is only for test. |
| } |
| |
| // Device-specific options. We do not distinguish DeviceOption protos for |
| // different DeviceTypes, so currently all devices share the same DeviceOption |
| // proto. Fields that are specific to a device type is ignored if the type does |
| // not match. |
| // Note: if you add fields to the DeviceOption, make sure you add the |
| // corresponding changes to IsSameDevice() function in utils/proto_utils.{h,cc}. |
| message DeviceOption { |
| // [general] Options that need to be carried out before running the execution. |
| // optional DeviceType device_type = 1 [ default = CPU ]; |
| optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU. |
| // [general] Used together with device_type to identify the exact device |
| optional int32 device_id = 2; |
| // [general] The random seed to start the device random number generator with. |
| optional uint32 random_seed = 3; |
| // [general] What node this op should execute on. |
| // Used for net transformation purposes. Must be empty at execution time. |
| optional string node_name = 4; |
| // [CPU and Linux specific] NUMA node id |
| optional int32 numa_node_id = 5; |
| // [general] Extra information passed, not used at execution time currently. |
| repeated string extra_info = 6; |
| } |
| |
| // Operator Definition. |
| message OperatorDef { |
| repeated string input = 1; // the name of the input blobs |
| repeated string output = 2; // the name of output top blobs |
| optional string name = 3; // the operator name. This is optional. |
| // the operator type. This is needed to create the object from the operator |
| // registry. |
| optional string type = 4; |
| // arg is for the argument defined in operator schema |
| repeated Argument arg = 5; |
| |
| // The device option that the operator should run under. |
| optional DeviceOption device_option = 6; |
| |
| // Optionally, one can specify an engine when there are multiple |
| // implementations available simultaneously for one device type. |
| // If one specifies an engine but that engine does not exist in the compiled |
| // Caffe2 binary, Caffe2 will fall back to the default engine of that device |
| // type. |
| optional string engine = 7; |
| |
| |
| // Additional 'fake' inputs used for expressing control dependencies |
| // in the operator graph. This can be used to ensure that an |
| // operator does not run until another operator is ready, for e.g. |
| // scheduling control. These are not passed as actual inputs to the |
| // Operator implementation, and are only used by the Net class for |
| // scheduling purposes. |
| repeated string control_input = 8; |
| |
| // is_gradient_op argument is only used as a hint in shape inference |
| // and has no runtime significance |
| optional bool is_gradient_op = 9 [default = false]; |
| |
| // debug information associated with the construction of the operator. |
| // This is an optional string with no assumed characteristics as |
| // operators can be constructed in any language. |
| optional string debug_info = 10; |
| |
| // the domain of the operator to help runtime distinguish which operator |
| // library this OperatorDef refers to. For example, both caffe2 and aten |
| // has `Add` operator, with domain, we can easily decide which operator |
| // to execute. to support multiple operator libs, we use domain to |
| // distinguish which operator lib we refer to: |
| // - "caffe2" means this uses Caffe2 operator library |
| // - "aten" means this uses ATen operator library |
| // - "c10" is for the fused library |
| // - if the domain is missing or empty, we use "caffe2", this is for |
| // legacy models, new serializer should always export an OperatorDef |
| // with domain and op_version |
| optional string domain = 11; |
| // each operator is has its own version number. |
| // operator version information |
| // each time, we change the API or semantics of the operator, |
| // we bump the version for the operator. |
| // the runtime system should check the op_version of each OperatorDef |
| // and decide it should reject or accept the model |
| optional int64 op_version = 12; |
| } |
| |
| // Network definition. |
| message NetDef { |
| optional string name = 1; // the network's name |
| // Operators that the network contains. |
| // Note: this is not named "operator" because that is a reserved word in C++. |
| repeated OperatorDef op = 2; |
| |
| // The type of network that the net should be run with. This routes the |
| // network instantiation to different execution modes. The default mode, |
| // "simple", runs the operators in a sequential way as the original Caffe |
| // implementation does. |
| optional string type = 3; |
| |
| // the number of workers, if the operators in the network is to be carried out |
| // in parallel. |
| // Note: This is to be deprecated. Using the arg field with "num_workers" as |
| // key. |
| optional int32 num_workers = 4 [deprecated=true]; |
| |
| // The device option for the network. If a network has a specific device |
| // option and one of its operators does not have it set, we will copy over the |
| // device option to the operator. This allows us to basically avoid putting |
| // device options at every operator. |
| optional DeviceOption device_option = 5; |
| |
| repeated Argument arg = 6; |
| |
| // Two optional fields to declare external input and output of a net. |
| // If these two are set, when a net is created, we will sanity check for |
| // every op whether its input is declared (either as an external input, |
| // or as an intermediate blob created by one of the ops), and sanity check |
| // if all blobs in external_output are produced. |
| // |
| // In cases of memory optimization, declaring external_input and |
| // external_output also ensures that storage of these blobs are persistent: |
| // for any blob in external_input and external_output, after a network run |
| // finishes, their content are actually the right content. Any intermediate |
| // blobs' contents may be overwritten. |
| repeated string external_input = 7; |
| repeated string external_output = 8; |
| } |
| |
| // ExecutionStep is actually a sort-of-hacky way we simulate iteration right |
| // now. |
| message ExecutionStep { |
| // ExecutionStep should either contain a set of substeps, or a set of |
| // network names to run in this execution step. They should NOT both be set |
| // at the same time. |
| optional string name = 1; |
| // An execution step could be recursive, in which it involves a set of |
| // substeps. |
| repeated ExecutionStep substep = 2; |
| // Alternatively, an execution step could involve one or more networks. |
| // Note that you cannot have both substeps and networks. Choose one. |
| // Note that an execution step refers networks by their name. The actual |
| // network definition of the same name should be included in the network field |
| // of the plan. The reason is that a network object might hold internal states |
| // (think of a data layer), so we want to have the same network object that |
| // multiple steps could ask to run. |
| repeated string network = 3; |
| // Number of iterations to run this step. The substeps or the networks |
| // specified will be run sequentially, and one sequential run is considered |
| // one iteration. If this is not set, the number of iterations is assumed to |
| // be 1. |
| optional int64 num_iter = 4; |
| |
| // Criteria network specifies a single output (TensorCPU<bool>) of |
| // size (1), is run on every iteration by the executor, and |
| // execution terminates when the output[0] is `false`. |
| optional string criteria_network = 5 [deprecated=true]; |
| |
| // DEPRECATED. Use `run_every_ms`. |
| optional string report_net = 7; |
| optional int32 report_interval = 8; |
| |
| // If provided, execute this step at every time interval (in millisecs) |
| // while its sibiling execution steps execute in parallel. This step is |
| // guaranteed to run at least once after all non-interval siblings finished. |
| optional int64 run_every_ms = 11; |
| |
| // If false or not set, execute sub-steps serially. |
| // If true, execute all substeps concurrently, each one in a separte thread. |
| optional bool concurrent_substeps = 6; |
| |
| // Name of a scalar boolean tensor. |
| // ES checks this blob AFTER every substeps/subnets. |
| // If specified, and the value is true, then ES will skip the rest and return |
| // immediately. |
| // This means that the report_net and the first step will always be called. |
| // Use cases: |
| // 1) the first substep stops the rest if data condition not met |
| // 2) the first substep decide which of the rest of the steps should be run. |
| // 3) external control |
| // |
| // ** It is the user's responsibility to not to put this blob in race conditions. |
| // ** For example when setting this blob in concurrent substeps |
| optional string should_stop_blob = 9; |
| |
| // if only_once is true, this step will only be executed once. this ONLY takes |
| // effect when using should_stop_blob |
| optional bool only_once = 10; |
| |
| // Whether to create a child workspace for this step. |
| // If yes, the workflow and nets are re-created every time this step is run. |
| optional bool create_workspace = 12; |
| |
| // How many copies of the children execution steps to run concurrently. |
| optional int32 num_concurrent_instances = 13; |
| } |
| |
| message PlanDef { |
| // All the networks that are used in this execution. Note that networks should |
| // be ordered in the way they are executed, i.e. for a layer in a network, all |
| // its input blobs should already have been initialized by the layers or |
| // networks defined before it. |
| optional string name = 1; |
| // The networks that are going to be used in this plan. |
| repeated NetDef network = 2; |
| repeated ExecutionStep execution_step = 3; |
| } |
| |
| // Protobuf format for blobs that are not Tensors. We use a key to store the |
| // type of the blob. For example for a serialized DBProto, the type should |
| // be "DBReader" and the content should be a serialized DBProto object. |
| message BlobProto { |
| optional string name = 1; |
| optional string type = 2; |
| optional TensorProto tensor = 3; |
| optional bytes content = 4; |
| optional QTensorProto qtensor = 5; |
| // If blob is not Tensor and is divided into chunks, content_num_chunks |
| // contains number of chunks, into which blob was divided. |
| optional int32 content_num_chunks = 6; |
| optional int32 content_chunk_id = 7; |
| } |
| |
| // Protobuf format to serialize DBReader. |
| message DBReaderProto { |
| // The name for the DB object in the workspace. |
| optional string name = 1; |
| // The source of the DB |
| optional string source = 2; |
| // The type of the DB |
| optional string db_type = 3; |
| // The current key of the DB if the DB supports seeking. |
| optional string key = 4; |
| } |