// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// See README.md before modifying this file.
//
include "scalar_type.fbs";
namespace executorch_flatbuffer;
// Identifier of a valid executor schema.
file_identifier "ET12";
// Extension of written files.
file_extension "pte";
// Table that contains the metadata describing how to unflatten the
// flattened input/output from the compiler.
table ContainerMetadata {
encoded_inp_str:string;
encoded_out_str:string;
}
table Null {}
// Contains information relevant to the allocation of non-constant
// buffer data (e.g. from tensors).
// This refers to where the buffer needs to be placed in an existing
// memory and at what offset from its base address.
table AllocationDetails {
memory_id:uint; // ID of the memory where this data needs to be placed.
memory_offset:uint; // Offset (in bytes) w.r.t. the memory base address.
}
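// For illustration only (hypothetical values, not taken from a real program):
// memory_id = 1, memory_offset = 64 places the buffer 64 bytes past the base
// address of memory 1 in the runtime's memory hierarchy.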
// Indicates the kinds of shape a Tensor may have, from the point of view
// of its dynamism.
enum TensorShapeDynamism : byte {
// Static shape. Memory is allocated by the compiler.
STATIC = 0,
// Dynamic shape but with an upper bound.
// Memory is allocated by the compiler.
DYNAMIC_BOUND = 1,
// Dynamic shape without upper bound.
// Memory allocation is handled by the runtime.
DYNAMIC_UNBOUND = 2,
}
table Tensor {
scalar_type:ScalarType;
// Offset in scalar_type elements (e.g., multiples of 4 bytes for an int
// scalar type) from the beginning of the tensor buffer to the beginning of
// the actual data. Currently, the runtime only supports a value of zero.
storage_offset:int;
sizes:[int];
// Specifies the order in which the dimensions are laid out in memory (from outermost to innermost).
// For example, given a rank-3 Tensor of size (3, 5, 2) whose dimensions are named [row, column, batch],
// a dim_order of:
// (2, 0, 1) represents a [batch, row, column] ordering, where "column" is the innermost dimension, then comes "row", and the outermost dimension is "batch".
// (0, 2, 1) represents a [row, batch, column] ordering, where "column" is the innermost dimension, then comes "batch", and the outermost dimension is "row".
dim_order:[ubyte];
// Out of scope for M1.
requires_grad:bool;
// Overall, a Tensor is either constant or non-constant, except we differentiate 2 special
// variants of non-constant Tensor ("input" and control-flow "placeholder") as a special
// optimization to avoid holding unnecessary AllocationDetails.
// In summary:
// constant_buffer_idx > 0, allocation_info = null: Tensor is a constant.
// constant_buffer_idx = 0, allocation_info = non-null: Tensor is a non-constant
// with a buffer pre-allocated as described by allocation_info.
// constant_buffer_idx = 0, allocation_info = null: Tensor is a non-constant
// that will receive a data pointer at input time or during execution.
//
// Index into the program's constant buffer table; value 0 is reserved to
// indicate a non-constant Tensor.
constant_buffer_idx:uint;
// [Optional] preallocation details for non-constants (null otherwise).
allocation_info:AllocationDetails;
// May not be needed.
layout:byte;
// Determines how the tensor's shape behaves with respect to dynamism, and
// consequently how the allocation of the underlying memory is handled and
// how the sizes field should be interpreted.
// 1. dynamism == STATIC: the sizes field represents the static shape of
// the tensor.
// 2. dynamism == DYNAMIC_BOUND: the sizes field represents the upper-bound
// shape of the tensor. At runtime, each dimension of the tensor must never
// exceed the corresponding dimension of the upper-bound shape.
// 3. dynamism == DYNAMIC_UNBOUND: the stored sizes field can be ignored,
// since the shape is fully dynamic.
shape_dynamism: TensorShapeDynamism;
}
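// Sketch, for illustration only (values are hypothetical; assumes scalar_type.fbs
// defines FLOAT): a constant 2x3 float tensor stored contiguously could look like
// scalar_type = FLOAT, storage_offset = 0, sizes = [2, 3], dim_order = [0, 1]
// (outermost to innermost, i.e. the standard contiguous layout), requires_grad = false,
// constant_buffer_idx = 1 (its data lives in Program.constant_buffer[1]),
// allocation_info = null, shape_dynamism = STATIC.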
table Int {
int_val:long;
}
table Bool {
bool_val:bool;
}
table Double {
double_val:double;
}
table String {
string_val:string;
}
table IntList {
items:[long];
}
table DoubleList {
items:[double];
}
table BoolList {
items:[bool];
}
// Unlike primitive lists, tensor lists have mutable members and aliasing behavior when
// elements are added to them. To match this aliasing behavior, the runtime tensor list is
// serialized by serializing its elements into the ExecutionPlan.values array, and then
// serializing their corresponding indices into TensorList.items.
table TensorList {
items:[int]; // EValue indices.
}
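// For illustration only (hypothetical indices): items = [3, 5] means the list
// aliases ExecutionPlan.values[3] and ExecutionPlan.values[5], each of which is
// expected to hold a Tensor.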
// Similar to TensorList except the indices can also point to None.
table OptionalTensorList {
items:[int];
}
// Supported values in ExecuTorch kernels. Enums are serialized as ints.
union KernelTypes {
Null,
Int,
Bool,
Double,
Tensor,
String,
IntList,
DoubleList,
BoolList,
TensorList,
OptionalTensorList,
}
// Abstraction for program values. A subset of the types supported in core PyTorch kernels.
table EValue {
val:KernelTypes;
}
table Operator {
// Operators are uniquely identified by their name and overload name for
// registry lookup.
// TODO(larryliu): is there a more efficient way to represent this
name:string;
overload:string;
}
table KernelCall {
// Index into the ExecutionPlan.operators table.
op_index:int;
// Indices into the values table for the arguments required by the operation (inputs and outputs).
args:[int];
}
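// For illustration only (hypothetical values): op_index = 2 resolves the kernel
// via ExecutionPlan.operators[2], and args = [0, 1, 4] are indices into
// ExecutionPlan.values for the call's inputs and outputs.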
table DelegateCall {
// Index into the ExecutionPlan.delegates table.
delegate_index:int;
// Indices into the values table for the arguments required by the delegate (inputs and outputs).
args:[int];
}
table MoveCall {
// Index into the values table of the EValue we are moving from.
move_from: int;
// Index into the values table of the EValue we are moving to.
move_to: int;
}
table JumpFalseCall {
// Index into the values table of the Bool that specifies whether or not to jump.
cond_value_index: int;
// Value to set the executor's program counter to if the jump occurs.
destination_instruction: int;
}
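// For illustration (a sketch of the intended semantics, as suggested by the
// field comments above): if the Bool at values[cond_value_index] is false, the
// program counter is set to destination_instruction; otherwise execution
// continues with the next instruction.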
table FreeCall {
// Index into the values table of the tensor whose underlying data blob is being freed.
value_index: int;
}
union InstructionArguments {
KernelCall,
DelegateCall,
MoveCall,
JumpFalseCall,
FreeCall,
}
// Basic unit of execution
table Instruction {
instr_args:InstructionArguments;
}
table Frame {
// For storing the frame to print stacktraces
filename:string; // Name of the file in which the instruction exists
lineno:int; // Line number at which the instruction was called
name:string; // Name of the function the instruction was called from
context:string; // Source code of the instruction
}
table FrameList {
// For storing the frames to print stacktraces
items:[Frame];
}
// Indicates where a piece of data is stored.
enum DataLocation : byte {
// Stored directly in the flatbuffer.
INLINE = 0,
// Stored in a segment.
SEGMENT = 1,
}
// Indicates where the delegate data is stored
table BackendDelegateDataReference {
// Indicates which list to index into:
// INLINE -> Program.backend_delegate_data
// SEGMENT -> Program.segments
location: DataLocation;
// The index into the list indicated by the location.
index: uint;
}
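// For illustration only (hypothetical values): location = SEGMENT, index = 0
// refers to Program.segments[0], while location = INLINE, index = 0 refers to
// Program.backend_delegate_data[0].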
table CompileSpec {
// One compile spec. There can be multiple specs for one method.
key: string; // e.g., "max_value"
value: [ubyte]; // e.g., 4, or other types based on the key's needs.
}
table BackendDelegate {
// Used to resolve the delegate backend classes, for example, "TCE0", "TCE1", etc.
// This string is also used in to_backend.
id: string;
// A binary blob (from a subgraph) as an output of preprocessing. Will be
// provided to the backend code at init time. Can be very large, on the
// order of 10-100MB.
processed: BackendDelegateDataReference;
// The compilation spec for the lowered module's forward function
// Example: [CompileSpec["max_value", 4]]
compile_specs: [CompileSpec];
}
// A sequence of blocking instructions to be executed in order. The abstraction is not currently
// leveraged: all current programs consist of a single chain. We are leaving chains as part of the
// program definition for future use cases around graph-level async, where different threads will
// be represented as separate chains.
table Chain {
// Indices of the values that are (non-static) inputs into this Chain.
inputs:[int];
// Indices of the values that are outputs out of this Chain.
outputs:[int];
// List of instructions to be executed in order.
instructions:[Instruction];
// Optional list of frames for each instruction.
// The backend config must have 'emit_stacktrace' set to true for this to be
// emitted.
stacktrace:[FrameList];
}
table ExecutionPlan {
// Name of a method on the nn.Module that was traced to create this program.
name: string;
// Type metadata for the inputs/outputs of the execution plan.
container_meta_type: ContainerMetadata;
// A list of all values used in this execution plan.
values:[EValue];
// Indices of the EValues that are inputs to this execution plan.
// This list contains only the non-constant tensors (i.e., not part of
// the saved program).
inputs:[int];
// Indices of the EValues that are outputs of this execution plan.
// This signals a lifespan that extends beyond the execution.
outputs:[int];
// List of Chains of kernels.
chains:[Chain];
// Operators used in this execution plan
operators:[Operator];
// A list of delegates; each is a special instance of execution at the same level as chains.
delegates: [BackendDelegate];
// List of buffer sizes for non-constant memory allocations. (Think neural net activations.)
// A list instead of a single buffer to account for complex memory hierarchies.
// TODO(jakeszwe, razy): How to reconcile this with the ability for the hierarchical memory allocator
// to be id based instead of index based.
// The runtime should use len(constant_buffer) as the ground truth of the
// constant memory buffer size, and ignore non_const_buffer_sizes[0].
non_const_buffer_sizes: [int64];
}
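// For illustration only (hypothetical indices): inputs = [0, 1] and outputs = [5]
// mean that values[0] and values[1] must be populated by the caller before
// execution, and values[5] holds a result whose lifespan extends beyond the
// execution.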
// Constant tensor data stored directly in the flatbuffer.
table Buffer {
// During serialization, this alignment may be rewritten to a larger value.
// The magic "@executorch-tensor-alignment" comment tells EXIR which lines to
// patch.
storage:[ubyte] (force_align: 16); // @executorch-tensor-alignment
}
// Delegate data stored directly in the flatbuffer. This is a different type
// than Buffer because tensors and delegates can have different alignment
// requirements.
table BackendDelegateInlineData {
// During serialization, this alignment may be rewritten to a larger value.
// The magic "@executorch-delegate-alignment" comment tells EXIR which lines
// to patch.
data: [ubyte] (force_align: 16); // @executorch-delegate-alignment
}
// Describes a contiguous piece of data that lives outside of the flatbuffer data,
// typically appended afterwards in the file. The "extended header" in the file,
// when present, points to the segment base offset.
table DataSegment {
// Segment offsets are relative to the segment base offset provided in
// the extended file header. Segments will typically be aligned in a
// way to make it possible to use mmap() to load them.
offset: uint64;
// The size in bytes of valid data starting at the offset. The segment
// data may be followed by padding before the segment that follows it,
// to make it easier to use mmap().
size: uint64;
}
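// For illustration only (hypothetical values): with a segment base offset of
// 0x1000 recorded in the extended file header, a DataSegment with offset = 0x200
// and size = 1024 describes file bytes [0x1200, 0x1200 + 1024); any padding
// before the next segment is not counted in size.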
table Program {
// Schema version.
version:uint;
// List of ExecutionPlans that make up the program. Each ExecutionPlan corresponds with a
// different entry point into the model.
execution_plan:[ExecutionPlan];
// Tables of constant data, used for constant Values (e.g., the data field of weight tensors).
// Each constant is assigned an index into the table, and each entry is individually aligned.
// Index 0 is reserved to be pointed to by non-constant Tensors.
constant_buffer:[Buffer];
// List of delegate data. Pointed to by BackendDelegateDataReference.
backend_delegate_data:[BackendDelegateInlineData];
// List of data segments that follow the Program data in this file, sorted by
// offset. Elements in this schema can refer to these segments by index.
segments:[DataSegment];
}
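// Sketch of a typical file layout implied by the comments above (an assumption,
// not mandated by this schema):
//   [flatbuffer-serialized Program (identifier "ET12")] [padding]
//   [segment 0] [padding] [segment 1] ...
// where segment offsets are relative to the segment base offset recorded in the
// extended file header.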
root_type Program;