// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// See README.md before modifying this file.
//
include "scalar_type.fbs";
namespace executorch_flatbuffer;
// Identifier of a valid executor schema.
file_identifier "ET12";
// Extension of written files.
file_extension "pte";
// Table that contains the metadata about how to unflatten the flattened
// input/output from the compiler.
table ContainerMetadata {
encoded_inp_str:string;
encoded_out_str:string;
}
table Null {}
// Contains information relevant to the allocation of non-constant
// buffer data (e.g. from tensors).
// This refers to where the buffer needs to be placed in an existing
// memory and at what offset from its base address.
table AllocationDetails {
memory_id:uint; // ID of the memory where this data needs to be placed.
// Offset in bytes relative to the start of the memory area indicated by
// memory_id.
//
// Originally this field was a single 32-bit uint, but we need 64 bits for
// larger models. To preserve backwards compatibility, the high bits are
// managed in a separate 32-bit field. Users should combine the two fields
// to get the full 64-bit offset.
memory_offset_low:uint; // Least significant 32 bits
memory_offset_high:uint; // Most significant 32 bits. Defaults to zero.
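// For illustration only (not part of the schema): a runtime would typically
// reconstruct the full offset as
//   uint64_t memory_offset =
//       ((uint64_t)memory_offset_high << 32) | memory_offset_low;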
}
// Indicates the kinds of shape a Tensor may have, from the point of view of
// its dynamism.
enum TensorShapeDynamism : byte {
// Static shape. Memory is allocated by the compiler.
STATIC = 0,
// Dynamic shape but with an upper bound.
// Memory is allocated by the compiler.
DYNAMIC_BOUND = 1,
// Dynamic shape without upper bound.
// Memory allocation is handled by the runtime.
DYNAMIC_UNBOUND = 2,
}
// Table for additional information about tensors that is not applicable to
// the vast majority of tensors in the vast majority of programs.
table ExtraTensorInfo {
// [Optional] Index into program.mutable_data_segments identifying the
// SubsegmentOffsets entry that specifies where this tensor's data is located.
// If not present and the data is located in a segment, the data is described
// by program.mutable_data_segments[0].
mutable_data_segments_idx:uint64;
// [Optional] The unique name of the tensor. e.g. 'mod.linear.weight'
fully_qualified_name:string;
}
table Tensor {
scalar_type:ScalarType;
// Offset in scalar_type elements (e.g., multiples of 4 bytes for an int
// scalar type) from the beginning of the tensor buffer to the beginning of
// the actual data. Currently, the runtime only supports a value of zero.
storage_offset:int;
sizes:[int];
// Specifies in what order the dimensions are laid out in memory (from outer
// to inner).
//
// For example, given a rank 3 Tensor of size (3, 5, 2) whose dimensions are
// named [row, column, batch], a dim_order of:
// - (2, 0, 1) represents a [batch, row, column] ordering where "column" is
// the innermost dimension, then comes "row", and the outermost dimension is
// "batch".
// - (0, 2, 1) represents a [row, batch, column] ordering where "column" is
// the innermost dimension, then comes "batch", and the outermost dimension
// is "row".
dim_order:[ubyte];
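// For illustration only (not part of the schema): strides can be derived from
// sizes and dim_order by walking the dimensions from innermost to outermost:
//   strides[dim_order[rank - 1]] = 1;
//   for (int i = rank - 2; i >= 0; --i) {
//     strides[dim_order[i]] = strides[dim_order[i + 1]] * sizes[dim_order[i + 1]];
//   }
// e.g. sizes (3, 5, 2) with dim_order (2, 0, 1) yields strides (5, 1, 15).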
// Out of scope for M1.
requires_grad:bool;
// Overall, a Tensor is either constant or mutable. At method load time
// constant tensors receive a dataptr into the serialized program. Mutable
// tensors can either receive a pointer from the hierarchical allocator or a
// nullptr if they will receive a data pointer at execution time (inputs
// and control flow placeholders can be like this). Mutable tensors may or
// may not also have an initial value in the serialized program.
//
// In summary:
// data_buffer_idx > 0, allocation_info = Null: Tensor is a constant.
// data_buffer_idx = 0, allocation_info = Non Null: Tensor is mutable and
// will receive a dataptr at method load time.
// data_buffer_idx = 0, allocation_info = Null: Tensor is mutable and
// will receive a dataptr at input time or during execution.
// data_buffer_idx > 0, allocation_info = Non Null: Tensor is mutable and
// will receive a dataptr at method load time, and has an initial state.
//
// Tensor data is stored inline in program.constant_buffer when that field is
// non-empty; otherwise it is in a segment. If this tensor's allocation_info
// is null then the tensor data location is specified by
// program.constant_segment. If the allocation_info is non-null then the data
// is somewhere in program.mutable_data_segments. If extra_tensor_info is
// null, then the data is in program.mutable_data_segments[0]; otherwise the
// mutable data segment index is specified by
// extra_tensor_info.mutable_data_segments_idx.
data_buffer_idx:uint;
// [Optional] preallocation details for non-constants (null otherwise).
allocation_info:AllocationDetails;
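// For illustration only (not part of the schema): a runtime might resolve the
// data pointer for a tensor along these lines; resolve_constant(),
// planned_memory(), and copy_initial_state() are hypothetical helpers:
//   if (data_buffer_idx > 0 && allocation_info == nullptr) {
//     data_ptr = resolve_constant(data_buffer_idx);    // constant tensor
//   } else if (allocation_info != nullptr) {
//     data_ptr = planned_memory(allocation_info);      // mutable, preplanned
//     if (data_buffer_idx > 0) {
//       copy_initial_state(data_buffer_idx, data_ptr); // has an initial value
//     }
//   } else {
//     data_ptr = nullptr;  // provided at input time or during execution
//   }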
// May not be needed.
layout:byte;
// Determines whether the tensor's shape is static or dynamic, and
// consequently how the allocation of the underlying memory is handled and
// how to interpret the sizes field.
// 1. dynamism == STATIC: the sizes field represents the static shape of
// the tensor.
// 2. dynamism == DYNAMIC_BOUND: the sizes field represents the upper bound
// shape of the tensor. Each dimension of the tensor at runtime should never
// exceed the corresponding dimension of the upper bound shape.
// 3. dynamism == DYNAMIC_UNBOUND: the stored sizes field can be ignored since
// the shape is fully dynamic.
shape_dynamism:TensorShapeDynamism;
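// For illustration only (not part of the schema): for DYNAMIC_BOUND tensors a
// runtime resize would typically validate against the serialized upper bound:
//   for (int d = 0; d < rank; ++d) {
//     assert(new_sizes[d] <= sizes[d]);  // never exceed the upper bound shape
//   }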
// [Optional] Additional information about the Tensor that is not applicable
// to most tensors.
extra_tensor_info:ExtraTensorInfo;
}
table Int {
int_val:long;
}
table Bool {
bool_val:bool;
}
table Double {
double_val:double;
}
table String {
string_val:string;
}
table IntList {
items:[long];
}
table DoubleList {
items:[double];
}
table BoolList {
items:[bool];
}
// Unlike primitive lists, tensor lists have mutable members and aliasing behavior when
// elements are added to them. To match this aliasing behavior, the runtime tensor list is
// serialized by serializing its elements into the ExecutionPlan.values array, and then
// serializing their corresponding indices into TensorList.items.
table TensorList {
items:[int]; // EValue indices.
}
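// For example (illustrative indices): a runtime tensor list holding the
// tensors stored at ExecutionPlan.values[3] and ExecutionPlan.values[7] is
// serialized as TensorList { items: [3, 7] }.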
// Similar to TensorList except the indices can also point to None.
table OptionalTensorList {
items:[int];
}
// Supported values in ExecuTorch kernels. Enums are serialized as ints.
union KernelTypes {
Null,
Int,
Bool,
Double,
Tensor,
String,
IntList,
DoubleList,
BoolList,
TensorList,
OptionalTensorList,
}
// Abstraction for program values. A subset of types supported in core PyTorch kernels.
table EValue {
val:KernelTypes;
}
table Operator {
// An operator is uniquely identified in the operator registry by its name and overload name.
// TODO(larryliu): is there a more efficient way to represent this
name:string;
overload:string;
}
table KernelCall {
// Index to the operators table in the program.
op_index:int;
// Indices into the values table of the arguments (inputs and outputs) required by the operation.
args:[int];
}
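// For example (illustrative indices): op_index = 2 with args = [0, 1, 4]
// invokes operators[2] on values[0], values[1], and values[4]; which of those
// are inputs and which are outputs is determined by the operator's signature.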
table DelegateCall {
// Index to the delegates table in the program.
delegate_index:int;
// Indices into the values table of the arguments (inputs and outputs) required by the delegate.
args:[int];
}
table MoveCall {
// Index into the values table of the EValue we are moving from.
move_from: int;
// Index into the values table of the EValue we are moving into.
move_to: int;
}
table JumpFalseCall {
// Index into the values table of the boolean that specifies whether to jump.
cond_value_index: int;
// Value to set the executor program counter to if the jump occurs.
destination_instruction: int;
}
table FreeCall {
// Index into the values table of the tensor whose underlying data blob is being freed.
value_index: int;
}
union InstructionArguments {
KernelCall,
DelegateCall,
MoveCall,
JumpFalseCall,
FreeCall,
}
// Basic unit of execution
table Instruction {
instr_args:InstructionArguments;
}
table Frame {
// For storing the frame to print stacktraces
filename:string; // Name of the file in which the instruction exists
lineno:int; // Line number at which the instruction was called
name:string; // Name of the function the instruction was called from
context:string; // Source code of the instruction
}
table FrameList {
// For storing the frames to print stacktraces
items:[Frame];
}
// Indicates where a piece of data is stored.
enum DataLocation : byte {
// Stored directly in the flatbuffer.
INLINE = 0,
// Stored in a segment.
SEGMENT = 1,
}
// Indicates where the delegate data is stored
table BackendDelegateDataReference {
// Indicates which list to index into:
// INLINE -> Program.backend_delegate_data
// SEGMENT -> Program.segments
location: DataLocation;
// The index into the list indicated by the location.
index: uint;
}
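// For example (illustrative indices): {location: INLINE, index: 2} refers to
// Program.backend_delegate_data[2], while {location: SEGMENT, index: 1}
// refers to Program.segments[1].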
table CompileSpec {
// One compile spec. There can be multiple specs for one method.
key: string; // like max_value
value: [ubyte]; // like 4, or other types based on needs.
}
table BackendDelegate {
// Used to resolve the delegate backend classes, for example, "TCE0", "TCE1", etc.
// This string is also used in to_backend.
id: string;
// A binary blob (from a subgraph) as an output of preprocessing. Will be
// provided to the backend code at init time. Can be very large, on the
// order of 10-100MB.
processed: BackendDelegateDataReference;
// The compilation spec for the lowered module's forward function
// Example: [CompileSpec["max_value", 4]]
compile_specs: [CompileSpec];
}
// A sequence of blocking instructions to be executed in order. The
// abstraction is not currently leveraged; all current programs consist of a
// single chain. Chains remain part of the program definition for future
// graph-level async use cases, where different threads will be represented
// as separate chains.
table Chain {
// Indices of the values that are (non-static) inputs into this Chain.
inputs:[int];
// Indices of the values that are outputs out of this Chain.
outputs:[int];
// List of instructions to be executed in order.
instructions:[Instruction];
// Optional list of frames for each instruction.
// The backend config must have 'emit_stacktrace' set to true for this to be emitted.
stacktrace:[FrameList];
}
table ExecutionPlan {
// Name of a method on the nn.Module that was traced to create this program.
name: string;
// Type metadata for the inputs/outputs of the execution plan.
container_meta_type: ContainerMetadata;
// A list of all values used in this execution plan.
values:[EValue];
// Indices to the 'EValues' that are inputs to this execution plan.
// This list contains only the non-constant tensors (i.e. not part of
// the saved program).
inputs:[int];
// Indices to the 'EValues' that are outputs of this execution plan.
// This signals a lifespan that goes beyond the execution.
outputs:[int];
// List of Chains of kernels.
chains:[Chain];
// Operators used in this execution plan
operators:[Operator];
// A list of delegates; each is a special unit of execution at the same level as chains.
delegates: [BackendDelegate];
// List of buffer sizes for non-constant memory allocations. (Think neural net activations.)
// A list instead of a single buffer to account for complex memory hierarchies.
// TODO(jakeszwe, razy): How to reconcile this with the ability for the hierarchical memory allocator
// to be id based instead of index based.
// The runtime should use len(constant_buffer) as the ground truth of the
// constants memory buffer size, and ignore non_const_buffer_sizes[0].
non_const_buffer_sizes: [int64];
}
// Constant tensor data stored directly in the flatbuffer.
table Buffer {
// During serialization, this alignment may be rewritten to a larger value.
// The magic "@executorch-tensor-alignment" comment tells EXIR which lines to
// patch.
storage:[ubyte] (force_align: 16); // @executorch-tensor-alignment
}
// Delegate data stored directly in the flatbuffer. This is a different type
// than Buffer because tensors and delegates can have different alignment
// requirements.
table BackendDelegateInlineData {
// During serialization, this alignment may be rewritten to a larger value.
// The magic "@executorch-delegate-alignment" comment tells EXIR which lines
// to patch.
data: [ubyte] (force_align: 16); // @executorch-delegate-alignment
}
// Describes a contiguous piece of data that lives outside of the flatbuffer data,
// typically appended afterwards in the file. The "extended header" in the file,
// when present, points to the segment base offset.
table DataSegment {
// Segment offsets are relative to the segment base offset provided in
// the extended file header. Segments will typically be aligned in a
// way to make it possible to use mmap() to load them.
offset: uint64;
// The size in bytes of valid data starting at the offset. The segment
// data may be followed by padding before the segment that follows it,
// to make it easier to use mmap().
size: uint64;
}
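// For example (illustrative values): with a segment base offset of 0x1000 in
// the extended file header and DataSegment {offset: 0x200, size: 0x80}, the
// segment's bytes occupy file offsets [0x1000 + 0x200, 0x1000 + 0x280).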
// Describes data offsets into a particular segment
table SubsegmentOffsets {
// Index of the segment in Program.segments
segment_index: uint;
// Each element is an offset in bytes into the data of the segment pointed to
// by segment_index. Offsets must be aligned to @executorch-tensor-alignment.
offsets: [uint64];
}
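// For example (illustrative): the data for a constant tensor with
// data_buffer_idx = i is expected to start at byte offset
// constant_segment.offsets[i] within the segment described by
// Program.segments[constant_segment.segment_index].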
table Program {
// Schema version.
version:uint;
// List of ExecutionPlans that make up the program. Each ExecutionPlan corresponds with a
// different entry point into the model.
execution_plan:[ExecutionPlan];
// Tables of constant data, used for constant Values (e.g. the data field of weight tensors).
// Each constant is assigned an index into this list; each entry is individually aligned.
// Index 0 is reserved to be pointed to by non-constant Tensors.
// If this field is non-empty, constant_segment.offsets must be empty.
// DEPRECATED: After D61996249 on 2024-09-05, no new PTE files will use this field.
constant_buffer:[Buffer];
// List of delegate data. Pointed to by BackendDelegateDataReference.
backend_delegate_data:[BackendDelegateInlineData];
// List of data segments that follow the Program data in this file, sorted by
// offset. Elements in this schema can refer to these segments by index.
segments:[DataSegment];
// Describes the offsets of each constant tensor, relative to the segment
// offset. If constant_segment.offsets field is non-empty, constant_buffer
// must be empty. constant_segment.offsets[0] is reserved to be pointed to by
// non-constant Tensors.
constant_segment:SubsegmentOffsets;
// [Optional] Describes the offsets into various segments for each mutable
// tensor. Only mutable tensors with a meaningful initial state are
// serialized here (for example weights that will be trained on-device as
// opposed to just layer activations). Separate from the constant_segment to
// reduce peak memory usage by letting us read directly from the PTE file
// into the mutable tensor, as opposed to loading the .pte data into
// constant memory, copying it over, and then being unable to release the
// constant segment. No two elements should point to the same segment.
mutable_data_segments:[SubsegmentOffsets];
}
root_type Program;