// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// See README.md before modifying this file.
//
include "scalar_type.fbs";
namespace executorch_flatbuffer;
// Identifier of a valid executor schema.
file_identifier "ET12";
// Extension of written files.
file_extension "pte";
// Table that contains the metadata describing how to unflatten the
// flattened input/output from the compiler.
table ContainerMetadata {
encoded_inp_str:string;
encoded_out_str:string;
}
table Null {}
// Contains information relevant to the allocation of non-constant
// buffer data (e.g. from tensors).
// This refers to where the buffer needs to be placed in an existing
// memory and at what offset from its base address.
table AllocationDetails {
memory_id:uint; // ID of the memory where this data needs to be placed.
memory_offset:uint; // Offset (in bytes) w.r.t. the memory base address.
}
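// For illustration only (hypothetical values, not taken from a real program):
// memory_id = 1, memory_offset = 64 places the buffer 64 bytes past the base
// address of memory 1 in the runtime's memory hierarchy.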
// Indicates the kinds of shape a Tensor may have, from the point of view
// of its dynamism.
enum TensorShapeDynamism : byte {
// Static shape. Memory is allocated by the compiler.
STATIC = 0,
// Dynamic shape but with an upper bound.
// Memory is allocated by the compiler.
DYNAMIC_BOUND = 1,
// Dynamic shape without upper bound.
// Memory allocation is handled by the runtime.
DYNAMIC_UNBOUND = 2,
}
table Tensor {
scalar_type:ScalarType;
// Offset in scalar_type elements (e.g., multiples of 4 bytes for an int
// scalar type) from the beginning of the tensor buffer to the beginning of
// the actual data. Currently, the runtime only supports a value of zero.
storage_offset:int;
sizes:[int];
// Specifies the order in which the dimensions are laid out in memory (from outermost to innermost).
// For example, given a rank-3 Tensor of size (3, 5, 2) whose dimensions are named [row, column, batch],
// a dim_order of:
// (2, 0, 1) represents a [batch, row, column] ordering, where "column" is the innermost dimension, then comes "row", and the outermost dimension is "batch".
// (0, 2, 1) represents a [row, batch, column] ordering, where "column" is the innermost dimension, then comes "batch", and the outermost dimension is "row".
dim_order:[ubyte];
// Out of scope for M1.
requires_grad:bool;
// Overall, a Tensor is either constant or non-constant, except we differentiate 2 special
// variants of non-constant Tensor ("input" and control-flow "placeholder") as a special
// optimization to avoid holding unnecessary AllocationDetails.
// In summary:
// constant_buffer_idx > 0, allocation_info = null: Tensor is a constant.
// constant_buffer_idx = 0, allocation_info = non-null: Tensor is a non-constant
// with a buffer pre-allocated as described by allocation_info.
// constant_buffer_idx = 0, allocation_info = null: Tensor is a non-constant
// that will receive a data pointer at input time or during execution.
//
// Index into the program's constant buffer table; value 0 is reserved to
// indicate a non-constant Tensor.
constant_buffer_idx:uint;
// [Optional] preallocation details for non-constants (null otherwise).
allocation_info:AllocationDetails;
// May not be needed.
layout:byte;
// Determines how the tensor's shape behaves with respect to dynamism, and
// consequently how the allocation of the underlying memory is handled and
// how the sizes field should be interpreted.
// 1. dynamism == STATIC: the sizes field represents the static shape of
// the tensor.
// 2. dynamism == DYNAMIC_BOUND: the sizes field represents the upper-bound
// shape of the tensor. At runtime, each dimension of the tensor must never
// exceed the corresponding dimension of the upper-bound shape.
// 3. dynamism == DYNAMIC_UNBOUND: the stored sizes field can be ignored,
// since the shape is fully dynamic.
shape_dynamism: TensorShapeDynamism;
}
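// Sketch, for illustration only (values are hypothetical; assumes scalar_type.fbs
// defines FLOAT): a constant 2x3 float tensor stored contiguously could look like
// scalar_type = FLOAT, storage_offset = 0, sizes = [2, 3], dim_order = [0, 1]
// (outermost to innermost, i.e. the standard contiguous layout), requires_grad = false,
// constant_buffer_idx = 1 (its data lives in Program.constant_buffer[1]),
// allocation_info = null, shape_dynamism = STATIC.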
table Int {
int_val:long;
}
table Bool {
bool_val:bool;
}
table Double {
double_val:double;
}
table String {
string_val:string;
}
table IntList {
items:[long];
}
table DoubleList {
items:[double];
}
table BoolList {
items:[bool];
}
// Unlike primitive lists, tensor lists have mutable members and aliasing behavior when
// elements are added to them. To match this aliasing behavior, the runtime tensor list is
// serialized by serializing its elements into the ExecutionPlan.values array, and then
// serializing their corresponding indices into TensorList.items.
table TensorList {
items:[int]; // EValue indices.
}
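// For illustration only (hypothetical indices): items = [3, 5] means the list
// aliases ExecutionPlan.values[3] and ExecutionPlan.values[5], each of which is
// expected to hold a Tensor.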
// Similar to TensorList except the indices can also point to None.
table OptionalTensorList {
items:[int];
}
// Supported values in ExecuTorch kernels. Enums are serialized as ints.
union KernelTypes {
Null,
Int,
Bool,
Double,
Tensor,
String,
IntList,
DoubleList,
BoolList,
TensorList,
OptionalTensorList,
}
// Abstraction for program values. A subset of the types supported in core PyTorch kernels.
table EValue {
val:KernelTypes;
}
table Operator {
// Operators are uniquely identified by their name and overload name for
// registry lookup.
// TODO(larryliu): is there a more efficient way to represent this
name:string;
overload:string;
}
table KernelCall {
// Index into the ExecutionPlan.operators table.
op_index:int;
// Indices into the values table for the arguments required by the operation (inputs and outputs).
args:[int];
}
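// For illustration only (hypothetical values): op_index = 2 resolves the kernel
// via ExecutionPlan.operators[2], and args = [0, 1, 4] are indices into
// ExecutionPlan.values for the call's inputs and outputs.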
table DelegateCall {
// Index into the ExecutionPlan.delegates table.
delegate_index:int;
// Indices into the values table for the arguments required by the delegate (inputs and outputs).
args:[int];
}
table MoveCall {
// Index into the values table of the EValue we are moving from.
move_from: int;
// Index into the values table of the EValue we are moving to.
move_to: int;
}
table JumpFalseCall {
// Index into the values table of the Bool that specifies whether or not to jump.
cond_value_index: int;
// Value to set the executor's program counter to if the jump occurs.
destination_instruction: int;
}
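// For illustration (a sketch of the intended semantics, as suggested by the
// field comments above): if the Bool at values[cond_value_index] is false, the
// program counter is set to destination_instruction; otherwise execution
// continues with the next instruction.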
table FreeCall {
// Index into the values table of the tensor whose underlying data blob is being freed.
value_index: int;
}
union InstructionArguments {
KernelCall,
DelegateCall,
MoveCall,
JumpFalseCall,
FreeCall,
}
// Basic unit of execution
table Instruction {
instr_args:InstructionArguments;
}
table Frame {
// For storing the frame to print stacktraces
filename:string; // Name of the file in which the instruction exists
lineno:int; // Line number at which the instruction was called
name:string; // Name of the function the instruction was called from
context:string; // Source code of the instruction
}
table FrameList {
// For storing the frames to print stacktraces
items:[Frame];
}
// Indicates where a piece of data is stored.
enum DataLocation : byte {
// Stored directly in the flatbuffer.
INLINE = 0,
// Stored in a segment.
SEGMENT = 1,
}
// Indicates where the delegate data is stored
table BackendDelegateDataReference {
// Indicates which list to index into:
// INLINE -> Program.backend_delegate_data
// SEGMENT -> Program.segments
location: DataLocation;
// The index into the list indicated by the location.
index: uint;
}
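// For illustration only (hypothetical values): location = SEGMENT, index = 0
// refers to Program.segments[0], while location = INLINE, index = 0 refers to
// Program.backend_delegate_data[0].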
table CompileSpec {
// One compile spec. There can be multiple specs for one method.
key: string; // e.g., "max_value"
value: [ubyte]; // e.g., 4, or other types based on the key's needs.
}
table BackendDelegate {
// Used to resolve the delegate backend classes, for example, "TCE0", "TCE1", etc.
// This string is also used in to_backend.
id: string;
// A binary blob (from a subgraph) as an output of preprocessing. Will be
// provided to the backend code at init time. Can be very large, on the
// order of 10-100MB.
processed: BackendDelegateDataReference;
// The compilation spec for the lowered module's forward function
// Example: [CompileSpec["max_value", 4]]
compile_specs: [CompileSpec];
}
// A sequence of blocking instructions to be executed in order. The abstraction is not currently
// leveraged: all current programs consist of a single chain. We are leaving chains as part of the
// program definition for future use cases around graph-level async, where different threads will
// be represented as separate chains.
table Chain {
// Indices of the values that are (non-static) inputs into this Chain.
inputs:[int];
// Indices of the values that are outputs out of this Chain.
outputs:[int];
// List of instructions to be executed in order.
instructions:[Instruction];
// Optional list of frames for each instruction.
// The backend config must have 'emit_stacktrace' set to true for this to be
// emitted.
stacktrace:[FrameList];
}
table ExecutionPlan {
// Name of a method on the nn.Module that was traced to create this program.
name: string;
// Type metadata for the inputs/outputs of the execution plan.
container_meta_type: ContainerMetadata;
// A list of all values used in this execution plan.
values:[EValue];
// Indices of the EValues that are inputs to this execution plan.
// This list contains only the non-constant tensors (i.e., not part of
// the saved program).
inputs:[int];
// Indices of the EValues that are outputs of this execution plan.
// This signals a lifespan that extends beyond the execution.
outputs:[int];
// List of Chains of kernels.
chains:[Chain];
// Operators used in this execution plan
operators:[Operator];
// A list of delegates; each is a special instance of execution at the same level as chains.
delegates: [BackendDelegate];
// List of buffer sizes for non-constant memory allocations. (Think neural net activations.)
// A list instead of a single buffer to account for complex memory hierarchies.
// TODO(jakeszwe, razy): How to reconcile this with the ability for the hierarchical memory allocator
// to be id based instead of index based.
// The runtime should use len(constant_buffer) as the ground truth of the
// constant memory buffer size, and ignore non_const_buffer_sizes[0].
non_const_buffer_sizes: [int64];
}
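// For illustration only (hypothetical indices): inputs = [0, 1] and outputs = [5]
// mean that values[0] and values[1] must be populated by the caller before
// execution, and values[5] holds a result whose lifespan extends beyond the
// execution.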
// Constant tensor data stored directly in the flatbuffer.
table Buffer {
// During serialization, this alignment may be rewritten to a larger value.
// The magic "@executorch-tensor-alignment" comment tells EXIR which lines to
// patch.
storage:[ubyte] (force_align: 16); // @executorch-tensor-alignment
}
// Delegate data stored directly in the flatbuffer. This is a different type
// than Buffer because tensors and delegates can have different alignment
// requirements.
table BackendDelegateInlineData {
// During serialization, this alignment may be rewritten to a larger value.
// The magic "@executorch-delegate-alignment" comment tells EXIR which lines
// to patch.
data: [ubyte] (force_align: 16); // @executorch-delegate-alignment
}
// Describes a contiguous piece of data that lives outside of the flatbuffer data,
// typically appended afterwards in the file. The "extended header" in the file,
// when present, points to the segment base offset.
table DataSegment {
// Segment offsets are relative to the segment base offset provided in
// the extended file header. Segments will typically be aligned in a
// way to make it possible to use mmap() to load them.
offset: uint64;
// The size in bytes of valid data starting at the offset. The segment
// data may be followed by padding before the segment that follows it,
// to make it easier to use mmap().
size: uint64;
}
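// For illustration only (hypothetical values): with a segment base offset of
// 0x1000 recorded in the extended file header, a DataSegment with offset = 0x200
// and size = 1024 describes file bytes [0x1200, 0x1200 + 1024); any padding
// before the next segment is not counted in size.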
table Program {
// Schema version.
version:uint;
// List of ExecutionPlans that make up the program. Each ExecutionPlan corresponds with a
// different entry point into the model.
execution_plan:[ExecutionPlan];
// Tables of constant data, used for constant Values (e.g., the data field of weight tensors).
// Each constant is assigned an index into the table, and each entry is individually aligned.
// Index 0 is reserved to be pointed to by non-constant Tensors.
constant_buffer:[Buffer];
// List of delegate data. Pointed to by BackendDelegateDataReference.
backend_delegate_data:[BackendDelegateInlineData];
// List of data segments that follow the Program data in this file, sorted by
// offset. Elements in this schema can refer to these segments by index.
segments:[DataSegment];
}
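// Sketch of a typical file layout implied by the comments above (an assumption,
// not mandated by this schema):
//   [flatbuffer-serialized Program (identifier "ET12")] [padding]
//   [segment 0] [padding] [segment 1] ...
// where segment offsets are relative to the segment base offset recorded in the
// extended file header.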
root_type Program;