| // Copyright (c) Meta Platforms, Inc. and affiliates. |
| |
| // |
| // See README.md before modifying this file. |
| // |
| |
| include "scalar_type.fbs"; |
| |
| namespace executorch_flatbuffer; |
| |
| // Identifier of a valid executor schema. |
| file_identifier "ET12"; |
| // Extension of written files. |
| file_extension "pte"; |
| |
| // Table that contains metadata about how to unflatten the flattened |
| // input/output from the compiler. |
| table ContainerMetadata { |
| encoded_inp_str:string; |
| encoded_out_str:string; |
| } |
| |
| table Null {} |
| |
| // Contains information relevant to the allocation of non-constant |
| // buffer data (e.g. from tensors). |
| // This describes which existing memory the buffer needs to be placed in, |
| // and at what offset from that memory's base address. |
| table AllocationDetails { |
| memory_id:uint; // ID of the memory where this data needs to be placed. |
| |
| // Offset in bytes relative to the start of the memory area indicated by |
| // memory_id. |
| // |
| // Originally this field was a single 32-bit uint, but we need 64 bits for |
| // larger models. To preserve backwards compatibility, the high bits are |
| // managed in a separate 32-bit field. Users should combine the two fields |
| // to get the full 64-bit offset. |
| memory_offset_low:uint; // Least significant 32 bits |
| memory_offset_high:uint; // Most significant 32 bits. Defaults to zero. |
| } |
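| |
| // The full 64-bit offset described by AllocationDetails is split across the |
| // two 32-bit fields above. A minimal sketch of how a reader might recombine |
| // them (illustrative C++; not the runtime's actual helper): |
| // |
| //   #include <cstdint> |
| // |
| //   // Reconstruct the 64-bit offset from the serialized low/high halves. |
| //   uint64_t full_memory_offset(uint32_t low, uint32_t high) { |
| //     return (static_cast<uint64_t>(high) << 32) + low; |
| //   } |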
| |
| // Indicates the kinds of shape a Tensor may have, from the point |
| // of view of its dynamism. |
| enum TensorShapeDynamism : byte { |
| // Static shape. Memory is allocated by the compiler. |
| STATIC = 0, |
| // Dynamic shape but with an upper bound. |
| // Memory is allocated by the compiler. |
| DYNAMIC_BOUND = 1, |
| // Dynamic shape without upper bound. |
| // Memory allocation is handled by the runtime. |
| DYNAMIC_UNBOUND = 2, |
| } |
| |
| |
| // Table for additional tensor information that is not applicable to the |
| // vast majority of tensors in the vast majority of programs. |
| table ExtraTensorInfo { |
| // [Optional] Index of the SubsegmentOffsets entry in |
| // program.mutable_data_segments that specifies where this tensor's data is |
| // located. If not present and the data is located in a segment, the data |
| // is in the entry at index 0. |
| mutable_data_segments_idx:uint64; |
| |
| // [Optional] The unique name of the tensor. e.g. 'mod.linear.weight' |
| fully_qualified_name:string; |
| } |
| |
| table Tensor { |
| scalar_type:ScalarType; |
| |
| // Offset in scalar_type elements (e.g., multiples of 4 bytes for an int |
| // scalar type) from the beginning of the tensor buffer to the beginning of |
| // the actual data. Currently, the runtime only supports a value of zero. |
| storage_offset:int; |
| |
| sizes:[int]; |
| |
| // Specifies in what order the dimensions are laid out in memory (from outer |
| // to inner). |
| // |
| // For example, consider a rank-3 Tensor of size (3, 5, 2) whose dimensions |
| // are named [row, column, batch]. Then a dim_order of: |
| // - (2, 0, 1) represents a [batch, row, column] ordering where "column" is |
| // the innermost dimension, then comes "row", and the outermost dimension is |
| // "batch". |
| // - (0, 2, 1) represents a [row, batch, column] ordering where "column" is |
| // the innermost dimension, then comes "batch", and the outermost dimension |
| // is "row". |
| dim_order:[ubyte]; |
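| |
| // Given sizes and dim_order, element strides follow by walking dim_order from |
| // innermost to outermost. A minimal illustrative sketch (C++; the helper name |
| // is an assumption, not the runtime's actual API): |
| // |
| //   #include <cstdint> |
| //   #include <vector> |
| // |
| //   // strides[d] = number of elements to skip to advance dimension d by one. |
| //   std::vector<int64_t> strides_from_dim_order( |
| //       const std::vector<int32_t>& sizes, |
| //       const std::vector<uint8_t>& dim_order) { |
| //     std::vector<int64_t> strides(sizes.size()); |
| //     int64_t running = 1; |
| //     // dim_order lists dimensions from outermost to innermost, so walk backwards. |
| //     for (size_t i = dim_order.size(); i-- > 0;) { |
| //       strides[dim_order[i]] = running; |
| //       running *= sizes[dim_order[i]]; |
| //     } |
| //     return strides; |
| //   } |
| // |
| // For the example above, sizes (3, 5, 2) with dim_order (2, 0, 1) yields |
| // strides (5, 1, 15). |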
| |
| // Out of scope for M1. |
| requires_grad:bool; |
| |
| // Overall, a Tensor is either constant or mutable. At method load time |
| // constant tensors receive a dataptr into the serialized program. Mutable |
| // tensors can either receive a pointer from the hierarchical allocator or a |
| // nullptr if they will receive a data pointer at execution time (inputs |
| // and control flow placeholders can be like this). Mutable tensors may or |
| // may not also have an initial value in the serialized program. |
| // |
| // In summary: |
| // data_buffer_idx > 0, allocation_info = Null: Tensor is a constant. |
| // data_buffer_idx = 0, allocation_info = Non Null: Tensor is mutable and |
| // will receive a dataptr at method load time. |
| // data_buffer_idx = 0, allocation_info = Null: Tensor is mutable and |
| // will receive a dataptr at input time or during execution. |
| // data_buffer_idx > 0, allocation_info = Non Null: Tensor is mutable and |
| // will receive a dataptr at method load time, and has an initial state. |
| // |
| // Tensor data is stored inline in program.constant_buffer when that |
| // (deprecated) field is non-empty; otherwise it is in a segment. If this |
| // tensor's allocation_info is null then the tensor data location is |
| // specified by program.constant_segment. If the allocation_info is non-null |
| // then the data is somewhere in program.mutable_data_segments. If |
| // extra_tensor_info is null, then the data is in |
| // program.mutable_data_segments[0]; otherwise the mutable_data_segments |
| // index is specified by extra_tensor_info.mutable_data_segments_idx. |
| data_buffer_idx:uint; |
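| |
| // A minimal sketch of the constant/mutable decision summarized above |
| // (illustrative C++; the enum and function are assumptions for illustration, |
| // not the runtime's actual types): |
| // |
| //   enum class TensorDataSource { |
| //     Constant,                // data_buffer_idx > 0, allocation_info null |
| //     MutablePreallocated,     // data_buffer_idx == 0, allocation_info non-null |
| //     MutableRuntimeProvided,  // data_buffer_idx == 0, allocation_info null |
| //     MutableWithInitialState, // data_buffer_idx > 0, allocation_info non-null |
| //   }; |
| // |
| //   TensorDataSource classify(uint32_t data_buffer_idx, bool has_allocation_info) { |
| //     if (data_buffer_idx > 0) { |
| //       return has_allocation_info ? TensorDataSource::MutableWithInitialState |
| //                                  : TensorDataSource::Constant; |
| //     } |
| //     return has_allocation_info ? TensorDataSource::MutablePreallocated |
| //                                : TensorDataSource::MutableRuntimeProvided; |
| //   } |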
| |
| // [Optional] preallocation details for non-constants (null otherwise). |
| allocation_info:AllocationDetails; |
| |
| // May not be needed. |
| layout:byte; |
| |
| // Determines how dynamic the tensor's shape is, and consequently how the |
| // underlying memory is allocated and how the sizes field is interpreted. |
| // 1. dynamism == STATIC: the sizes field represents the static shape of |
| // the tensor. |
| // 2. dynamism == DYNAMIC_BOUND: the sizes field represents the upper-bound |
| // shape of the tensor. At runtime, each dimension of the tensor should |
| // never exceed the corresponding dimension of this upper-bound shape. |
| // 3. dynamism == DYNAMIC_UNBOUND: the stored sizes field can be ignored |
| // since the shape is fully dynamic. |
| shape_dynamism:TensorShapeDynamism; |
| |
| // [Optional] Additional information about the Tensor that is not applicable |
| // to most tensors. |
| extra_tensor_info:ExtraTensorInfo; |
| } |
| |
| table Int { |
| int_val:long; |
| } |
| |
| table Bool { |
| bool_val:bool; |
| } |
| |
| table Double { |
| double_val:double; |
| } |
| |
| table String { |
| string_val:string; |
| } |
| |
| table IntList { |
| items:[long]; |
| } |
| |
| table DoubleList { |
| items:[double]; |
| } |
| |
| table BoolList { |
| items:[bool]; |
| } |
| |
| // Unlike primitive lists, tensor lists have mutable members and aliasing behavior when |
| // elements are added to them. To match this aliasing behavior, the runtime tensor list is |
| // serialized by serializing its elements into the ExecutionPlan.values array, and then |
| // serializing their corresponding indices into TensorList.items. |
| table TensorList { |
| items:[int]; // EValue indices. |
| } |
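| |
| // A minimal sketch of resolving a serialized TensorList back into tensor |
| // references at load time (illustrative C++; EValue and Tensor here are |
| // stand-ins for the runtime's types, and .tensor is an assumed accessor): |
| // |
| //   std::vector<const Tensor*> resolve_tensor_list( |
| //       const std::vector<EValue>& values,  // ExecutionPlan.values |
| //       const std::vector<int>& items) {    // TensorList.items |
| //     std::vector<const Tensor*> out; |
| //     out.reserve(items.size()); |
| //     for (int idx : items) { |
| //       out.push_back(&values[idx].tensor);  // each item is an EValue index |
| //     } |
| //     return out; |
| //   } |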
| |
| // Similar to TensorList except the indices can also point to None. |
| table OptionalTensorList { |
| items:[int]; |
| } |
| |
| // Supported values in ExecuTorch kernels. Enums are serialized as ints. |
| union KernelTypes { |
| Null, |
| Int, |
| Bool, |
| Double, |
| Tensor, |
| String, |
| IntList, |
| DoubleList, |
| BoolList, |
| TensorList, |
| OptionalTensorList, |
| } |
| |
| // Abstraction for program values. A subset of the types supported in core PyTorch kernels. |
| table EValue { |
| val:KernelTypes; |
| } |
| |
| table Operator { |
| // An operator is uniquely identified in the operator registry by its name and overload name. |
| // TODO(larryliu): is there a more efficient way to represent this |
| name:string; |
| overload:string; |
| } |
| |
| table KernelCall { |
| // Index into the operators list of the execution plan. |
| op_index:int; |
| |
| // Indices into the execution plan's values list for the arguments |
| // (inputs and outputs) of this operation. |
| args:[int]; |
| } |
| |
| table DelegateCall { |
| // Index into the delegates list of the execution plan. |
| delegate_index:int; |
| |
| // Indices into the execution plan's values list for the arguments |
| // (inputs and outputs) of the delegate. |
| args:[int]; |
| } |
| |
| table MoveCall { |
| // Index into the values table of the EValue we are moving from. |
| move_from: int; |
| |
| // Index into the values table of the EValue we are moving into. |
| move_to: int; |
| } |
| |
| table JumpFalseCall { |
| // Index into the values table of the boolean that determines whether to jump. |
| cond_value_index: int; |
| |
| // Value to set the executor's program counter to if the jump occurs. |
| destination_instruction: int; |
| } |
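| |
| // A minimal sketch of how an interpreter loop might apply JumpFalseCall |
| // (illustrative C++; names are assumptions, not the runtime's API). As the |
| // table name suggests, the jump is taken when the condition value is false: |
| // |
| //   void apply_jump_false(size_t& pc, const std::vector<bool>& bool_values, |
| //                         int cond_value_index, int destination_instruction) { |
| //     if (!bool_values[cond_value_index]) { |
| //       pc = static_cast<size_t>(destination_instruction); |
| //     } else { |
| //       ++pc;  // fall through to the next instruction |
| //     } |
| //   } |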
| |
| table FreeCall { |
| // Index into the values table of the tensor whose underlying data blob is being freed. |
| value_index: int; |
| } |
| |
| union InstructionArguments { |
| KernelCall, |
| DelegateCall, |
| MoveCall, |
| JumpFalseCall, |
| FreeCall, |
| } |
| |
| // Basic unit of execution |
| table Instruction { |
| instr_args:InstructionArguments; |
| } |
| |
| table Frame { |
| // For storing the frame to print stacktraces |
| filename:string; // Name of the file in which the instruction exists |
| lineno:int; // Line number at which the instruction was called |
| name:string; // Name of the function the instruction was called from |
| context:string; // Source code of the instruction |
| } |
| |
| table FrameList { |
| // For storing the frames to print stacktraces |
| items:[Frame]; |
| } |
| |
| // Indicates where a piece of data is stored. |
| enum DataLocation : byte { |
| // Stored directly in the flatbuffer. |
| INLINE = 0, |
| // Stored in a segment. |
| SEGMENT = 1, |
| } |
| |
| // Indicates where the delegate data is stored |
| table BackendDelegateDataReference { |
| // Indicates which list to index into: |
| // INLINE -> Program.backend_delegate_data |
| // SEGMENT -> Program.segments |
| location: DataLocation; |
| |
| // The index into the list indicated by the location. |
| index: uint; |
| } |
| |
| table CompileSpec { |
| // One compile spec. There can be multiple specs for one method. |
| key: string; // like max_value |
| value: [ubyte]; // like 4, or other types based on needs. |
| } |
| |
| table BackendDelegate { |
| // Used to resolve the delegate backend classes, for example, "TCE0", "TCE1", etc. |
| // This string is also used in to_backend. |
| id: string; |
| |
| // A binary blob (from a subgraph) as an output of preprocessing. Will be |
| // provided to the backend code at init time. Can be very large, on the |
| // order of 10-100MB. |
| processed: BackendDelegateDataReference; |
| |
| // The compilation spec for the lowered module's forward function |
| // Example: [CompileSpec["max_value", 4]] |
| compile_specs: [CompileSpec]; |
| } |
| |
| // A sequence of blocking instructions to be executed in order. The |
| // abstraction is not currently leveraged; all current programs consist of a |
| // single chain. We are leaving chains as part of the program definition for |
| // future use cases around graph-level async, where different threads will be |
| // represented as separate chains. |
| table Chain { |
| // Indices of the values that are (non-static) inputs into this Chain. |
| inputs:[int]; |
| |
| // Indices of the values that are outputs out of this Chain. |
| outputs:[int]; |
| |
| // List of instructions to be executed in order. |
| instructions:[Instruction]; |
| |
| // Optional list of frames for each instruction. |
| // The backend config must have 'emit_stacktrace' set to true for this |
| // field to be emitted. |
| stacktrace:[FrameList]; |
| } |
| |
| table ExecutionPlan { |
| |
| // Name of a method on the nn.Module that was traced to create this program. |
| name: string; |
| |
| // Type metadata for the inputs/outputs of the execution plan. |
| container_meta_type: ContainerMetadata; |
| |
| // A list of all values used in this execution plan. |
| values:[EValue]; |
| |
| // Indices into 'values' of the EValues that are inputs to this execution |
| // plan. This list contains only the non-constant tensors (i.e. not part of |
| // the saved program). |
| inputs:[int]; |
| |
| // Indices into 'values' of the EValues that are outputs of this execution |
| // plan. This signals a lifespan that goes beyond the execution. |
| outputs:[int]; |
| |
| // List of Chains of kernels. |
| chains:[Chain]; |
| |
| // Operators used in this execution plan |
| operators:[Operator]; |
| |
| // A list of delegates; each is a special instance of execution at the same level as chains. |
| delegates: [BackendDelegate]; |
| |
| // List of buffer sizes for non-constant memory allocations (think neural net |
| // activations). A list instead of a single buffer to account for complex |
| // memory hierarchies. |
| // TODO(jakeszwe, razy): How to reconcile this with the ability for the |
| // hierarchical memory allocator to be id-based instead of index-based. |
| // The runtime should use len(constant_buffer) as the ground truth of the |
| // constant memory buffer size, and ignore non_const_buffer_sizes[0]. |
| non_const_buffer_sizes: [int64]; |
| |
| } |
| |
| // Constant tensor data stored directly in the flatbuffer. |
| table Buffer { |
| // During serialization, this alignment may be rewritten to a larger value. |
| // The magic "@executorch-tensor-alignment" comment tells EXIR which lines to |
| // patch. |
| storage:[ubyte] (force_align: 16); // @executorch-tensor-alignment |
| } |
| |
| // Delegate data stored directly in the flatbuffer. This is a different type |
| // than Buffer because tensors and delegates can have different alignment |
| // requirements. |
| table BackendDelegateInlineData { |
| // During serialization, this alignment may be rewritten to a larger value. |
| // The magic "@executorch-delegate-alignment" comment tells EXIR which lines |
| // to patch. |
| data: [ubyte] (force_align: 16); // @executorch-delegate-alignment |
| } |
| |
| // Describes a contiguous piece of data that lives outside of the flatbuffer data, |
| // typically appended afterwards in the file. The "extended header" in the file, |
| // when present, points to the segment base offset. |
| table DataSegment { |
| // Segment offsets are relative to the segment base offset provided in |
| // the extended file header. Segments will typically be aligned in a |
| // way to make it possible to use mmap() to load them. |
| offset: uint64; |
| |
| // The size in bytes of valid data starting at the offset. The segment |
| // data may be followed by padding before the segment that follows it, |
| // to make it easier to use mmap(). |
| size: uint64; |
| } |
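| |
| // A minimal sketch of resolving a DataSegment to an absolute file range, |
| // assuming the segment base offset has already been read from the extended |
| // file header (illustrative C++; not the runtime's actual API): |
| // |
| //   #include <cstdint> |
| // |
| //   struct FileRange { uint64_t begin; uint64_t end; }; |
| // |
| //   FileRange resolve_segment(uint64_t segment_base_offset, |
| //                             uint64_t segment_offset, uint64_t segment_size) { |
| //     const uint64_t begin = segment_base_offset + segment_offset; |
| //     return FileRange{begin, begin + segment_size}; |
| //   } |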
| |
| // Describes data offsets into a particular segment |
| table SubsegmentOffsets { |
| // Index of the segment in Program.segments |
| segment_index: uint; |
| |
| // Each element is an offset in bytes into the data of the segment pointed to |
| // by segment_index. Offsets must be aligned to @executorch-tensor-alignment. |
| offsets: [uint64]; |
| } |
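| |
| // Putting DataSegment and SubsegmentOffsets together: the data for one |
| // subsegment is found by adding the segment's offset and the subsegment |
| // offset to the segment base offset from the extended header. A minimal |
| // illustrative sketch (C++; assumes, e.g., that a constant tensor's |
| // data_buffer_idx indexes constant_segment.offsets): |
| // |
| //   uint64_t subsegment_file_offset( |
| //       uint64_t segment_base_offset,  // from the extended file header |
| //       uint64_t segment_offset,       // segments[subseg.segment_index].offset |
| //       uint64_t subsegment_offset) {  // subseg.offsets[i] |
| //     return segment_base_offset + segment_offset + subsegment_offset; |
| //   } |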
| |
| table Program { |
| // Schema version. |
| version:uint; |
| |
| // List of ExecutionPlans that make up the program. Each ExecutionPlan corresponds to a |
| // different entry point into the model. |
| execution_plan:[ExecutionPlan]; |
| |
| // Table of constant data, used for constant Values (e.g. the data field of |
| // weight tensors). Each constant is assigned an index into the table, and |
| // each entry is individually aligned. Index 0 is reserved to be pointed to |
| // by non-constant Tensors. |
| // If this field is non-empty, constant_segment.offsets must be empty. |
| // DEPRECATED: After D61996249 on 2024-09-05, no new PTE files will use this field. |
| constant_buffer:[Buffer]; |
| |
| // List of delegate data. Pointed to by BackendDelegateDataReference. |
| backend_delegate_data:[BackendDelegateInlineData]; |
| |
| // List of data segments that follow the Program data in this file, sorted by |
| // offset. Elements in this schema can refer to these segments by index. |
| segments:[DataSegment]; |
| |
| // Describes the offsets of each constant tensor, relative to the segment |
| // offset. If the constant_segment.offsets field is non-empty, constant_buffer |
| // must be empty. constant_segment.offsets[0] is reserved to be pointed to by |
| // non-constant Tensors. |
| constant_segment:SubsegmentOffsets; |
| |
| // [Optional] Describes the offsets into various segments for each mutable |
| // tensor. Only mutable tensors with a meaningful initial state are |
| // serialized here (for example weights that will be trained on-device as |
| // opposed to just layer activations). Separate from the constant_segment to |
| // reduce peak memory usage by letting us read directly from the PTE file |
| // into the mutable tensor, as opposed to loading the .pte data into |
| // constant memory, copying it over, and then being unable to release the |
| // constant segment. No two elements should point to the same segment. |
| mutable_data_segments:[SubsegmentOffsets]; |
| } |
| |
| root_type Program; |