blob: efe717e085edaa00aa9108d4779c9a9bde92d78d [file]
// Copyright (c) Meta Platforms, Inc. and affiliates.
// Namespace that every type declared in this schema belongs to.
namespace fb_xnnpack;
// Four-character FlatBuffers file identifier for this schema.
// Update after any BC (backward-compatibility) breaking changes
file_identifier "XN00";
// Element datatype tag for xnn-values, stored as a 16-bit `short`.
// Each member carries an explicit numeric value; those numbers are what is
// serialized, so existing entries must keep their values.
enum XNNDatatype : short {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,

  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,

  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,

  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,

  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,

  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,

  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,

  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,

  /// Quantized 4-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint4 = 8,

  /// Dynamically quantized 8-bit signed integer with per-batch quantization parameters.
  xnn_datatype_qdint8 = 9,

  /// Quantized 4-bit signed integer with shared blockwise quantization parameters.
  xnn_datatype_qbint4 = 10,
}
// Kind of quantization attached to a quantized value. The union holds one of
// the parameter tables listed here; it is the type of
// XNNQuantizedTensorValue.quant_params.
// Member order determines the union discriminant, so keep the order stable.
union XNNQuantParams {
  PerChannelQuant,
  PerTensorQuant,
  PerTokenDynamicQuant,
  PerChannelGroupQuant,
}
// Data buffer abstraction (taken from executorch).
table Buffer {
  // Raw bytes of the buffer; the vector is declared with force_align: 16.
  storage: [ubyte] (force_align: 16);
}
// Quantization parameters for the per-channel scheme (a member of XNNQuantParams).
table PerChannelQuant {
  // Vector of float scales.
  // NOTE(review): presumably one scale per channel — confirm with the serializer.
  scale: [float];
  // NOTE(review): presumably the tensor dimension the channels run along — confirm.
  channel_dim: int;
}
// Quantization parameters for the per-token dynamic scheme (a member of XNNQuantParams).
table PerTokenDynamicQuant {
  // NOTE(review): presumably the count of dimensions that are not batch
  // dimensions — confirm against the runtime that consumes this field.
  num_nonbatch_dims: int;
}
// Quantization parameters for the per-tensor scheme (a member of XNNQuantParams):
// a single scalar scale and a single integer zero point.
table PerTensorQuant {
  scale: float;
  zero_point: int;
}
// Quantization parameters for the per-channel-group scheme (a member of XNNQuantParams).
table PerChannelGroupQuant {
  // Scales as 32-bit floats.
  scale: [float];
  // NOTE(review): presumably the tensor dimension the channels run along — confirm.
  channel_dim: int;
  // NOTE(review): presumably the number of elements sharing one scale — confirm.
  group_size: int;
  // Scales as 16-bit unsigned words.
  // NOTE(review): the name suggests bfloat16 bit patterns, and that this is an
  // alternative to `scale` — confirm which of the two a reader should prefer.
  scale_bf16: [ushort];
}
// Description of a tensor value in the graph.
// NOTE(review): several field descriptions below read like XNNPACK C API
// parameter notes (pointers, "upon successful return") — confirm against the
// XNNPACK tensor-value definition API.
table XNNTensorValue {
  // Type of the tensor elements.
  datatype: XNNDatatype;

  // Number of dimensions in the shape.
  num_dims: uint;

  // The @a num_dims shape dimensions. API note: when num_dims is 0 the array
  // pointer can be NULL, and XNNPACK does not keep any pointer to the array
  // after the function returns.
  dims: [uint];

  // Index into the program's constant buffer table. Value 0 is reserved and
  // indicates a non-constant value.
  constant_buffer_idx: uint;

  // External ID for the Value. It must be within the range of reserved Value
  // IDs specified on Subgraph creation. If it is XNN_INVALID_VALUE_ID, an
  // internal ID will be created for the Value.
  external_id: uint;

  // Binary features of the Value. Supported values are any combination of
  // XNN_VALUE_FLAG_EXTERNAL_INPUT and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
  flags: uint;

  // API note: the variable that is initialized with the Value ID upon
  // successful return. If a valid @a external_id was provided, it is
  // initialized with the @a external_id value.
  id_out: uint;

  // Whether this value needs to be quantized dynamically at runtime: when it
  // does, this field holds the target dtype. Default is xnn_datatype_invalid.
  dq_datatype: XNNDatatype = xnn_datatype_invalid;
}
// A tensor value paired with its quantization parameters.
table XNNQuantizedTensorValue {
  // The base tensor description.
  tensor_value: XNNTensorValue;

  // The quantization parameters (one member of the XNNQuantParams union).
  quant_params: XNNQuantParams;
}
// Union of every node kind a graph can contain; it is the type of XNode.xnode_union.
// Entries written as `Alias: Table` give a distinct union member that reuses a
// shared payload table (_XNNNode1x1, _XNNNode2x1, _XNNNodeConv, _XNNPooling2D,
// _XNNCat); bare entries use a table of the same name.
// Member order determines the union discriminant, so keep the order stable.
union XNodeUnion {
  XNNAdd: _XNNNode2x1,
  XNNFullyConnected,
  XNNSoftmax: _XNNNode1x1,
  XNNSigmoid: _XNNNode1x1,
  XNNStaticTranspose,
  XNNClamp: _XNNNode1x1,
  XNNConv2d: _XNNNodeConv,
  XNNDiv: _XNNNode2x1,
  XNNStaticResizeBilinear2D,
  XNNStaticConstantPad,
  XNNAvgPooling2d: _XNNPooling2D,
  XNNMinimum: _XNNNode2x1,
  XNNDepthwiseConv2d: _XNNNodeConv,
  XNNMaxPooling2d: _XNNPooling2D,
  XNNMultiply: _XNNNode2x1,
  XNNSubtract: _XNNNode2x1,
  XNNFloor: _XNNNode1x1,
  XNNConvert: _XNNNode1x1,
  XNNGlobalAvgPooling2d: _XNNNode1x1,
  XNNStaticReshape,
  XNNArgMaxPooling2d,
  XNNSquareRoot: _XNNNode1x1,
  XNNCeiling: _XNNNode1x1,
  XNNHardswish: _XNNNode1x1,
  XNNLeakyReLU,
  XNNMaximum: _XNNNode2x1,
  XNNNegate: _XNNNode1x1,
  XNNSquare: _XNNNode1x1,
  XNNELU,
  XNNAbs: _XNNNode1x1,
  XNNPReLU: _XNNNode2x1,
  XNNConcatenate2: _XNNCat,
  XNNConcatenate3: _XNNCat,
  XNNConcatenate4: _XNNCat,
  XNNStaticSlice,
  XNNScaledDotProductAttention,
  XNNBatchMatrixMultiply: _XNNNode2x1,
}
// Union of the value kinds a graph can contain: a plain tensor value or a
// quantized one. It is the type of XValue.xvalue_union.
union XValueUnion {
  XNNTensorValue,
  XNNQuantizedTensorValue,
}
// A pair of float bounds carried by XNode.output_min_max.
// NOTE(review): presumably the range a node's output is clamped to — confirm
// against the runtime that consumes it.
table OutputMinMax {
  output_min: float;
  output_max: float;
}
// One node of the graph: the node payload plus per-node metadata.
table XNode {
  // The node payload (one member of XNodeUnion).
  xnode_union: XNodeUnion;

  // An int which can be linked back to the node in the origin graph.
  debug_handle: uint;

  // Output bounds for this node, see OutputMinMax.
  output_min_max: OutputMinMax;
}
// One value of the graph; wraps the XValueUnion payload.
table XValue {
  xvalue_union: XValueUnion;
}
// Payload of the XNNStaticTranspose node.
// NOTE(review): input_id / output_id presumably refer to graph values by ID,
// and flags presumably carries XNNPACK node flags — confirm.
table XNNStaticTranspose {
  num_dims: uint;
  // NOTE(review): presumably the dimension permutation, num_dims entries long — confirm.
  perm: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}
// Payload of the XNNStaticResizeBilinear2D node: a target height and width
// plus the input/output references.
// NOTE(review): input_id / output_id presumably refer to graph values by ID,
// and flags presumably carries XNNPACK node flags — confirm.
table XNNStaticResizeBilinear2D {
  new_height: uint;
  new_width: uint;
  input_id: uint;
  output_id: uint;
  flags: uint;
}
// Payload of the XNNStaticConstantPad node.
// NOTE(review): input_id / output_id presumably refer to graph values by ID,
// and flags presumably carries XNNPACK node flags — confirm.
table XNNStaticConstantPad {
  // NOTE(review): presumably per-dimension padding amounts before / after the
  // data — confirm.
  pre_paddings: [uint];
  post_paddings: [uint];
  // The float value used for padding.
  padding_value: float;
  input_id: uint;
  output_id: uint;
  flags: uint;
}
// Shared payload for nodes with two inputs and one output.
// Not meant to be used directly: XNodeUnion exposes it under aliases
// (XNNAdd, XNNDiv, XNNMinimum, XNNMultiply, XNNSubtract, XNNMaximum, XNNPReLU,
// XNNBatchMatrixMultiply).
table _XNNNode2x1 {
  input1_id: uint;
  input2_id: uint;
  output_id: uint;
  flags: uint;
}
// Shared payload for nodes with one input and one output.
// Not meant to be used directly: XNodeUnion exposes it under aliases
// (XNNSoftmax, XNNSigmoid, XNNClamp, XNNFloor, XNNConvert,
// XNNGlobalAvgPooling2d, XNNSquareRoot, XNNCeiling, XNNHardswish, XNNNegate,
// XNNSquare, XNNAbs).
table _XNNNode1x1 {
  input_id: uint;
  output_id: uint;
  flags: uint;
}
// Shared payload for the concatenate nodes; XNodeUnion exposes it as
// XNNConcatenate2, XNNConcatenate3 and XNNConcatenate4.
// NOTE(review): presumably only the first 2 / 3 / 4 input ids are meaningful
// for the respective alias — confirm against the serializer and runtime.
table _XNNCat {
  axis: uint;
  input1_id: uint;
  input2_id: uint;
  input3_id: uint;
  input4_id: uint;
  output_id: uint;
  flags: uint;
}
// Payload of the XNNELU node: a float alpha parameter plus the input/output
// references and flags.
table XNNELU {
  alpha: float;
  input_id: uint;
  output_id: uint;
  flags: uint;
}
// Payload of the XNNFullyConnected node: ids for the input, filter, bias and
// output, plus flags.
// NOTE(review): the ids presumably refer to graph values — confirm.
table XNNFullyConnected {
  input1_id: uint;
  filter_id: uint;
  bias_id: uint;
  output_id: uint;
  flags: uint;
}
// Shared payload for the convolution nodes; XNodeUnion exposes it as
// XNNConv2d and XNNDepthwiseConv2d.
// NOTE(review): the geometry fields presumably map one-to-one onto the
// parameters of the XNNPACK convolution definition call — confirm.
table _XNNNodeConv {
  // Padding on each side.
  padding_top: uint;
  padding_right: uint;
  padding_bottom: uint;
  padding_left: uint;

  // Kernel, subsampling and dilation sizes.
  kernel_height: uint;
  kernel_width: uint;
  subsampling_height: uint;
  subsampling_width: uint;
  dilation_height: uint;
  dilation_width: uint;

  // Channel grouping.
  group_input_channels: uint;
  group_output_channels: uint;
  groups: uint;

  adjustment_height: uint;
  adjustment_width: uint;

  // Ids for the input, filter, bias and output, followed by flags.
  input1_id: uint;
  filter_id: uint;
  bias_id: uint;
  output_id: uint;
  flags: uint;
}
// Shared payload for 2D pooling nodes; XNodeUnion exposes it as
// XNNAvgPooling2d and XNNMaxPooling2d.
table _XNNPooling2D {
  // Padding on each side.
  padding_top:uint;
  padding_right:uint;
  padding_bottom:uint;
  padding_left:uint;

  // Pooling window, stride and dilation sizes.
  pooling_height:uint;
  pooling_width:uint;
  stride_height:uint;
  stride_width:uint;
  dilation_height:uint;
  dilation_width:uint;

  // Input/output ids, followed by flags.
  input_id:uint;
  output_id:uint;
  flags:uint;
}
// Payload of the XNNStaticReshape node.
table XNNStaticReshape {
  num_dims: uint;
  // NOTE(review): presumably the target shape, num_dims entries long — confirm.
  new_shape: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}
// Payload of the XNNStaticSlice node.
table XNNStaticSlice {
  num_dims: uint;
  // NOTE(review): presumably per-dimension start offsets and extents of the
  // slice, each num_dims entries long — confirm.
  offsets: [uint];
  sizes: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}
// Payload of the XNNScaledDotProductAttention node: ids for the query, key,
// value, scale and mask operands and for the output, plus flags.
// NOTE(review): the ids presumably refer to graph values — confirm.
table XNNScaledDotProductAttention {
  query_id: uint;
  key_id: uint;
  value_id: uint;
  scale_id: uint;
  mask_id: uint;
  output_id: uint;
  flags: uint;
}
// Payload of the XNNArgMaxPooling2d node. Unlike _XNNPooling2D it has no
// stride or dilation fields and carries two output ids.
table XNNArgMaxPooling2d {
  // Padding on each side.
  padding_top:uint;
  padding_right:uint;
  padding_bottom:uint;
  padding_left:uint;

  // Pooling window size.
  pooling_height:uint;
  pooling_width:uint;

  input_id:uint;
  // Separate ids for the value output and the index output.
  output_value_id:uint;
  output_index_id:uint;
  flags:uint;
}
// Payload of the XNNLeakyReLU node: a float negative slope plus the
// input/output references and flags.
table XNNLeakyReLU {
  negative_slope:float;
  input_id:uint;
  output_id:uint;
  flags:uint;
}
// Location of one piece of constant data.
table ConstantDataOffset {
  // Offset of the data, relative to the constant data base offset provided in
  // the XNNPACKHeader.
  offset:uint64;

  // Size in bytes of the valid data starting at `offset`. Padding may follow
  // the constant data before the next piece of constant data begins.
  size:uint64;
}
// The serialized graph: nodes, values, external input/output ids and the
// constant data the values refer to.
table XNNGraph {
  // Schema version.
  version: string;

  xnodes: [XNode];
  xvalues: [XValue];

  // Number of external inputs/outputs.
  num_externs: uint;

  // Ids of external inputs.
  input_ids: [uint];

  // Ids of external outputs.
  output_ids: [uint];

  // Tables of constant data, used for constant Values (e.g. the data field of
  // weight tensors). Each constant is assigned an index into the table, and
  // the entries are each individually aligned. Index 0 is reserved to be
  // pointed to by non-constant Tensors. Exactly one of constant_buffer and
  // constant_data must be non-empty.
  constant_buffer: [Buffer];

  // The list index is the memory buffer id; the value is the memory buffer size.
  mem_buffer_sizes: [uint];

  // List of the constant data that follows the XNNGraph in this file. Each
  // constant data is assigned an index into the table. Index 0 is reserved to
  // be pointed to by non-constant Tensors. Exactly one of constant_buffer and
  // constant_data must be non-empty.
  constant_data: [ConstantDataOffset];
}
// XNNGraph is the root table of a buffer serialized with this schema.
root_type XNNGraph;