| // Copyright (c) Meta Platforms, Inc. and affiliates. |
| |
| // All types declared in this schema are placed in the fb_xnnpack namespace. |
| namespace fb_xnnpack; |
| |
| // 4-character file identifier that can be written into buffers serialized with this schema. |
| // Update after any backward-compatibility (BC) breaking change. |
| file_identifier "XN00"; |
| |
| // Element datatype of an xnn Value, stored as a 16-bit `short`. |
| // The numeric values are part of the serialized format: never renumber, only append. |
| // NOTE(review): the names look like they mirror XNNPACK's xnn_datatype enum; confirm the |
| // numeric values agree before casting between the two. |
| enum XNNDatatype : short { |
| /// Invalid data type. Valid Values never have this datatype. |
| xnn_datatype_invalid = 0, |
| /// IEEE754 single-precision floating-point. |
| xnn_datatype_fp32 = 1, |
| /// IEEE754 half-precision floating-point. |
| xnn_datatype_fp16 = 2, |
| /// Quantized 8-bit signed integer with shared per-Value quantization parameters. |
| xnn_datatype_qint8 = 3, |
| /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters. |
| xnn_datatype_quint8 = 4, |
| /// Quantized 32-bit signed integer with shared per-Value quantization parameters. |
| xnn_datatype_qint32 = 5, |
| /// Quantized 8-bit signed integer with shared per-channel quantization parameters. |
| xnn_datatype_qcint8 = 6, |
| /// Quantized 32-bit signed integer with shared per-channel quantization parameters. |
| xnn_datatype_qcint32 = 7, |
| /// Quantized 4-bit signed integer with shared per-channel quantization parameters. |
| xnn_datatype_qcint4 = 8, |
| /// Dynamically quantized 8-bit signed integer with per-batch quantization parameters. |
| xnn_datatype_qdint8 = 9, |
| /// Quantized 4-bit signed integer with shared blockwise quantization parameters. |
| xnn_datatype_qbint4 = 10, |
| } |
| |
| // Type of quantization applied to a quantized tensor; this union is the type of |
| // XNNQuantizedTensorValue.quant_params and holds at most one of the tables below. |
| // Union members are identified by their position in this list, so only append new members. |
| union XNNQuantParams { |
| PerChannelQuant, |
| PerTensorQuant, |
| PerTokenDynamicQuant, |
| PerChannelGroupQuant, |
| } |
| |
| // taken from executorch |
| // Data buffer abstraction: a single blob of raw bytes. |
| table Buffer { |
| // Raw bytes. force_align: 16 requests 16-byte alignment for the start of the vector data. |
| storage:[ubyte] (force_align: 16); |
| } |
| |
| // Per-channel quantization parameters (one member of XNNQuantParams). |
| table PerChannelQuant { |
| // Scale values. NOTE(review): presumably one entry per channel along channel_dim -- confirm. |
| scale:[float]; |
| // NOTE(review): presumably the index of the tensor dimension the channels run along -- confirm. |
| channel_dim:int; |
| } |
| |
| // Parameters for per-token dynamic quantization (one member of XNNQuantParams). |
| table PerTokenDynamicQuant { |
| // NOTE(review): presumably the number of trailing, non-batch dimensions that share one set of |
| // dynamically computed quantization parameters -- confirm against the runtime. |
| num_nonbatch_dims:int; |
| } |
| |
| // Per-tensor quantization parameters (one member of XNNQuantParams): a single scale and a |
| // single zero point, both stored as scalars. |
| table PerTensorQuant { |
| scale:float; |
| zero_point:int; |
| } |
| |
| // Per-channel, group-wise quantization parameters (one member of XNNQuantParams). |
| table PerChannelGroupQuant { |
| // Scale values stored as 32-bit floats. |
| scale:[float]; |
| // NOTE(review): presumably the index of the tensor dimension the channels run along -- confirm. |
| channel_dim:int; |
| // NOTE(review): presumably the number of elements in each quantization group -- confirm. |
| group_size:int; |
| // Scale values stored as raw 16-bit words; the field name suggests bfloat16 bit patterns. |
| // NOTE(review): whether this replaces or accompanies `scale` is not stated in this schema. |
| scale_bf16:[ushort]; |
| } |
| |
| // Description of one tensor Value of the graph (one member of XValueUnion). |
| // NOTE(review): several field comments below were adapted from the XNNPACK C API documentation |
| // and still use its vocabulary; confirm details against the code that reads this table. |
| table XNNTensorValue { |
| // type of the tensor elements. |
| datatype:XNNDatatype; |
| // number of dimensions in the shape. |
| num_dims:uint; |
| // shape dimensions; expected to hold num_dims entries. If num_dims is 0, this vector can be |
| // absent. Per the XNNPACK API documentation, XNNPACK does not keep any pointers to the shape |
| // array after the defining call returns. |
| dims:[uint]; |
| // Index to the program's constant buffer table, value 0 is reserved to indicate non constant |
| constant_buffer_idx:uint; |
| // external ID for the Value. The ID must be within the range of reserved Value IDs specified on |
| // the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be |
| // created for the Value. |
| external_id:uint; |
| // binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT |
| // and XNN_VALUE_FLAG_EXTERNAL_OUTPUT. |
| flags:uint; |
| // ID of the Value. In the XNNPACK C API, id_out is the variable initialized with the Value ID |
| // upon successful return; if a valid external_id was provided, it receives the external_id value. |
| // NOTE(review): presumably the ID that the *_id fields of node tables refer to -- confirm. |
| id_out:uint; |
| // does this value need to be quantized dynamically at runtime? |
| // if we are quantizing at runtime, this field points to a target dtype |
| // The default is xnn_datatype_invalid. |
| dq_datatype:XNNDatatype = xnn_datatype_invalid; |
| } |
| |
| // A tensor Value together with its quantization parameters (one member of XValueUnion). |
| table XNNQuantizedTensorValue { |
| // Base Tensor Value |
| tensor_value:XNNTensorValue; |
| // Quantization parameters; the union member that is set selects the quantization type. |
| quant_params:XNNQuantParams; |
| } |
| |
| // Union of every node kind that can be stored in an XNode. |
| // An entry written `Name: Table` declares a union member called Name that is stored using the |
| // shared table type Table, so several node kinds reuse one layout (e.g. _XNNNode1x1). |
| // An entry without a colon uses the table of the same name. |
| // Union members are identified by their position in this list, so only append new members. |
| union XNodeUnion { |
| XNNAdd: _XNNNode2x1, |
| XNNFullyConnected, |
| XNNSoftmax: _XNNNode1x1, |
| XNNSigmoid: _XNNNode1x1, |
| XNNStaticTranspose, |
| XNNClamp: _XNNNode1x1, |
| XNNConv2d: _XNNNodeConv, |
| XNNDiv: _XNNNode2x1, |
| XNNStaticResizeBilinear2D, |
| XNNStaticConstantPad, |
| XNNAvgPooling2d: _XNNPooling2D, |
| XNNMinimum: _XNNNode2x1, |
| XNNDepthwiseConv2d: _XNNNodeConv, |
| XNNMaxPooling2d: _XNNPooling2D, |
| XNNMultiply: _XNNNode2x1, |
| XNNSubtract: _XNNNode2x1, |
| XNNFloor: _XNNNode1x1, |
| XNNConvert: _XNNNode1x1, |
| XNNGlobalAvgPooling2d: _XNNNode1x1, |
| XNNStaticReshape, |
| XNNArgMaxPooling2d, |
| XNNSquareRoot: _XNNNode1x1, |
| XNNCeiling: _XNNNode1x1, |
| XNNHardswish: _XNNNode1x1, |
| XNNLeakyReLU, |
| XNNMaximum: _XNNNode2x1, |
| XNNNegate: _XNNNode1x1, |
| XNNSquare: _XNNNode1x1, |
| XNNELU, |
| XNNAbs: _XNNNode1x1, |
| XNNPReLU: _XNNNode2x1, |
| XNNConcatenate2: _XNNCat, |
| XNNConcatenate3: _XNNCat, |
| XNNConcatenate4: _XNNCat, |
| XNNStaticSlice, |
| XNNScaledDotProductAttention, |
| XNNBatchMatrixMultiply: _XNNNode2x1, |
| } |
| |
| // Union of the Value kinds that can be stored in an XValue: a plain tensor value or a |
| // tensor value with quantization parameters. |
| // Union members are identified by their position in this list, so only append new members. |
| union XValueUnion { |
| XNNTensorValue, |
| XNNQuantizedTensorValue, |
| } |
| |
| // A pair of float bounds, used as the type of XNode.output_min_max. |
| // NOTE(review): presumably the range a node's output is clamped to -- confirm against the runtime. |
| table OutputMinMax { |
| output_min:float; |
| output_max:float; |
| } |
| |
| // One node of the graph; XNNGraph.xnodes is a vector of these. |
| table XNode { |
| // The node itself; the union member that is set selects the node kind. |
| xnode_union:XNodeUnion; |
| // An int which can be linked back to the node in the origin graph |
| debug_handle:uint; |
| // Optional output bounds for the node (see OutputMinMax). |
| output_min_max:OutputMinMax; |
| } |
| |
| // Wrapper table holding one XValueUnion; XNNGraph.xvalues is a vector of these. |
| table XValue { |
| xvalue_union:XValueUnion; |
| } |
| |
| // Parameters of a static transpose node (member XNNStaticTranspose of XNodeUnion). |
| // NOTE(review): perm presumably holds num_dims entries giving the dimension permutation, and |
| // input_id/output_id presumably are Value IDs -- confirm against the runtime. |
| table XNNStaticTranspose { |
| num_dims:uint; |
| perm:[uint]; |
| input_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // Parameters of a static 2D bilinear resize node (member XNNStaticResizeBilinear2D of XNodeUnion). |
| // NOTE(review): new_height/new_width presumably give the output spatial size, and |
| // input_id/output_id presumably are Value IDs -- confirm against the runtime. |
| table XNNStaticResizeBilinear2D { |
| new_height:uint; |
| new_width:uint; |
| input_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // Parameters of a static constant-pad node (member XNNStaticConstantPad of XNodeUnion). |
| // NOTE(review): pre_paddings/post_paddings presumably hold one entry per input dimension and |
| // padding_value presumably is the fill value -- confirm against the runtime. |
| table XNNStaticConstantPad { |
| pre_paddings:[uint]; |
| post_paddings:[uint]; |
| padding_value:float; |
| input_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // A node with two inputs and one output. |
| // Not meant to be used directly: it is the shared layout behind the XNodeUnion members |
| // declared as `Name: _XNNNode2x1` (e.g. XNNAdd, XNNMultiply). |
| table _XNNNode2x1 { |
| input1_id:uint; |
| input2_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // A node with one input and one output. |
| // Not meant to be used directly: it is the shared layout behind the XNodeUnion members |
| // declared as `Name: _XNNNode1x1` (e.g. XNNSoftmax, XNNSigmoid). |
| table _XNNNode1x1 { |
| input_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // Shared layout behind the XNodeUnion members XNNConcatenate2, XNNConcatenate3 and |
| // XNNConcatenate4. It always declares four input slots. |
| // NOTE(review): presumably only the first N inputs are meaningful for XNNConcatenateN, and |
| // axis presumably is the dimension to concatenate along -- confirm against the runtime. |
| table _XNNCat { |
| axis: uint; |
| input1_id: uint; |
| input2_id: uint; |
| input3_id: uint; |
| input4_id: uint; |
| output_id: uint; |
| flags: uint; |
| } |
| |
| // Parameters of an ELU node (member XNNELU of XNodeUnion): one input, one output, plus a |
| // float alpha parameter. |
| // NOTE(review): alpha presumably is the ELU scale for negative inputs -- confirm. |
| table XNNELU { |
| alpha:float; |
| input_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // Parameters of a fully connected node (member XNNFullyConnected of XNodeUnion). |
| // NOTE(review): input1_id, filter_id, bias_id and output_id presumably are Value IDs of the |
| // input, weights, bias and output -- confirm against the runtime. |
| table XNNFullyConnected { |
| input1_id:uint; |
| filter_id:uint; |
| bias_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // Shared layout behind the XNodeUnion members XNNConv2d and XNNDepthwiseConv2d. |
| // Fields are grouped as: padding (top/right/bottom/left), kernel size, subsampling, dilation, |
| // channel grouping, adjustment, and finally the input/filter/bias/output ids and flags. |
| // NOTE(review): the exact meaning of each parameter presumably follows the corresponding |
| // XNNPACK convolution definition call -- confirm against the runtime. |
| table _XNNNodeConv { |
| padding_top:uint; |
| padding_right:uint; |
| padding_bottom:uint; |
| padding_left:uint; |
| kernel_height:uint; |
| kernel_width:uint; |
| subsampling_height:uint; |
| subsampling_width:uint; |
| dilation_height:uint; |
| dilation_width:uint; |
| group_input_channels:uint; |
| group_output_channels:uint; |
| groups:uint; |
| adjustment_height:uint; |
| adjustment_width:uint; |
| input1_id:uint; |
| filter_id:uint; |
| bias_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // Shared layout behind the XNodeUnion members XNNAvgPooling2d and XNNMaxPooling2d. |
| // Fields are grouped as: padding (top/right/bottom/left), pooling window size, stride, |
| // dilation, and finally the input/output ids and flags. |
| // NOTE(review): whether every field is honored by both pooling kinds is not visible here. |
| table _XNNPooling2D { |
| padding_top: uint; |
| padding_right: uint; |
| padding_bottom: uint; |
| padding_left: uint; |
| pooling_height: uint; |
| pooling_width: uint; |
| stride_height: uint; |
| stride_width: uint; |
| dilation_height: uint; |
| dilation_width: uint; |
| input_id: uint; |
| output_id: uint; |
| flags: uint; |
| } |
| |
| // Parameters of a static reshape node (member XNNStaticReshape of XNodeUnion). |
| // NOTE(review): new_shape presumably holds num_dims entries giving the target shape -- confirm. |
| table XNNStaticReshape { |
| num_dims:uint; |
| new_shape:[uint]; |
| input_id: uint; |
| output_id: uint; |
| flags: uint; |
| } |
| |
| // Parameters of a static slice node (member XNNStaticSlice of XNodeUnion). |
| // NOTE(review): offsets and sizes presumably each hold num_dims entries giving the start and |
| // extent of the slice per dimension -- confirm against the runtime. |
| table XNNStaticSlice { |
| num_dims:uint; |
| offsets:[uint]; |
| sizes:[uint]; |
| input_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // Parameters of a scaled dot-product attention node (member XNNScaledDotProductAttention of |
| // XNodeUnion): five input ids (query, key, value, scale, mask) and one output id. |
| // NOTE(review): all *_id fields presumably are Value IDs -- confirm against the runtime. |
| table XNNScaledDotProductAttention { |
| query_id:uint; |
| key_id:uint; |
| value_id:uint; |
| scale_id:uint; |
| mask_id:uint; |
| output_id:uint; |
| flags:uint; |
| } |
| |
| // Parameters of a 2D argmax pooling node (member XNNArgMaxPooling2d of XNodeUnion). |
| // Unlike _XNNPooling2D it has no stride or dilation fields and it has two outputs. |
| // NOTE(review): output_value_id/output_index_id presumably identify the pooled values and |
| // the indices of the selected elements -- confirm against the runtime. |
| table XNNArgMaxPooling2d { |
| padding_top: uint; |
| padding_right: uint; |
| padding_bottom: uint; |
| padding_left: uint; |
| pooling_height: uint; |
| pooling_width: uint; |
| input_id: uint; |
| output_value_id: uint; |
| output_index_id: uint; |
| flags: uint; |
| } |
| |
| // Parameters of a leaky ReLU node (member XNNLeakyReLU of XNodeUnion): one input, one output, |
| // plus a float negative_slope parameter. |
| // NOTE(review): negative_slope presumably is the multiplier applied to negative inputs -- confirm. |
| table XNNLeakyReLU { |
| negative_slope: float; |
| input_id: uint; |
| output_id: uint; |
| flags: uint; |
| } |
| |
| // Describes the location (offset and size) of one piece of constant data. |
| // XNNGraph.constant_data is a vector of these. |
| table ConstantDataOffset { |
| // Constant data offsets are relative to the constant data base offset provided |
| // in the XNNPACKHeader. |
| offset: uint64; |
| |
| // The size in bytes of valid data starting at the offset. The constant data |
| // may be followed by padding before the next piece of constant data. |
| size: uint64; |
| } |
| |
| // Root table of this schema: a serialized graph made of nodes, values and constant data. |
| table XNNGraph { |
| // Schema version. |
| version:string; |
| // Nodes of the graph. |
| xnodes:[XNode]; |
| // Values (tensors) of the graph. |
| xvalues:[XValue]; |
| |
| // Number of external inputs/outputs |
| num_externs:uint; |
| |
| // Ids of external inputs |
| input_ids:[uint]; |
| |
| // Ids of external outputs |
| output_ids:[uint]; |
| |
| // Tables of constant data, used for constant Values (e.g. |
| // data field of weight tensors). Each constant is assigned an index into the table |
| // which are each individually aligned. 0 index is reserved to be pointed to by non-constant |
| // Tensors. Exactly one of constant_buffer and constant_data must be non-empty |
| constant_buffer:[Buffer]; |
| |
| // the list index is memory buffer id, the value is the memory buffer size. |
| mem_buffer_sizes: [uint]; |
| |
| // List of the constant data that follows the XNNGraph in this file. Each constant data is assigned an index into |
| // the table. 0 index is reserved to be pointed to by non-constant Tensor. Exactly one of constant_buffer and |
| // constant_data must be non-empty |
| constant_data:[ConstantDataOffset]; |
| } |
| |
| // XNNGraph is the root table of buffers serialized with this schema. |
| root_type XNNGraph; |