blob: ac71d078daa2faf0f35f29ec9bc8e766366c2495 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <xnnpack.h>
#include <xnnpack/common.h>
#include <xnnpack/codecache.h>
#define XNN_MAX_INPUTS 3
#define XNN_MAX_OUTPUTS 2
#define XNN_MAX_RUNTIME_INPUTS 2
#define XNN_MAX_RUNTIME_OUTPUTS 2
#define XNN_INVALID_NODE_ID UINT32_MAX
#define XNN_MAX_OPERATOR_OBJECTS 2
#ifdef __cplusplus
extern "C" {
#endif
struct xnn_shape {
size_t num_dims;
size_t dim[XNN_MAX_TENSOR_DIMS];
};
enum xnn_value_type {
xnn_value_type_invalid = 0,
xnn_value_type_dense_tensor = 1,
};
enum xnn_layout_type {
xnn_layout_type_nhwc = 0,
xnn_layout_type_nchw = 1,
};
/// Abstraction for a collections of elements produced and consumed by nodes.
struct xnn_value {
/// Unique ID for the value.
uint32_t id;
/// Type of the collection of elements.
///
/// Currently only dense tensors are supported.
/// Other types (e.g. sparse tensors) might be supported in the future.
enum xnn_value_type type;
/// Type of elements in the collection.
enum xnn_datatype datatype;
/// Per-value quantization parameters.
struct {
/// Offset from zero of the quantized elements.
int32_t zero_point;
union {
/// Multiplication factor to convert quantized elements to real representation.
float scale;
struct {
/// Per-channel multiplication factor to convert quantized elements to real representation.
const float* channelwise_scale;
/// Index of the channel dimension with per-channel quantization parameters.
size_t channel_dimension;
};
};
} quantization;
/// Tensor shape.
struct xnn_shape shape;
/// Binary features of the tensor. Supported values are any combination of:
/// - XNN_VALUE_FLAG_EXTERNAL_INPUT
/// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT
uint32_t flags;
/// Static initialization data. Must be null for non-static values.
const void* data;
/// Index of the Subgraph node that produced the value, or XNN_INVALID_NODE_ID is the Value is an external input.
uint32_t producer;
/// Index of the first Node that consume the value, or XNN_INVALID_NODE_ID if the Value has no consumers within the
/// graph (e.g. Value is an external output).
uint32_t first_consumer;
/// Number of Nodes that consume the value.
/// If multiple inputs in a Node refer to this Value as input, the Node is counted as consumer multiple times.
/// If the Value is an external output, it counts as having an extra consumer.
uint32_t num_consumers;
uint32_t num_nchw_compatible_consumers;
enum xnn_layout_type layout;
/// Set during analysis in xnn_subgraph_rewrite_for_fp16.
/// Indicates that this value should be converted to FP16.
bool fp16_compatible;
/// Set during analysis in xnn_subgraph_rewrite_for_fp16.
/// Indicates Value ID of the FP16 variant of this Value.
uint32_t fp16_id;
/// Set during analysis in xnn_subgraph_rewrite_for_fp16.
/// Indicates Value ID of the FP32 variant of this Value.
uint32_t fp32_id;
};
struct xnn_blob {
/// Size in bytes.
size_t size;
/// Data pointer.
void* data;
bool external;
};
struct xnn_node;
struct xnn_operator_data;
typedef enum xnn_status (*xnn_create_operator_fn)(
const struct xnn_node* node,
const struct xnn_value* values,
size_t num_values,
struct xnn_operator_data* opdata,
struct xnn_code_cache* code_cache);
typedef enum xnn_status (*xnn_setup_operator_fn)(
const struct xnn_operator_data* opdata,
const struct xnn_blob* blobs,
size_t num_blobs,
pthreadpool_t threadpool);
enum xnn_compute_type {
xnn_compute_type_invalid = 0,
xnn_compute_type_fp32,
xnn_compute_type_fp16,
xnn_compute_type_qc8,
xnn_compute_type_qs8,
xnn_compute_type_qu8,
xnn_compute_type_fp32_to_fp16,
xnn_compute_type_fp32_to_qs8,
xnn_compute_type_fp32_to_qu8,
xnn_compute_type_fp16_to_fp32,
xnn_compute_type_qs8_to_fp32,
xnn_compute_type_qu8_to_fp32,
};
enum xnn_node_type {
xnn_node_type_invalid = 0,
xnn_node_type_abs,
xnn_node_type_add2,
xnn_node_type_argmax_pooling_2d,
xnn_node_type_average_pooling_2d,
xnn_node_type_bankers_rounding,
xnn_node_type_ceiling,
xnn_node_type_clamp,
xnn_node_type_concatenate2,
xnn_node_type_convert,
xnn_node_type_convolution_2d,
xnn_node_type_deconvolution_2d,
xnn_node_type_depthwise_convolution_2d,
xnn_node_type_depth_to_space,
xnn_node_type_divide,
xnn_node_type_elu,
xnn_node_type_fully_connected,
xnn_node_type_floor,
xnn_node_type_global_average_pooling_2d,
xnn_node_type_hardswish,
xnn_node_type_leaky_relu,
xnn_node_type_max_pooling_2d,
xnn_node_type_maximum2,
xnn_node_type_minimum2,
xnn_node_type_multiply2,
xnn_node_type_negate,
xnn_node_type_prelu,
xnn_node_type_sigmoid,
xnn_node_type_softmax,
xnn_node_type_static_constant_pad,
xnn_node_type_static_reshape,
xnn_node_type_static_resize_bilinear_2d,
xnn_node_type_square,
xnn_node_type_square_root,
xnn_node_type_squared_difference,
xnn_node_type_subtract,
xnn_node_type_unpooling_2d,
};
struct xnn_node {
enum xnn_node_type type;
uint32_t id;
enum xnn_compute_type compute_type;
/// Static parameters of the operator node.
union {
struct {
uint32_t input_padding_top;
uint32_t input_padding_right;
uint32_t input_padding_bottom;
uint32_t input_padding_left;
uint32_t kernel_height;
uint32_t kernel_width;
uint32_t subsampling_height;
uint32_t subsampling_width;
uint32_t dilation_height;
uint32_t dilation_width;
uint32_t groups;
size_t group_input_channels;
size_t group_output_channels;
} convolution_2d;
struct {
uint32_t padding_top;
uint32_t padding_right;
uint32_t padding_bottom;
uint32_t padding_left;
uint32_t adjustment_height;
uint32_t adjustment_width;
uint32_t kernel_height;
uint32_t kernel_width;
uint32_t upsampling_height;
uint32_t upsampling_width;
uint32_t dilation_height;
uint32_t dilation_width;
uint32_t groups;
size_t group_input_channels;
size_t group_output_channels;
} deconvolution_2d;
struct {
uint32_t input_padding_top;
uint32_t input_padding_right;
uint32_t input_padding_bottom;
uint32_t input_padding_left;
uint32_t kernel_height;
uint32_t kernel_width;
uint32_t subsampling_height;
uint32_t subsampling_width;
uint32_t dilation_height;
uint32_t dilation_width;
uint32_t depth_multiplier;
size_t input_channels;
} depthwise_convolution_2d;
struct {
uint32_t block_size;
} depth_to_space;
struct {
uint32_t padding_top;
uint32_t padding_right;
uint32_t padding_bottom;
uint32_t padding_left;
uint32_t pooling_height;
uint32_t pooling_width;
uint32_t stride_height;
uint32_t stride_width;
uint32_t dilation_height;
uint32_t dilation_width;
} pooling_2d;
struct {
float alpha;
} elu;
struct {
float negative_slope;
} leaky_relu;
struct {
size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
size_t post_paddings[XNN_MAX_TENSOR_DIMS];
uint32_t padding_value;
} static_pad;
struct {
struct xnn_shape new_shape;
} static_reshape;
struct {
size_t new_height;
size_t new_width;
} static_resize;
struct {
size_t axis;
} concatenate;
} params;
struct {
float output_min;
float output_max;
} activation;
/// Value IDs for node inputs.
uint32_t inputs[XNN_MAX_INPUTS];
uint32_t num_inputs;
/// Value IDs for node outputs.
uint32_t outputs[XNN_MAX_OUTPUTS];
uint32_t num_outputs;
uint32_t flags;
uint32_t layout_flags;
uint32_t cluster_leader;
// Number of filter parameters in all 1x1 Convolutions of the sparse cluster.
// This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
size_t num_params;
// Number of zero filter parameters in all 1x1 Convolutions of the sparse cluster.
// This value is properly initialized only in sparse inference analysis of 1x1 Convolutions.
size_t num_zeroes;
// Factory function to create an operator object from the node.
xnn_create_operator_fn create;
// Function to setup an operator using opdata.
xnn_setup_operator_fn setup;
};
struct xnn_operator_data {
xnn_operator_t operator_objects[XNN_MAX_OPERATOR_OBJECTS];
xnn_setup_operator_fn setup;
size_t batch_size;
size_t input_height;
size_t input_width;
size_t output_height;
size_t output_width;
struct xnn_shape shape1;
struct xnn_shape shape2;
size_t pre_paddings[XNN_MAX_TENSOR_DIMS];
size_t post_paddings[XNN_MAX_TENSOR_DIMS];
uint32_t adjustment_height;
uint32_t adjustment_width;
uint32_t inputs[XNN_MAX_RUNTIME_INPUTS];
uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS];
};
struct xnn_subgraph {
/// Number of Value IDs reserved for communication with external graph representation.
/// Values created during subgraph transformation avoid using IDs in [0, reserved_value_ids-1] range.
uint32_t external_value_ids;
uint32_t num_reserved_values;
uint32_t num_values;
struct xnn_value* values;
uint32_t num_reserved_nodes;
uint32_t num_nodes;
struct xnn_node* nodes;
};
/// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values.
struct xnn_runtime {
uint32_t num_external_values;
/// List of operators in the execution plan, in execution order.
struct xnn_operator_data* opdata;
/// Number of operators in the execution plan.
size_t num_ops;
struct xnn_blob* blobs;
size_t num_blobs;
void* workspace;
#if XNN_PLATFORM_JIT
struct xnn_code_cache code_cache;
#endif // XNN_PLATFORM_JIT
pthreadpool_t threadpool;
};
struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph);
struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph);
void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes);
size_t xnn_tensor_get_size(
xnn_subgraph_t subgraph,
uint32_t value_id);
// Product of all shape dimensions
size_t xnn_shape_multiply_all_dims(
const struct xnn_shape shape[1]);
// Product of all shape dimensions, except for the last (channel) one
size_t xnn_shape_multiply_non_channel_dims(
const struct xnn_shape shape[1]);
enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags);
void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph);
void xnn_node_clear(struct xnn_node* node);
void xnn_value_clear(struct xnn_value* value);
void xnn_value_copy(struct xnn_value* dst_value, const struct xnn_value* src_value);
void xnn_init_convert_node(
struct xnn_node* node,
enum xnn_compute_type compute_type,
uint32_t input_id,
uint32_t output_id,
uint32_t flags);
#ifdef __cplusplus
} // extern "C"
#endif