blob: 8779723d2fd07f2ba9eee1b8d1834bb5fb11eb9f [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.hardware.neuralnetworks@1.0;
/**
* Operand types.
*
* The type of an operand in a model.
*
* Types prefaced with TENSOR_* must be used for tensor data (i.e., tensors
* with at least one dimension). Types not prefaced by TENSOR_* represent
* scalar values and must have no dimensions.
*/
enum OperandType : int32_t {
/**
* The following entries are used to declare scalars.
*/
FLOAT32 = 0,
INT32 = 1,
UINT32 = 2,
/**
* The following entries are used to declare tensors.
*/
TENSOR_FLOAT32 = 3,
TENSOR_INT32 = 4,
/**
* A tensor of 8-bit integers that represent real numbers.
*
* Attached to this tensor are two numbers that can be used to convert the
* 8-bit integer to the real value and vice versa. These two numbers are:
* - scale: a 32-bit floating point value
* - zero_value: a 32-bit integer
*
* The formula is:
*     real_value = (integer_value - zero_value) * scale.
*/
TENSOR_QUANT8_ASYMM = 5,
/**
* The following entries are OEM specific operand types.
*
* Their values start at 10000, leaving a gap after the standard types above.
* Following the TENSOR_* naming rule for this enum, OEM declares a scalar
* and TENSOR_OEM_BYTE declares a tensor.
*/
OEM = 10000,
TENSOR_OEM_BYTE = 10001,
};
/**
* Operation types.
*
* The type of an operation in a model.
*/
enum OperationType : int32_t {
/**
* Adds two tensors, element-wise.
*
* Takes two input tensors of identical type and compatible dimensions. The output
* is the sum of both input tensors, optionally modified by an activation function.
*
* Two dimensions are compatible when:
*     1. they are equal, or
*     2. one of them is 1
*
* The size of the output is the maximum size along each dimension of the input operands.
* It starts with the trailing dimensions, and works its way forward.
*
* Example:
*     input1.dimension = {4, 1, 2}
*     input2.dimension = {5, 4, 3, 1}
*     output.dimension = {5, 4, 3, 2}
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
* Supported tensor rank: up to 4
*
* Inputs:
* 0: A tensor.
* 1: A tensor of the same type, and compatible dimensions as input0.
* 2: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*    Specifies the activation to invoke on the result of each addition.
*
* Outputs:
* 0: The sum, a tensor of the same type as input0.
*/
ADD = 0,
/**
* Performs a 2-D average pooling operation.
*
* The output dimensions are functions of the filter dimensions, stride, and padding.
*
* The values in the output tensor are computed as:
*     output[batch, row, col, channel] =
*         sum_{i, j}(input[batch, row + i, col + j, channel]) / sum(1)
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
* 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
* 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
* 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
* 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
* 5: An INT32 value, specifying the output stride in the ‘width’ dimension.
* 6: An INT32 value, specifying the output stride in the ‘height’ dimension.
* 7: An INT32 value, specifying the filter width.
* 8: An INT32 value, specifying the filter height.
* 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*    Specifies the activation to invoke on the result.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
*/
AVERAGE_POOL_2D = 1,
/**
* Concatenates the input tensors along the given dimension.
*
* The input tensors must have identical type and the same dimensions except the
* dimension along the concatenation axis.
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: up to 4
*
* Inputs:
* 0 ~ n: The list of n input tensors, of shape [D0, D1, ..., Daxis(i), ..., Dm]
* n+1: An INT32 value, specifying the concatenation axis.
* n+2: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*      Specifies the activation to invoke on the result.
*
* Outputs:
* 0: The output, a tensor of the same type as the input tensors.
*    The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm].
*/
CONCATENATION = 2,
/**
* Performs a 2-D convolution operation.
*
* The CONV_2D op sweeps a 2-D filter that can mix channels together over a batch of
* images, applying the filter to each window of each image of the appropriate size.
*
* The output dimensions are functions of the filter dimensions, stride, and padding.
*
* The values in the output tensor are computed as:
*     output[batch, row, col, channel] =
*         sum_{i, j, k} (
*             input[batch, row + i, col + j, k] *
*             filter[channel, i, j, k]
*         ) + bias[channel]
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
* 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in],
*    specifying the filter.
* 2: A 1-D tensor, of shape [depth_out], specifying the bias.
*    For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should
*    also be of {@link OperandType::TENSOR_FLOAT32}.
*    For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias
*    should be of {@link OperandType::TENSOR_INT32}.
* 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
* 4: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
* 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
* 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
* 7: An INT32 value, specifying the output stride in the ‘width’ dimension.
* 8: An INT32 value, specifying the output stride in the ‘height’ dimension.
* 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*    Specifies the activation to invoke on the result.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out].
*/
CONV_2D = 3,
/**
* Performs a depthwise 2-D convolution operation.
*
* Given an input tensor of shape [batches, height, width, depth_in] and a filter
* tensor of shape [1, filter_height, filter_width, depth_out] containing
* depth_out convolutional filters of depth 1, DEPTHWISE_CONV applies a different
* filter to each input channel (expanding from 1 channel to depth_multiplier channels
* for each), then concatenates the results together.
*
* The output has depth_out = depth_in * depth_multiplier channels.
* The output dimensions are functions of the filter dimensions, stride, and padding.
*
* The values in the output tensor are computed as:
*     output[b, i, j, k * depth_multiplier + q] =
*         sum_{di, dj} (
*             input[b, strides[1] * i + di, strides[2] * j + dj, k] *
*             filter[0, di, dj, k * depth_multiplier + q]
*         )
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
* 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
*    specifying the filter.
* 2: A 1-D tensor, of shape [depth_out], specifying the bias.
*    For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should
*    also be of {@link OperandType::TENSOR_FLOAT32}.
*    For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias
*    should be of {@link OperandType::TENSOR_INT32}.
* 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
* 4: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
* 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
* 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
* 7: An INT32 value, specifying the output stride in the ‘width’ dimension.
* 8: An INT32 value, specifying the output stride in the ‘height’ dimension.
* 9: An INT32 value, specifying the depthwise multiplier (depth_multiplier).
* 10: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*     Specifies the activation to invoke on the result.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out].
*/
DEPTHWISE_CONV_2D = 4,
/**
* Rearranges data from depth into blocks of spatial data.
*
* More specifically, this op outputs a copy of the input tensor where values from
* the depth dimension are moved in spatial blocks to the height and width dimensions.
* The value block_size indicates the input block size and how the data is moved.
*
* Chunks of data of size block_size * block_size from depth are rearranged into
* non-overlapping blocks of size block_size x block_size.
*
* The width of the output tensor is input_width * block_size, whereas the height is
* input_height * block_size.
* The depth of the input tensor must be divisible by block_size * block_size.
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
* 1: An INT32 value, specifying the block_size. block_size must be >=1 and
*    block_size * block_size must be a divisor of the input depth.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, height*block_size, width*block_size,
*    depth_in/(block_size*block_size)].
*/
DEPTH_TO_SPACE = 5,
/**
* Dequantizes the input tensor.
*
* The formula is:
*     output = (input - zero_value) * scale.
*
* Supported tensor types: {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: up to 4
*
* Inputs:
* 0: A tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}.
*
* Outputs:
* 0: The output tensor of same shape as input0, but with type
*    {@link OperandType::TENSOR_FLOAT32}.
*/
DEQUANTIZE = 6,
/**
* Looks up items from a given tensor.
*
* Each item in the output is a raw copy of the corresponding item in
* the input “values”. If the given “lookup” indices are out of bounds,
* the op will fail and an error will be reported.
*
* Inputs:
* * 0: Values. An n-D tensor of any type X (where n >= 2). E.g., if n is 2,
*      then the shape would be [lookup_dimension, values_dimension], where
*      “lookup_dimension” corresponds to the indexing dimension in the lookup
*      table, and “values_dimension” to the contents.
* * 1: Lookups. A 1-D tensor of type T, of shape [lookup_size], where
*      “lookup_size” is the number of elements to look for, and each entry
*      corresponds to the first dimension of the “values” tensor.
*
* Output:
* * 0: An n-D tensor of type X and the same rank and shape as the “values”
*      tensor, except for the first dimension which has size “lookup_size”.
*/
EMBEDDING_LOOKUP = 7,
/**
* Computes element-wise floor() on the input tensor.
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
* Supported tensor rank: up to 4
*
* Inputs:
* 0: A tensor.
*
* Outputs:
* 0: The output, a tensor of the same type and dimensions as input0.
*/
FLOOR = 8,
/**
* Denotes a fully (densely) connected layer, which connects all elements in the input
* tensor with each element in the output tensor.
*
* This layer implements the operation:
*     outputs = activation(inputs * weights’ + bias)
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: up to 4.
*
* Inputs:
* 0: A tensor, specifying the input. If rank is greater than 2, then it gets flattened to
*    a 2-D Tensor. The 2-D Tensor is handled as if dimensions corresponded to shape
*    [batch_size, input_size], where “batch_size” corresponds to the batching dimension,
*    and “input_size” is the size of the input.
* 1: A 2-D tensor, specifying the weights, of shape [num_units, input_size], where “num_units”
*    corresponds to the number of output nodes.
* 2: A 1-D tensor, of shape [num_units], specifying the bias.
*    For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should
*    also be of {@link OperandType::TENSOR_FLOAT32}.
*    For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias
*    should be of {@link OperandType::TENSOR_INT32}.
* 3: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*    Specifies the activation to invoke on the result.
*
* Outputs:
* 0: The output tensor, of shape [batch_size, num_units].
*/
FULLY_CONNECTED = 9,
/**
* Looks up values of a hash table with given keys.
*
* Inputs:
* * 0: Lookups. A 1-D int32 tensor with shape [ k ].
* * 1: Keys. A 1-D int32 tensor with shape [ n ], *MUST* be sorted in
*      ascending order.
* * 2: Values. A tensor with shape [ n … ].
*
* Outputs:
* * 0: Output. A tensor with shape [ k …].
* * 1: Hits. A uint8 tensor with shape [ k ] indicating whether the lookup
*      hits or not.
*/
HASHTABLE_LOOKUP = 10,
/**
* Applies L2 normalization along the depth dimension.
*
* The values in the output tensor are computed as:
*     output[batch, row, col, channel] =
*         input[batch, row, col, channel] /
*         sqrt(sum_{c} pow(input[batch, row, col, c], 2))
*
* Each 1-D slice along the depth dimension is normalized independently.
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, height, width, depth].
*/
L2_NORMALIZATION = 11,
/**
* Performs a 2-D L2 pooling operation.
*
* The output dimensions are functions of the filter dimensions, stride, and padding.
*
* The values in the output tensor are computed as:
*     output[batch, row, col, channel] =
*         sqrt(sum_{i, j} pow(input[batch, row + i, col + j, channel], 2) / sum(1))
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
* 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
* 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
* 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
* 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
* 5: An INT32 value, specifying the output stride in the ‘width’ dimension.
* 6: An INT32 value, specifying the output stride in the ‘height’ dimension.
* 7: An INT32 value, specifying the filter width.
* 8: An INT32 value, specifying the filter height.
* 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*    Specifies the activation to invoke on the result.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
*/
L2_POOL_2D = 12,
/**
* Applies Local Response Normalization along the depth dimension.
*
* The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the last
* dimension), and each vector is normalized independently. Within a given vector,
* each component is divided by the weighted, squared sum of inputs within depth_radius.
*
* In detail:
*     sqr_sum[a, b, c, d] =
*         sum(pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2))
*     output = input / pow((bias + alpha * sqr_sum), beta)
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
* 1: An INT32 value, specifying the radius of the normalization window.
* 2: A FLOAT32 value, specifying the bias, must not be zero.
* 3: A FLOAT32 value, specifying the scale factor, alpha.
* 4: A FLOAT32 value, specifying the exponent, beta.
*
* Outputs:
* 0: The output tensor of same shape as input0.
*/
LOCAL_RESPONSE_NORMALIZATION = 13,
/**
* Computes sigmoid activation on the input tensor element-wise.
*
* In detail:
*     output = 1 / (1 + exp(-input))
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: up to 4.
*
* Inputs:
* 0: A tensor, specifying the input.
*
* Outputs:
* 0: The output tensor of same shape as input0.
*/
LOGISTIC = 14,
/**
* Projects an input to a bit vector via locality sensitive hashing.
*
* Inputs:
* * 0: Hash functions. Dim.size == 2, DataType: Float.
*      Tensor[0].Dim[0]: Number of hash functions.
*      Tensor[0].Dim[1]: Number of seeds per hash function.
*      Tensor[0].Dim[1] <= 32 in sparse case.
*
* * 1: Input. Dim.size >= 1, no restriction on DataType.
* * 2: Weight. Optional. Dim.size == 1, DataType: Float.
*      If not set, each input element is considered to have the same weight of
*      1.0.
*      Tensor[1].Dim[0] == Tensor[2].Dim[0]
* * 3: Type:
*      Sparse: Value LSHProjectionType_SPARSE(=1).
*        Computed bit vector is considered to be sparse.
*        Each output element is an int32 made up of multiple bits computed from
*        hash functions.
*
*      Dense: Value LSHProjectionType_DENSE(=2).
*        Computed bit vector is considered to be dense. Each output element
*        represents a bit and can take the value of either 0 or 1.
*
* Outputs:
* * 0: If the projection type is sparse:
*        Output.Dim == { Tensor[0].Dim[0] }
*        A tensor of int32 that represents hash signatures.
*      If the projection type is Dense:
*        Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] }
*        A flattened tensor that represents projected bit vectors.
*/
LSH_PROJECTION = 15,
/**
* Long short-term memory unit (LSTM) recurrent network layer.
*
* The default non-peephole implementation is based on:
* http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
* S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural
* Computation, 9(8):1735-1780, 1997.
*
* The peephole implementation is based on:
* https://research.google.com/pubs/archive/43905.pdf
* Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory
* recurrent neural network architectures for large scale acoustic modeling."
* INTERSPEECH, 2014.
*
* The coupling of input and forget gate (CIFG) is based on:
* http://arxiv.org/pdf/1503.04069.pdf
* Greff et al. "LSTM: A Search Space Odyssey"
*
* The operation has the following independently optional inputs:
* * If no input gate (if CIFG): “input_to_input_weights”,
*   “recurrent_to_input_weights”, “cell_to_input_weights”, “input_gate_bias”.
* * If no peephole connections: “cell_to_input_weights”,
*   “cell_to_forget_weights”, “cell_to_output_weights”.
* * If no projection layer: “projection_weights” and “projection_bias”.
* * If no projection bias: “projection_bias”.
*
* Supported tensor types:
* * {@link OperandType::TENSOR_FLOAT32}
*
* Inputs:
* * 0: Input.
*      A 2-D tensor of type T, of shape [batch_size, input_size], where
*      “batch_size” corresponds to the batching dimension, and “input_size”
*      is the size of the input.
* * 1: input_to_input_weights.
*      A 2-D tensor of type T, of shape [num_units, input_size], where
*      “num_units” corresponds to the number of cell units.
* * 2: input_to_forget_weights.
*      A 2-D tensor of type T, of shape [num_units, input_size].
* * 3: input_to_cell_weights.
*      A 2-D tensor of type T, of shape [num_units, input_size].
* * 4: input_to_output_weights.
*      A 2-D tensor of type T, of shape [num_units, input_size].
* * 5: recurrent_to_input_weights.
*      A 2-D tensor of type T, of shape [num_units, output_size], where
*      “output_size” corresponds to either the number of cell units (i.e.,
*      “num_units”), or the second dimension of the “projection_weights”, if
*      defined.
* * 6: recurrent_to_forget_weights.
*      A 2-D tensor of type T, of shape [num_units, output_size].
* * 7: recurrent_to_cell_weights.
*      A 2-D tensor of type T, of shape [num_units, output_size].
* * 8: recurrent_to_output_weights.
*      A 2-D tensor of type T, of shape [num_units, output_size].
* * 9: cell_to_input_weights.
*      A 1-D tensor of type T, of shape [num_units].
* * 10:cell_to_forget_weights.
*      A 1-D tensor of type T, of shape [num_units].
* * 11:cell_to_output_weights.
*      A 1-D tensor of type T, of shape [num_units].
* * 12:input_gate_bias.
*      A 1-D tensor of type T, of shape [num_units].
* * 13:forget_gate_bias.
*      A 1-D tensor of type T, of shape [num_units].
* * 14:cell_bias.
*      A 1-D tensor of type T, of shape [num_units].
* * 15:output_gate_bias.
*      A 1-D tensor of type T, of shape [num_units].
* * 16:projection_weights.
*      A 2-D tensor of type T, of shape [output_size, num_units].
* * 17:projection_bias.
*      A 1-D tensor of type T, of shape [output_size].
*
* Parameters:
* * 18:fused_activation_function.
*      An (optional) ActivationFunctionType indicating the activation
*      function.
*      If “NONE” is specified then it results in a linear activation.
* * 19:cell_clip.
*      A clipping threshold for the cell state, such that values are bound
*      within [-cell_clip, cell_clip]. If set to 0.0 then clipping is
*      disabled.
* * 20:proj_clip.
*      A clipping threshold for the output from the projection layer, such
*      that values are bound within [-proj_clip, proj_clip]. If set to 0.0
*      then clipping is disabled.
*
* Outputs:
* * 0: scratch_buffer.
*      A 3-D tensor of type T, of shape [batch_size, num_cell, 4].
* * 1: output_state.
*      A 2-D tensor of type T, of shape [batch_size, output_size].
* * 2: cell_state.
*      A 2-D tensor of type T, of shape [batch_size, num_units].
* * 3: output.
*      A 2-D tensor of type T, of shape [batch_size, output_size]. This is
*      effectively the same as the current “output_state” value.
*/
LSTM = 16,
/**
* Performs a 2-D max pooling operation.
*
* The output dimensions are functions of the filter dimensions, stride, and padding.
*
* The values in the output tensor are computed as:
*     output[batch, row, col, channel] =
*         max_{i, j} (input[batch, row + i, col + j, channel])
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
* 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
* 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
* 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
* 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
* 5: An INT32 value, specifying the output stride in the ‘width’ dimension.
* 6: An INT32 value, specifying the output stride in the ‘height’ dimension.
* 7: An INT32 value, specifying the filter width.
* 8: An INT32 value, specifying the filter height.
* 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*    Specifies the activation to invoke on the result.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
*/
MAX_POOL_2D = 17,
/**
* Multiplies two tensors, element-wise.
*
* Takes two input tensors of identical type and compatible dimensions. The output
* is the product of both input tensors, optionally modified by an activation function.
*
* Two dimensions are compatible when:
*     1. they are equal, or
*     2. one of them is 1
*
* The size of the resulting output is the maximum size along each dimension of the
* input operands. It starts with the trailing dimensions, and works its way forward.
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
* Supported tensor rank: up to 4
*
* Inputs:
* 0: A tensor.
* 1: A tensor of the same type, and compatible dimensions as input0.
* 2: An INT32 value, and has to be one of the {@link FusedActivationFunc} values.
*    Specifies the activation to invoke on the result.
*
* Outputs:
* 0: The product, a tensor of the same type as input0.
*/
MUL = 18,
/**
* Computes rectified linear activation on the input tensor element-wise.
*
* In detail:
*     output = max(0, input)
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: up to 4.
*
* Inputs:
* 0: A tensor, specifying the input.
*
* Outputs:
* 0: The output tensor of same shape as input0.
*/
RELU = 19,
/**
* Computes rectified linear 1 activation on the input tensor element-wise.
*
* In detail:
*     output = min(1.f, max(-1.f, input))
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: up to 4.
*
* Inputs:
* 0: A tensor, specifying the input.
*
* Outputs:
* 0: The output tensor of same shape as input0.
*/
RELU1 = 20,
/**
* Computes rectified linear 6 activation on the input tensor element-wise.
*
* In detail:
*     output = min(6, max(0, input))
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: up to 4.
*
* Inputs:
* 0: A tensor, specifying the input.
*
* Outputs:
* 0: The output tensor of same shape as input0.
*/
RELU6 = 21,
/**
* Reshapes a tensor.
*
* Given a tensor, this operation returns a tensor that has the same values as the
* input tensor, but with a newly specified shape.
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: up to 4.
*
* Inputs:
* 0: A tensor, specifying the tensor to be reshaped.
* 1: A 1-D tensor of type {@link OperandType::TENSOR_INT32}, defining the shape
*    of the output tensor. The number of elements implied by shape must be the same
*    as the number of elements in the input tensor.
*
* Outputs:
* 0: The output tensor, of shape specified by the input shape.
*/
RESHAPE = 22,
/**
* Resizes images to given size using bilinear interpolation.
*
* Resized images will be distorted if the output aspect ratio is not the same as
* the input aspect ratio.
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
* 1: An INT32 value, specifying the output width of the output tensor.
* 2: An INT32 value, specifying the output height of the output tensor.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, new_height, new_width, depth].
*/
RESIZE_BILINEAR = 23,
/**
* A basic recurrent neural network layer.
*
* This layer implements the operation:
*     outputs = state = activation(inputs * input_weights + state * recurrent_weights + bias)
*
* Where:
* * “input_weights” is a weight matrix that multiplies the inputs;
* * “recurrent_weights” is a weight matrix that multiplies the current
*   “state” which itself is the output from the previous time step
*   computation;
* * “bias” is a bias vector (added to each output vector in the batch);
* * “activation” is the function passed as the “fused_activation_function”
*   argument (if not “NONE”).
*
* Supported tensor types:
* * {@link OperandType::TENSOR_FLOAT32}
*
* Inputs:
* * 0: input.
*      A 2-D tensor of type T, of shape [batch_size, input_size], where
*      “batch_size” corresponds to the batching dimension, and “input_size” is
*      the size of the input.
* * 1: weights.
*      A 2-D tensor of type T, of shape [num_units, input_size], where
*      “num_units” corresponds to the number of units.
* * 2: recurrent_weights.
*      A 2-D tensor of type T, of shape [num_units, num_units], with columns
*      corresponding to the weights from each unit.
* * 3: bias.
*      A 1-D tensor of type T, of shape [num_units].
*
*      For FLOAT32 input tensor, bias must also be FLOAT32.
*      For UINT8 input tensor, bias must be INT32.
*
* Parameters:
* * 4: fused_activation_function.
*      An (optional) ActivationFunctionType indicating the activation
*      function. If “NONE” is specified then it results in a linear
*      activation.
*
* * 5: Hidden state.
*      A 2-D tensor of type T, of shape [batch_size, num_units].
*
* Outputs:
* * 0: output.
*      A 2-D tensor of type T, of shape [batch_size, num_units]. This is
*      effectively the same as the current state value.
*/
RNN = 24,
/**
* Computes the softmax activation on the input tensor element-wise, per batch, by
* normalizing the input vector so the maximum coefficient is zero.
*
* In detail:
*     output[batch, i] =
*         exp((input[batch, i] - max(input[batch, :])) * beta) /
*         sum_{k}{exp((input[batch, k] - max(input[batch, :])) * beta)}
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: 2 or 4.
*
* Inputs:
* 0: A 2-D or 4-D tensor, specifying the input.
* 1: A FLOAT32 value, specifying the scaling factor for the exponent, beta.
*
* Outputs:
* 0: The output tensor of same shape as input0.
*/
SOFTMAX = 25,
/**
* Rearranges blocks of spatial data, into depth.
*
* More specifically, this op outputs a copy of the input tensor where values from
* the height and width dimensions are moved to the depth dimension.
* The value block_size indicates the input block size and how the data is moved.
*
* Non-overlapping spatial blocks of size block_size x block_size are rearranged
* into chunks of size block_size * block_size along the depth dimension.
*
* The depth of the output tensor is input_depth * block_size * block_size.
* The input tensor's height and width must be divisible by block_size.
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
*                         {@link OperandType::TENSOR_QUANT8_ASYMM}
* Supported tensor rank: 4, with "NHWC" data layout.
*
* Inputs:
* 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
* 1: An INT32 value, specifying the block_size. block_size must be >=1 and
*    block_size must be a divisor of both the input height and width.
*
* Outputs:
* 0: The output 4-D tensor, of shape [batches, height/block_size, width/block_size,
*    depth_in*block_size*block_size].
*/
SPACE_TO_DEPTH = 26,
/**
* SVDF op is a kind of stateful layer derived from the notion that a
* densely connected layer that's processing a sequence of input frames can
* be approximated by using a singular value decomposition of each of its
* nodes. The implementation is based on:
*
* https://research.google.com/pubs/archive/43813.pdf
*
* P. Nakkiran, R. Alvarez, R. Prabhavalkar, C. Parada.
* “Compressing Deep Neural Networks using a Rank-Constrained Topology”.
* INTERSPEECH, 2015.
*
* It processes the incoming input using a 2-stage filtering mechanism:
* * stage 1 performs filtering on the "features" dimension, whose outputs get
*   pushed into a memory of fixed-size memory_size.
* * stage 2 performs filtering on the "time" dimension of the memory_size
*   memoized outputs of stage 1.
*
* Specifically, for rank 1, this layer implements the operation:
*
*     memory = push(conv1d(inputs, weights_feature, feature_dim, "VALID"));
*     outputs = activation(memory * weights_time + bias);
*
* Where:
* * “weights_feature” is a weights matrix that processes the inputs (by
*   convolving the input with every “feature filter”), and whose outputs get
*   pushed, stacked in order, into the fixed-size “memory” (the oldest entry
*   gets dropped);
* * “weights_time” is a weights matrix that processes the “memory” (by a
*   batched matrix multiplication on the num_units);
* * “bias” is an optional bias vector (added to each output vector in the
*   batch); and
* * “activation” is the function passed as the “fused_activation_function”
*   argument (if not “NONE”).
*
* Each rank adds a dimension to the weights matrices by means of stacking
* the filters.
*
* Supported tensor types:
* * {@link OperandType::TENSOR_FLOAT32}
*
* Inputs:
* * 0: input.
*      A 2-D tensor of type T, of shape [batch_size, input_size], where
*      “batch_size” corresponds to the batching dimension, and “input_size” is
*      the size of the input.
* * 1: weights_feature.
*      A 2-D tensor of type T, of shape [num_units, input_size], where
*      “num_units” corresponds to the number of units.
* * 2: weights_time.
*      A 2-D tensor of type T, of shape [num_units, memory_size], where
*      “memory_size” corresponds to the fixed-size of the memory.
* * 3: bias.
*      An optional 1-D tensor of type T, of shape [num_units].
*
*      For FLOAT32 input tensor, bias must also be FLOAT32.
*      For UINT8 input tensor, bias must be INT32.
*
* Parameters:
* * 4: rank.
*      The rank of the SVD approximation.
* * 5: fused_activation_function.
*      An (optional) ActivationFunctionType indicating the activation function.
*      If “NONE” is specified then it results in a linear activation.
*
* Outputs:
* * 0: state.
*      A 2-D tensor of type T, of shape [batch_size, (memory_size - 1) * num_units * rank].
* * 1: output.
*      A 2-D tensor of type T, of shape [batch_size, num_units].
*/
SVDF = 27,
/**
* Computes hyperbolic tangent of input tensor element-wise.
*
* In detail:
*     output = tanh(input)
*
* Supported tensor types: {@link OperandType::TENSOR_FLOAT32}
* Supported tensor rank: up to 4.
*
* Inputs:
* 0: A tensor, specifying the input.
*
* Outputs:
* 0: The output tensor of same shape as input0.
*/
TANH = 28,
/**
* OEM specific operation.
*
* This operation is OEM specific. It should only be used for OEM applications.
*/
OEM_OPERATION = 10000,
};
/**
* Fused activation function types.
*
* Operations that accept a fused activation take one of these values as an
* INT32 input (for example, input 2 of {@link OperationType::ADD}).
*
* RELU, RELU1 and RELU6 share their names with the element-wise operations
* {@link OperationType::RELU}, {@link OperationType::RELU1} and
* {@link OperationType::RELU6}, which are documented as:
*     RELU:  output = max(0, input)
*     RELU1: output = min(1.f, max(-1.f, input))
*     RELU6: output = min(6, max(0, input))
* NOTE(review): presumably the fused variants apply the same formulas, and
* NONE applies no activation; confirm against the implementation.
*/
enum FusedActivationFunc : int32_t {
NONE = 0,
RELU = 1,
RELU1 = 2,
RELU6 = 3,
};
/**
* The role an operand plays within a model.
*/
enum OperandLifeTime : int32_t {
/**
* An intermediate value that is internal to the model: it is produced by
* one operation and consumed by other operations.
*/
TEMPORARY_VARIABLE = 0,
/**
* An input of the model. A single operand can't be both an input and an
* output of a model.
*/
MODEL_INPUT = 1,
/**
* An output of the model.
*/
MODEL_OUTPUT = 2,
/**
* A constant whose value is found in Model.operandValues.
*/
CONSTANT_COPY = 3,
/**
* A constant whose value was specified via a Memory object.
*/
CONSTANT_REFERENCE = 4,
/**
* The operand has no value. Valid only for optional arguments of
* operations.
*/
NO_VALUE = 5,
};
/**
* The possible states of a device.
*/
enum DeviceStatus : int32_t {
AVAILABLE = 0,
BUSY = 1,
OFFLINE = 2,
UNKNOWN = 3,
};
/**
* Performance information for the reference workload.
*
* Used by a driver to report its performance characteristics. Both fields are
* ratios measured against the CPU running the same workload, and for both a
* lower number is better.
*/
struct PerformanceInfo {
/**
* Execution-time ratio: the time taken by the driver to execute the
* workload compared to the time the CPU would take for the same workload.
* A lower number is better.
*/
float execTime;
/**
* Energy ratio: the energy used by the driver compared to what the CPU
* would use for doing the same workload. A lower number is better.
*/
float powerUsage;
};
/**
* The capabilities of a driver.
*
* Holds one PerformanceInfo entry per kind of data the driver may operate on.
*/
struct Capabilities {
/**
* Driver performance when operating on float32 data.
*/
PerformanceInfo float32Performance;
/**
* Driver performance when operating on asymmetric 8-bit quantized data.
*/
PerformanceInfo quantized8Performance;
};
/**
* Describes the location of a data object.
*
* A location is a byte range (offset, length) inside a memory pool selected
* by poolIndex. Which pools the index refers to, and when the fields are left
* at 0, depends on where the location is used; see Operand.location and
* RequestArgument.location.
*/
struct DataLocation {
/**
* The index of the memory pool where this location is found.
*/
uint32_t poolIndex;
/**
* Offset in bytes from the start of the pool.
*/
uint32_t offset;
/**
* The length of the data in bytes.
*/
uint32_t length;
};
/**
* Describes one operand of the model's graph.
*
* An operand carries its type and shape, its quantization parameters (where
* applicable), how it is used in the model, and where its data can be found.
*/
struct Operand {
/**
* Data type of the operand.
*/
OperandType type;
/**
* Dimensions of the operand.
*
* Operands of a scalar type must have no dimensions (see OperandType).
* Dimension values left unspecified in the model can be supplied at
* execution time through RequestArgument.dimensions.
*/
vec<uint32_t> dimensions;
/**
* The number of operations that use this operand as input.
*/
uint32_t numberOfConsumers;
/**
* Quantized scale of the operand.
*
* Only applicable if the operand is of type TENSOR_QUANT8_ASYMM or
* TENSOR_INT32.
*/
float scale;
/**
* Quantized zero-point offset of the operand.
*
* Only applicable if the operand is of type TENSOR_QUANT8_ASYMM.
*
* NOTE(review): presumably this is the "zero_value" of the conversion
* formula documented on OperandType::TENSOR_QUANT8_ASYMM -- confirm.
*/
int32_t zeroPoint;
/**
* How the operand is used.
*/
OperandLifeTime lifetime;
/**
* Where to find the data for this operand.
* If the lifetime is TEMPORARY_VARIABLE, MODEL_INPUT, MODEL_OUTPUT, or NO_VALUE:
* - All the fields of location will be 0.
* If the lifetime is CONSTANT_COPY:
* - location.poolIndex is 0.
* - location.offset is the offset in bytes into Model.operandValues.
* - location.length is set.
* If the lifetime is CONSTANT_REFERENCE:
* - location.poolIndex is set.
* - location.offset is the offset in bytes into the specified pool.
* - location.length is set.
*/
DataLocation location;
};
/**
* Describes one operation of the model's graph.
*
* An operation is identified by its type and by the lists of operand indexes
* it reads from (inputs) and writes to (outputs).
*/
struct Operation {
/**
* The operation type.
*/
OperationType type;
/**
* Describes the table that contains the indexes of the inputs of the
* operation. The offset is the index in the operandIndexes table.
*
* NOTE(review): no "operandIndexes" table is declared in this file;
* presumably each value is an index into Model.operands -- confirm.
*/
vec<uint32_t> inputs;
/**
* Describes the table that contains the indexes of the outputs of the
* operation. The offset is the index in the operandIndexes table.
*
* NOTE(review): no "operandIndexes" table is declared in this file;
* presumably each value is an index into Model.operands -- confirm.
*/
vec<uint32_t> outputs;
};
/**
* A Neural Network Model.
*
* This includes not only the execution graph, but also constant data such as
* weights or scalars added at construction time. The only information that
* might not be known is the shape of the input tensors.
*
* Constant operand data lives in exactly one of two places, selected by
* Operand::lifetime: inline in operandValues (CONSTANT_COPY) or in one of the
* shared memory pools (CONSTANT_REFERENCE).
*/
struct Model {
/**
* All operands included in the model.
*/
vec<Operand> operands;
/**
* All operations included in the model.
*
* The operations are sorted into execution order.
*/
vec<Operation> operations;
/**
* Input indexes of the model.
*
* Each value corresponds to the index of the operand in "operands".
*/
vec<uint32_t> inputIndexes;
/**
* Output indexes of the model.
*
* Each value corresponds to the index of the operand in "operands".
*/
vec<uint32_t> outputIndexes;
/**
* A byte buffer containing operand data that were copied into the model.
*
* An operand's value must be located here if and only if Operand::lifetime
* equals OperandLifeTime::CONSTANT_COPY. Such an operand addresses its
* bytes through Operand::location.offset and Operand::location.length.
*/
vec<uint8_t> operandValues;
/**
* A collection of shared memory pools containing operand data that were
* registered by the model.
*
* An operand's value must be located here if and only if Operand::lifetime
* equals OperandLifeTime::CONSTANT_REFERENCE. Such an operand selects its
* pool through Operand::location.poolIndex.
*/
vec<memory> pools;
};
/**
* Metadata information specifying the location of the input or output data and
* any updates to the input or output operand.
*
* One RequestArgument describes a single entry of Request.inputs or
* Request.outputs.
*/
struct RequestArgument {
/**
* If true, the argument does not have a value. This can be used for operations
* that take optional arguments. If true, the fields of location are set to 0 and
* the dimensions vector is left empty.
*/
bool hasNoValue;
/**
* The location within one of the memory pools passed in the Request.
*/
DataLocation location;
/**
* Updated dimension information.
*
* If dimensions.size() > 0, dimension information was provided along with the
* argument. This can be the case for models that accept inputs of varying size.
* This can't change the rank, just the value of the dimensions that were
* unspecified in the model.
*/
vec<uint32_t> dimensions;
};
/**
* Inputs to be sent to and outputs to be retrieved from a prepared model.
*
* A Request serves two primary tasks:
* 1) Provides the input and output data to be used when executing the model.
* 2) Specifies any updates to the input operand metadata that were left
* unspecified at model preparation time.
*/
struct Request {
/**
* Input data and information to be used in the execution of a prepared
* model.
*
* The index of the input corresponds to the index in Model.inputIndexes.
* E.g., inputs[i] corresponds to Model.inputIndexes[i].
*/
vec<RequestArgument> inputs;
/**
* Output data and information to be used in the execution of a prepared
* model.
*
* The index of the output corresponds to the index in Model.outputIndexes.
* E.g., outputs[i] corresponds to Model.outputIndexes[i].
*/
vec<RequestArgument> outputs;
/**
* A collection of shared memory pools containing operand data for both the
* inputs and the outputs to a model.
*
* RequestArgument.location refers to a location within one of these pools.
*/
vec<memory> pools;
};
/**
* The status a function returns to its caller.
*/
enum ErrorStatus : int32_t {
NONE = 0,
DEVICE_UNAVAILABLE = 1,
GENERAL_FAILURE = 2,
OUTPUT_INSUFFICIENT_SIZE = 3,
INVALID_ARGUMENT = 4,
};