| // |
| // Copyright (C) 2018 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| // FlatBuffers schema for the parameters of a Neurosis network, i.e., a |
| // feed-forward neural network (FFNN) with embeddings. |
| // |
| // Contains the same information as an EmbeddingNetworkProto. |
| |
| // All enums and tables declared below live in this namespace. |
| namespace libtextclassifier3.saft_fbs; |
| |
| // File identifier for buffers that use this schema (FlatBuffers file |
| // identifiers are exactly 4 characters long).  NS stands for NeurosiS.  The |
| // next two digits are meant to identify incompatible versions.  Ideally, we'll |
| // never have to go beyond 00. |
| file_identifier "NS00"; |
| |
| // Quantization scheme used for the elements of a Matrix (see |
| // Matrix.quantization_type).  Stored as a single byte. |
| // |
| // Should be kept in sync with the C++ enum nlp_saft::QuantizationType. |
| enum QuantizationType : byte { |
| // No quantization: the matrix elements are stored as plain floats. |
| NONE = 0, |
| // Quantized encodings.  NOTE(review): the names suggest 8-bit, 4-bit and |
| // float16 element encodings respectively; the exact byte packing is defined |
| // by the C++ code that reads these matrices, not by this schema -- confirm |
| // there before relying on it. |
| UINT8 = 1, |
| UINT4 = 2, |
| FLOAT16 = 3, |
| } |
| |
| // A matrix with |rows| x |cols| elements, stored either as plain floats or in |
| // quantized form (see |quantization_type|). |
| table Matrix { |
| // Number of rows of this matrix. |
| rows:int; |
| |
| // Number of columns of this matrix. |
| cols:int; |
| |
| // Type of quantization used for the values from this matrix.  Defaults to |
| // NONE when the field is not set. |
| // |
| // If this is QuantizationType_NONE, then the unquantized values should be |
| // stored in |values| below.  Otherwise, the bytes of the quantized values |
| // should be stored in |quantized_values| and the float16 quantization scales |
| // should be stored in |scales|. |
| quantization_type:QuantizationType = NONE; |
| |
| // Non-quantized matrix elements, in row-major order.  Expected to be |
| // populated only when |quantization_type| is NONE; see comments for |
| // |quantization_type|. |
| values:[float]; |
| |
| // Raw bytes of the quantized matrix elements, in row-major order.  Expected |
| // to be populated only when |quantization_type| is not NONE; see comments for |
| // |quantization_type|. |
| // NOTE(review): how elements are packed into bytes presumably depends on the |
| // quantization type and is defined by the C++ reader -- confirm there. |
| quantized_values:[ubyte]; |
| |
| // Quantization factors (float16), one per matrix row.  FlatBuffers has no |
| // float16 primitive type, so each factor is stored in another 16-bit type |
| // (ushort).  See comments for |quantization_type|. |
| scales:[ushort]; |
| } |
| |
| // One part of the input layer of a Neurosis network. |
| // |
| // The input layer is composed of several parts (named "chunks" below, |
| // "embedding spaces" in some other places, etc).  Each chunk has |
| // |num_features| features that extract feature values for that chunk.  All |
| // values extracted by one feature get projected via the embedding matrix |
| // |embedding| and summed together, producing a vector of |embedding.cols| |
| // elements.  That vector gets concatenated with the corresponding vectors of |
| // the other features of the same chunk (|num_features| features in total), |
| // producing a "chunk" of |num_features * embedding.cols| elements.  This chunk |
| // then gets concatenated with the other chunks. |
| // |
| // Note: the specification that indicates what those |num_features| features are |
| // is stored elsewhere (usually in a ModelParameter, see model.fbs).  But we |
| // need to know |num_features| here, in order to specify the geometry of the |
| // Neurosis network. |
| table InputChunk { |
| // Embedding matrix used to project the values extracted by every feature of |
| // this chunk; each projected vector has |embedding.cols| elements. |
| embedding:Matrix; |
| |
| // Number of features that extract values for this chunk. |
| num_features:int; |
| } |
| |
| // One layer of neurons from the Neurosis network.  This table can represent a |
| // hidden layer or the final (output / softmax) layer. |
| // |
| // Our formalism is a bit different from, but equivalent to, the usual |
| // description from the literature. |
| // |
| // Technically, in Neurosis, each layer takes an input (a vector of floats); if |
| // this is not the first layer, we apply a nonlinear function (ReLU); for the |
| // first layer, we skip ReLU.  Next, we multiply by |weights| and add |bias|, |
| // which gives the input for the next layer, and so on.  In other words, for |
| // each layer: output = f(input) * weights + bias, where f is the identity for |
| // the first layer and ReLU for all other layers.  The output from the last |
| // layer is generally used for softmax classification.  That's why we say that |
| // the last layer is the "softmax layer". |
| table NeuralLayer { |
| // Weight matrix for this layer.  Geometry: num_inputs x num_neurons, where |
| // num_inputs is the number of values produced by the previous layer (which |
| // can be the input layer, or another hidden layer) and num_neurons is the |
| // number of neurons from this layer. |
| weights:Matrix; |
| |
| // Bias vector for this layer, with num_neurons elements. |
| // |
| // NOTE: right now, we accept both 1 x num_neurons and num_neurons x 1 |
| // geometries: the layout of the elements is the same in both cases. |
| bias:Matrix; |
| } |
| |
| // All parameters of a Neurosis network: the description of its input layer, |
| // followed by its layers of neurons. |
| table EmbeddingNetwork { |
| // Specification of the chunks that compose the input layer. |
| // NOTE(review): presumably the chunks are concatenated in the order in which |
| // they are listed here -- confirm against the C++ reader. |
| input_chunks:[InputChunk]; |
| |
| // Hidden layers, followed by the final (softmax) layer. |
| layers:[NeuralLayer]; |
| } |
| |
| // A serialized buffer that uses this schema has an EmbeddingNetwork as its |
| // root table. |
| root_type EmbeddingNetwork; |