//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Flatbuffer schema for Neurosis (FFNN with embeddings) parameters.
//
// Contains the same information as an EmbeddingNetworkProto.
namespace libtextclassifier3.saft_fbs;
// NS stands for NeurosiS. The next two digits are meant to identify
// incompatible versions. Ideally, we'll never have to go beyond 00.
file_identifier "NS00";
// Should be kept in sync with the C++ enum nlp_saft::QuantizationType.
enum QuantizationType : byte {
  NONE = 0,
  UINT8 = 1,
  UINT4 = 2,
  FLOAT16 = 3,
}
table Matrix {
  // Number of rows of this matrix.
  rows:int;

  // Number of columns of this matrix.
  cols:int;

  // Type of quantization used for the values from this matrix.
  //
  // If this is QuantizationType_NONE, then the unquantized values should be
  // stored in |values| below. Otherwise, the bytes of the quantized values
  // should be stored in |quantized_values| and the float16 quantization scales
  // should be stored in |scales|.
  quantization_type:QuantizationType = NONE;

  // Non-quantized matrix elements, in row-major order. See comments for
  // |quantization_type|.
  values:[float];

  // Quantized matrix elements, in row-major order. See comments for
  // |quantization_type|.
  quantized_values:[ubyte];

  // Quantization factors (float16), one per matrix row. There is no float16
  // primitive type in FlatBuffers, so we just use another 16-bit type and
  // store the raw float16 bits in it. See comments for |quantization_type|.
  scales:[ushort];
}
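
// Illustrative sketch (not part of the schema): for QuantizationType UINT8,
// one plausible way to recover an element is to scale the quantized byte by
// its row's float16 scale; the exact mapping (zero-point handling, UINT4
// packing, etc.) is defined by the C++ code that consumes this schema.
// |Float16ToFloat| is a hypothetical helper that reinterprets the ushort bits
// as a float16 value.
//
//   // Assumes value(i, j) ~ scale(i) * quantized(i, j).
//   float Dequantize(const Matrix &m, int i, int j) {
//     const float scale = Float16ToFloat(m.scales()->Get(i));
//     return scale * m.quantized_values()->Get(i * m.cols() + j);
//   }
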
// The input layer for a Neurosis network is composed of several parts (named
// "chunks" below, "embedding spaces" in other parts of the code, etc.). For
// each chunk, we have |num_features| features that extract feature values in
// that chunk. All values extracted by a feature get projected via the
// embedding matrix |embedding| and summed together, producing a vector of
// |embedding.cols| elements. The resulting vector gets concatenated with the
// similar vectors from the chunk's other features, producing a "chunk" of
// |num_features * embedding.cols| elements. This chunk gets concatenated with
// the other chunks. (A worked size example follows the table below.)
//
// Note: the specification that indicates what those |num_features| features are
// is stored elsewhere (usually in a ModelParameter, see model.fbs). But we
// need to know |num_features| here, in order to specify the geometry of the
// Neurosis network.
table InputChunk {
  embedding:Matrix;
  num_features:int;
}
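
// Worked example (illustrative numbers, not from this schema): with two input
// chunks, one with num_features = 3 and embedding.cols = 16 and another with
// num_features = 2 and embedding.cols = 8, the concatenated input layer has
// 3 * 16 + 2 * 8 = 64 elements.
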
// One layer of neurons from the Neurosis network. This table can represent a
// hidden layer or the final (output / softmax) layer.
//
// Our formalism is a bit different from, but equivalent to, the usual
// description in the literature:
//
// Technically, in Neurosis, each layer takes an input (a vector of floats); if
// this is not the first layer, we apply a nonlinear function (ReLU); for the
// first layer, we skip ReLU. Next, we multiply by |weights| and add |bias|,
// which produces the input for the next layer, and so on. The output from the
// last layer is generally used for softmax classification. That's why we say
// that the last layer is the "softmax layer". (A pseudocode sketch of this
// forward pass follows the table below.)
table NeuralLayer {
  // Weight matrix for this layer. Geometry: num_inputs x num_neurons, where
  // num_inputs is the number of values produced by the previous layer (which
  // can be the input layer, or another hidden layer) and num_neurons is the
  // number of neurons from this layer.
  weights:Matrix;

  // Bias vector for this layer.
  //
  // NOTE: right now, we accept both 1 x num_neurons and num_neurons x 1
  // geometries: the layout of the elements is the same in both cases.
  bias:Matrix;
}
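
// Illustrative sketch (not part of the schema): the forward pass described
// above, as hypothetical C++-style pseudocode over dequantized float values
// (|input|, |layers|, |Relu|, |MatMul|, and |Add| are assumed names, not part
// of this schema):
//
//   std::vector<float> h = input;  // concatenation of the input chunks
//   for (int k = 0; k < layers.size(); ++k) {
//     if (k > 0) h = Relu(h);  // no ReLU before the first layer
//     // 1 x num_inputs times num_inputs x num_neurons, plus the bias.
//     h = Add(MatMul(h, layers[k].weights), layers[k].bias);
//   }
//   // h now holds the activations of the last (softmax) layer.
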
table EmbeddingNetwork {
  // Specification of the chunks that compose the input layer.
  input_chunks:[InputChunk];

  // Hidden layers, followed by the final (softmax) layer.
  layers:[NeuralLayer];
}
root_type EmbeddingNetwork;
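
// Illustrative sketch (not part of the schema): with the C++ code that flatc
// generates from this schema, a serialized buffer can be verified and read
// roughly as follows (standard FlatBuffers conventions):
//
//   const void *buf = ...;  // bytes of a serialized EmbeddingNetwork
//   if (libtextclassifier3::saft_fbs::EmbeddingNetworkBufferHasIdentifier(buf)) {
//     const auto *net = libtextclassifier3::saft_fbs::GetEmbeddingNetwork(buf);
//     const auto num_layers = net->layers()->size();
//   }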