// blob: 7f1f2badfbcf713f2a7d9d491be1a39dccfff66c [file] [log] [blame]
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
include "config.fbs";
namespace tflite.ops.custom.sentencepiece;
// Serialized configuration for the sentencepiece encoder.
//
// NOTE: no field carries an explicit `(id: N)` attribute, so FlatBuffers
// assigns field ids from declaration order. Append new fields at the end and
// do not reorder or remove existing ones, or previously serialized configs
// will no longer be read correctly.
table EncoderConfig {
// Version of the encoder. Defaults to SENTENCE_PIECE.
// NOTE(review): EncoderVersion is not declared in this file; presumably it
// comes from the included "config.fbs" -- confirm.
version: EncoderVersion = SENTENCE_PIECE;
// Start, end and unknown codes; defaults are 0, 0 and -1 respectively.
// NOTE(review): presumably the ids emitted for begin-of-sequence,
// end-of-sequence and unknown pieces, with -1 acting as an "unset" sentinel
// for unknown_code -- verify against the encoder implementation.
start_code: int32 = 0;
end_code: int32 = 0;
unknown_code: int32 = -1;
// Weight of the "unknown code" when encoding. Called a "penalty" because it
// usually has a large negative weight, less than that of any other
// sentencepiece.
unknown_penalty: float = 0;
// Offset for encoding, usually used when the lowest code values are reserved
// for some special needs. No explicit default is given, so the FlatBuffers
// implicit scalar default (0) applies.
encoding_offset: int32;
// String pieces for encoding.
// NOTE(review): Trie is not declared in this file; presumably it comes from
// the included "config.fbs" -- confirm.
pieces: Trie;
// Scores of the pieces, stored as a vector of floats.
// NOTE(review): presumably one score per entry in `pieces`; the indexing
// scheme is not visible here -- verify against the encoder implementation.
pieces_scores: [float];
// Normalization-related parameters. The bool fields below have no explicit
// default, so the FlatBuffers implicit default (false) applies.
//
// Remove extra whitespaces.
// NOTE(review): exactly which whitespaces count as "extra" is defined by the
// encoder, not by this schema -- confirm.
remove_extra_whitespaces: bool;
// Add a whitespace prefix before encoding.
add_dummy_prefix: bool;
// Escape whitespaces during encoding so the decoder can restore them exactly
// as in the input.
escape_whitespaces: bool;
// Normalization data: a trie of prefixes plus a raw byte vector of
// replacements.
// NOTE(review): presumably the trie maps each prefix to a position in
// `normalized_replacements`; the byte layout is not visible here -- confirm.
normalized_prefixes: Trie;
normalized_replacements: [byte];
}
// EncoderConfig is the root table of buffers serialized with this schema.
root_type EncoderConfig;