// blob: 9f07f91745f4687d3e78eafe0a5f22a21e76e12b [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
#define LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
#include <ostream>
#include <string>
#include <utility>
namespace libtextclassifier {

// Sentinel index value. A default-constructed (padding) Token carries it in
// both its start and end fields.
constexpr int kInvalidIndex = -1;

// Position of an element in a 0-based array of tokens.
using TokenIndex = int;

// Position of an element in a 0-based array of codepoints.
using CodepointIndex = int;

// A range over a codepoint sequence: `first` is the index of the first
// codepoint in the range, `second` is the index one past its last codepoint.
using CodepointSpan = std::pair<CodepointIndex, CodepointIndex>;

// A range over a token sequence: `first` is the index of the first token in
// the range, `second` is the index one past its last token.
using TokenSpan = std::pair<TokenIndex, TokenIndex>;

// A single token: its text, where it sits in the original string (as
// codepoint indices), and flags recording whether it was part of the input
// span and whether it is a padding token.
struct Token {
  // Text of the token.
  std::string value;

  // Codepoint positions of the token in the original string.
  CodepointIndex start;
  CodepointIndex end;

  // True when the token was in the input span.
  bool is_in_span;

  // True when the token is a padding token.
  bool is_padding;

  // Builds the padding token: empty text, invalid positions, not in the
  // input span.
  Token()
      : value(),
        start(kInvalidIndex),
        end(kInvalidIndex),
        is_in_span(false),
        is_padding(true) {}

  // Builds a regular (non-padding) token that is not in the input span.
  Token(const std::string& arg_value, CodepointIndex arg_start,
        CodepointIndex arg_end)
      : value(arg_value),
        start(arg_start),
        end(arg_end),
        is_in_span(false),
        is_padding(false) {}

  // Builds a regular (non-padding) token with an explicit in-span flag.
  Token(const std::string& arg_value, CodepointIndex arg_start,
        CodepointIndex arg_end, bool is_in_span)
      : value(arg_value),
        start(arg_start),
        end(arg_end),
        is_in_span(is_in_span),
        is_padding(false) {}

  // Two tokens are equal only when every field matches.
  bool operator==(const Token& other) const {
    if (is_padding != other.is_padding || is_in_span != other.is_in_span) {
      return false;
    }
    if (start != other.start || end != other.end) {
      return false;
    }
    return value == other.value;
  }
};

// Writes a human-readable rendering of `token` to `os` and returns `os`.
inline std::ostream& operator<<(std::ostream& os, const Token& token) {
  os << "Token(\"" << token.value << "\", ";
  os << token.start << ", ";
  os << token.end;
  os << ", is_in_span=" << token.is_in_span;
  os << ", is_padding=" << token.is_padding << ")";
  return os;
}

}  // namespace libtextclassifier
#endif // LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_