// smartselect/types.h - platform/external/libtextclassifier - Git at Google

 /*
  * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #ifndef LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
 #define LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_

 #include <ostream>
 #include <string>
 #include <utility>

 namespace libtextclassifier {

 // Index value used where no valid position exists; a default-constructed
 // (padding) Token stores it in both `start` and `end`.
 constexpr int kInvalidIndex = -1;

 // Position of a token within a 0-based array of tokens.
 using TokenIndex = int;

 // Position of a codepoint within a 0-based array of codepoints.
 using CodepointIndex = int;

 // A span over a sequence of codepoints: `first` is the index of the span's
 // first codepoint, `second` is the index of the codepoint one past its end.
 using CodepointSpan = std::pair<CodepointIndex, CodepointIndex>;

 // A span over a sequence of tokens: `first` is the index of the span's first
 // token, `second` is the index of the token one past its end.
 using TokenSpan = std::pair<TokenIndex, TokenIndex>;

 // A token together with its position in the original string, a flag telling
 // whether it was part of the input span, and a flag marking padding tokens.
 //
 // The member defaults below describe the padding token: empty text, invalid
 // position, not in the span, and marked as padding.
 struct Token {
   // Text of the token.
   std::string value;

   // Position of the token in the original string.
   CodepointIndex start = kInvalidIndex;
   CodepointIndex end = kInvalidIndex;

   // True when the token was in the input span.
   bool is_in_span = false;

   // True when the token is a padding token.
   bool is_padding = true;

   // Builds the padding token (every member keeps its default from above).
   Token() = default;

   // Builds a regular (non-padding) token that is not in the input span.
   Token(const std::string& arg_value, CodepointIndex arg_start,
         CodepointIndex arg_end)
       : value(arg_value),
         start(arg_start),
         end(arg_end),
         is_padding(false) {}

   // Builds a regular (non-padding) token with an explicit in-span flag.
   Token(const std::string& arg_value, CodepointIndex arg_start,
         CodepointIndex arg_end, bool is_in_span)
       : value(arg_value),
         start(arg_start),
         end(arg_end),
         is_in_span(is_in_span),
         is_padding(false) {}

   // Two tokens are equal only when all five members match.
   bool operator==(const Token& other) const {
     if (start != other.start || end != other.end) {
       return false;
     }
     if (is_in_span != other.is_in_span || is_padding != other.is_padding) {
       return false;
     }
     return value == other.value;
   }
 };

 // Streams a human-readable description of `token` to `os`, in the form
 // Token("<value>", <start>, <end>, is_in_span=<flag>, is_padding=<flag>),
 // and returns `os` so calls can be chained.
 inline std::ostream& operator<<(std::ostream& os, const Token& token) {
   os << "Token(\"" << token.value << "\", ";
   os << token.start << ", " << token.end;
   os << ", is_in_span=" << token.is_in_span;
   os << ", is_padding=" << token.is_padding << ")";
   return os;
 }

 }  // namespace libtextclassifier

 #endif  // LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
	/*
	* Copyright (C) 2017 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#ifndef LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
	#define LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_

	#include <ostream>
	#include <string>
	#include <utility>

	namespace libtextclassifier {

	// Index value used where no valid position exists; a default-constructed
	// (padding) Token stores it in both `start` and `end`.
	constexpr int kInvalidIndex = -1;

	// Position of a token within a 0-based array of tokens.
	using TokenIndex = int;

	// Position of a codepoint within a 0-based array of codepoints.
	using CodepointIndex = int;

	// A span over a sequence of codepoints: `first` is the index of the span's
	// first codepoint, `second` is the index of the codepoint one past its end.
	using CodepointSpan = std::pair<CodepointIndex, CodepointIndex>;

	// A span over a sequence of tokens: `first` is the index of the span's first
	// token, `second` is the index of the token one past its end.
	using TokenSpan = std::pair<TokenIndex, TokenIndex>;

	// A token together with its position in the original string, a flag telling
	// whether it was part of the input span, and a flag marking padding tokens.
	//
	// The member defaults below describe the padding token: empty text, invalid
	// position, not in the span, and marked as padding.
	struct Token {
	  // Text of the token.
	  std::string value;

	  // Position of the token in the original string.
	  CodepointIndex start = kInvalidIndex;
	  CodepointIndex end = kInvalidIndex;

	  // True when the token was in the input span.
	  bool is_in_span = false;

	  // True when the token is a padding token.
	  bool is_padding = true;

	  // Builds the padding token (every member keeps its default from above).
	  Token() = default;

	  // Builds a regular (non-padding) token that is not in the input span.
	  Token(const std::string& arg_value, CodepointIndex arg_start,
	        CodepointIndex arg_end)
	      : value(arg_value),
	        start(arg_start),
	        end(arg_end),
	        is_padding(false) {}

	  // Builds a regular (non-padding) token with an explicit in-span flag.
	  Token(const std::string& arg_value, CodepointIndex arg_start,
	        CodepointIndex arg_end, bool is_in_span)
	      : value(arg_value),
	        start(arg_start),
	        end(arg_end),
	        is_in_span(is_in_span),
	        is_padding(false) {}

	  // Two tokens are equal only when all five members match.
	  bool operator==(const Token& other) const {
	    if (start != other.start || end != other.end) {
	      return false;
	    }
	    if (is_in_span != other.is_in_span || is_padding != other.is_padding) {
	      return false;
	    }
	    return value == other.value;
	  }
	};

	// Streams a human-readable description of `token` to `os`, in the form
	// Token("<value>", <start>, <end>, is_in_span=<flag>, is_padding=<flag>),
	// and returns `os` so calls can be chained.
	inline std::ostream& operator<<(std::ostream& os, const Token& token) {
	  os << "Token(\"" << token.value << "\", ";
	  os << token.start << ", " << token.end;
	  os << ", is_in_span=" << token.is_in_span;
	  os << ", is_padding=" << token.is_padding << ")";
	  return os;
	}

	}  // namespace libtextclassifier

	#endif // LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_