annotator/number/number.h - platform/external/libtextclassifier - Git at Google

 /*
  * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #ifndef LIBTEXTCLASSIFIER_ANNOTATOR_NUMBER_NUMBER_H_
 #define LIBTEXTCLASSIFIER_ANNOTATOR_NUMBER_NUMBER_H_

 #include <string>
 #include <unordered_set>
 #include <vector>

 #include "annotator/feature-processor.h"
 #include "annotator/model_generated.h"
 #include "annotator/types.h"
 #include "utils/utf8/unicodetext.h"

 namespace libtextclassifier3 {

 // Annotator of numbers in text.
 //
 // Only supports values in range [-999 999 999, 999 999 999] (inclusive).
 //
 // TODO(zilka): Add support for non-ASCII digits.
 // TODO(zilka): Add support for written-out numbers.
 class NumberAnnotator {
  public:
   explicit NumberAnnotator(const NumberAnnotatorOptions* options,
                            const FeatureProcessor* feature_processor)
       : options_(options),
         feature_processor_(feature_processor),
         allowed_prefix_codepoints_(
             FlatbuffersVectorToSet(options->allowed_prefix_codepoints())),
         allowed_suffix_codepoints_(
             FlatbuffersVectorToSet(options->allowed_suffix_codepoints())) {}

   // Classifies given text, and if it is a number, it passes the result in
   // 'classification_result' and returns true, otherwise returns false.
   bool ClassifyText(const UnicodeText& context, CodepointSpan selection_indices,
                     AnnotationUsecase annotation_usecase,
                     ClassificationResult* classification_result) const;

   // Finds all number instances in the input text.
   bool FindAll(const UnicodeText& context_unicode,
                AnnotationUsecase annotation_usecase,
                std::vector<AnnotatedSpan>* result) const;

  private:
   static std::unordered_set<int> FlatbuffersVectorToSet(
       const flatbuffers::Vector<int32_t>* codepoints);

   // Parses the text to an int64 value and returns true if succeeded, otherwise
   // false. Also returns the number of prefix/suffix codepoints that were
   // stripped from the number.
   bool ParseNumber(const UnicodeText& text, int64* result,
                    int* num_prefix_codepoints,
                    int* num_suffix_codepoints) const;

   const NumberAnnotatorOptions* options_;
   const FeatureProcessor* feature_processor_;
   const std::unordered_set<int> allowed_prefix_codepoints_;
   const std::unordered_set<int> allowed_suffix_codepoints_;
 };

 }  // namespace libtextclassifier3

 #endif  // LIBTEXTCLASSIFIER_ANNOTATOR_NUMBER_NUMBER_H_
	/*
	* Copyright (C) 2018 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_NUMBER_NUMBER_H_
	#define LIBTEXTCLASSIFIER_ANNOTATOR_NUMBER_NUMBER_H_

	#include <string>
	#include <unordered_set>
	#include <vector>

	#include "annotator/feature-processor.h"
	#include "annotator/model_generated.h"
	#include "annotator/types.h"
	#include "utils/utf8/unicodetext.h"

	namespace libtextclassifier3 {

	// Annotator of numbers in text.
	//
	// Only supports values in range [-999 999 999, 999 999 999] (inclusive).
	//
	// TODO(zilka): Add support for non-ASCII digits.
	// TODO(zilka): Add support for written-out numbers.
	class NumberAnnotator {
	public:
	explicit NumberAnnotator(const NumberAnnotatorOptions* options,
	const FeatureProcessor* feature_processor)
	: options_(options),
	feature_processor_(feature_processor),
	allowed_prefix_codepoints_(
	FlatbuffersVectorToSet(options->allowed_prefix_codepoints())),
	allowed_suffix_codepoints_(
	FlatbuffersVectorToSet(options->allowed_suffix_codepoints())) {}

	// Classifies given text, and if it is a number, it passes the result in
	// 'classification_result' and returns true, otherwise returns false.
	bool ClassifyText(const UnicodeText& context, CodepointSpan selection_indices,
	AnnotationUsecase annotation_usecase,
	ClassificationResult* classification_result) const;

	// Finds all number instances in the input text.
	bool FindAll(const UnicodeText& context_unicode,
	AnnotationUsecase annotation_usecase,
	std::vector<AnnotatedSpan>* result) const;

	private:
	static std::unordered_set<int> FlatbuffersVectorToSet(
	const flatbuffers::Vector<int32_t>* codepoints);

	// Parses the text to an int64 value and returns true if succeeded, otherwise
	// false. Also returns the number of prefix/suffix codepoints that were
	// stripped from the number.
	bool ParseNumber(const UnicodeText& text, int64* result,
	int* num_prefix_codepoints,
	int* num_suffix_codepoints) const;

	const NumberAnnotatorOptions* options_;
	const FeatureProcessor* feature_processor_;
	const std::unordered_set<int> allowed_prefix_codepoints_;
	const std::unordered_set<int> allowed_suffix_codepoints_;
	};

	} // namespace libtextclassifier3

	#endif // LIBTEXTCLASSIFIER_ANNOTATOR_NUMBER_NUMBER_H_