icing/index/index.h - platform/external/icing - Git at Google

 // Copyright (C) 2019 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #ifndef ICING_INDEX_INDEX_H_
 #define ICING_INDEX_INDEX_H_

 #include <cstdint>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include <vector>

 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
 #include "icing/file/filesystem.h"
 #include "icing/index/hit/hit.h"
 #include "icing/index/iterator/doc-hit-info-iterator.h"
 #include "icing/index/lite/lite-index.h"
 #include "icing/index/lite/term-id-hit-pair.h"
 #include "icing/index/main/main-index-merger.h"
 #include "icing/index/main/main-index.h"
 #include "icing/index/term-id-codec.h"
 #include "icing/index/term-metadata.h"
 #include "icing/legacy/index/icing-filesystem.h"
 #include "icing/proto/debug.pb.h"
 #include "icing/proto/scoring.pb.h"
 #include "icing/proto/storage.pb.h"
 #include "icing/proto/term.pb.h"
 #include "icing/schema/section.h"
 #include "icing/store/document-id.h"
 #include "icing/store/namespace-id.h"
 #include "icing/store/suggestion-result-checker.h"
 #include "icing/util/status-macros.h"

 namespace icing {
 namespace lib {

 // The class representing the Icing search index. This index maps terms to hits
 // (document_ids, section_ids).
 // Content is added to the index through the Editor class - which also dedupes
 // hits (calling Editor::AddHit with the same arguments will only result in the
 // creation of a single hit).
 // Ex.
 // ICING_ASSIGN_OR_RETURN(std::unique_ptr<Index> index,
 // .                Index::Create(MakeIndexOptions()));
 // Index::Editor editor = index->Edit(document_id, section_id,
 //     TermMatchType::EXACT_ONLY); ICING_RETURN_IF_ERROR(editor.AddHit("foo"));
 // ICING_RETURN_IF_ERROR(editor.AddHit("baz"));
 //
 // Content is retrieved from the index through the Iterator class.
 // Ex.
 // ICING_ASSIGN_OR_RETURN(std::unique_ptr<Index> index,
 // .                Index::Create(MakeIndexOptions()));
 // ICING_ASSIGN_OR_RETURN(Index::Iterator iterator =
 //     index->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
 // while(iterator->Advance().ok())
 //   ProcessResult(iterator->value());
 class Index {
  public:
   struct Options {
     explicit Options(const std::string& base_dir, uint32_t index_merge_size,
                      bool lite_index_sort_at_indexing,
                      uint32_t lite_index_sort_size)
         : base_dir(base_dir),
           index_merge_size(index_merge_size),
           lite_index_sort_at_indexing(lite_index_sort_at_indexing),
           lite_index_sort_size(lite_index_sort_size) {}

     std::string base_dir;
     int32_t index_merge_size;
     bool lite_index_sort_at_indexing;
     int32_t lite_index_sort_size;
   };

   // Creates an instance of Index in the directory pointed by file_dir.
   //
   // Returns:
   //   Valid Index on success
   //   DATA_LOSS if the index was corrupt and had to be cleared
   //   INVALID_ARGUMENT if options have invalid values
   //   INTERNAL on I/O error
   static libtextclassifier3::StatusOr<std::unique_ptr<Index>> Create(
       const Options& options, const Filesystem* filesystem,
       const IcingFilesystem* icing_filesystem);

   // Reads magic from existing flash (main) index file header. We need this
   // during Icing initialization phase to determine the version.
   //
   // Returns
   //   Valid magic on success
   //   NOT_FOUND if the lite index doesn't exist
   //   INTERNAL on I/O error
   static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
       const Filesystem* filesystem, const std::string& base_dir);

   // Clears all files created by the index. Returns OK if all files were
   // cleared.
   libtextclassifier3::Status Reset() {
     ICING_RETURN_IF_ERROR(lite_index_->Reset());
     return main_index_->Reset();
   }

   // Brings components of the index into memory in anticipation of a query in
   // order to reduce latency.
   void Warm() {
     lite_index_->Warm();
     main_index_->Warm();
   }

   // Syncs all the data and metadata changes to disk.
   //
   // Returns:
   //   OK on success
   //   INTERNAL on I/O errors
   libtextclassifier3::Status PersistToDisk() {
     ICING_RETURN_IF_ERROR(lite_index_->PersistToDisk());
     return main_index_->PersistToDisk();
   }

   // Discard parts of the index if they contain data for document ids greater
   // than document_id.
   //
   // NOTE: This means that TruncateTo(kInvalidDocumentId) will have no effect.
   //
   // Returns:
   //   OK on success
   //   INTERNAL on I/O errors
   libtextclassifier3::Status TruncateTo(DocumentId document_id);

   // DocumentIds are always inserted in increasing order. Returns the largest
   // document_id added to the index.
   DocumentId last_added_document_id() const {
     DocumentId lite_document_id = lite_index_->last_added_document_id();
     if (lite_document_id != kInvalidDocumentId) {
       return lite_document_id;
     }
     return main_index_->last_added_document_id();
   }

   // Sets last_added_document_id to document_id so long as document_id >
   // last_added_document_id()
   void set_last_added_document_id(DocumentId document_id) {
     DocumentId lite_document_id = lite_index_->last_added_document_id();
     if (lite_document_id == kInvalidDocumentId ||
         document_id >= lite_document_id) {
       lite_index_->set_last_added_document_id(document_id);
     }
   }

   // Returns debug information for the index in out.
   // verbosity = BASIC, simplest debug information - just the lexicons and lite
   //                    index.
   // verbosity = DETAILED, more detailed debug information including raw
   //                       postings lists.
   IndexDebugInfoProto GetDebugInfo(DebugInfoVerbosity::Code verbosity) const {
     IndexDebugInfoProto debug_info;
     *debug_info.mutable_index_storage_info() = GetStorageInfo();
     *debug_info.mutable_lite_index_info() =
         lite_index_->GetDebugInfo(verbosity);
     *debug_info.mutable_main_index_info() =
         main_index_->GetDebugInfo(verbosity);
     return debug_info;
   }

   // Returns the byte size of the all the elements held in the index. This
   // excludes the size of any internal metadata of the index, e.g. the index's
   // header.
   //
   // Returns:
   //   Byte size on success
   //   INTERNAL_ERROR on IO error
   libtextclassifier3::StatusOr<int64_t> GetElementsSize() const {
     ICING_ASSIGN_OR_RETURN(int64_t lite_index_size,
                            lite_index_->GetElementsSize());
     ICING_ASSIGN_OR_RETURN(int64_t main_index_size,
                            main_index_->GetElementsSize());
     return lite_index_size + main_index_size;
   }

   // Calculates the StorageInfo for the Index.
   //
   // If an IO error occurs while trying to calculate the value for a field, then
   // that field will be set to -1.
   IndexStorageInfoProto GetStorageInfo() const;

   // Create an iterator to iterate through all doc hit infos in the index that
   // match the term. term_start_index is the start index of the given term in
   // the search query. unnormalized_term_length is the length of the given
   // unnormalized term in the search query not listed in the mask.
   // Eg. section_id_mask = 1U << 3; would only return hits that occur in
   // section 3.
   //
   // Returns:
   //   unique ptr to a valid DocHitInfoIterator that matches the term
   //   INVALID_ARGUMENT if given an invalid term_match_type
   libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
       const std::string& term, int term_start_index,
       int unnormalized_term_length, SectionIdMask section_id_mask,
       TermMatchType::Code term_match_type, bool need_hit_term_frequency = true);

   // Finds terms with the given prefix in the given namespaces. If
   // 'namespace_ids' is empty, returns results from all the namespaces. Results
   // are sorted in decreasing order of hit count. Number of results are no more
   // than 'num_to_return'.
   //
   // Returns:
   //   A list of TermMetadata on success
   //   INTERNAL_ERROR if failed to access term data.
   libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix(
       const std::string& prefix, int num_to_return,
       TermMatchType::Code scoring_match_type,
       SuggestionScoringSpecProto::SuggestionRankingStrategy::Code rank_by,
       const SuggestionResultChecker* suggestion_result_checker);

   // A class that can be used to add hits to the index.
   //
   // An editor groups hits from a particular section within a document together
   // and dedupes hits for the same term within a section. This removes the
   // burden of deduping from the caller and direct access to the index
   // implementation allows for more efficient deduping.
   class Editor {
    public:
     // Does not take any ownership, and all pointers must refer to valid objects
     // that outlive the one constructed.
     // TODO(b/141180665): Add nullptr checks for the raw pointers
     Editor(const TermIdCodec* term_id_codec, LiteIndex* lite_index,
            DocumentId document_id, SectionId section_id,
            TermMatchType::Code term_match_type, NamespaceId namespace_id)
         : term_id_codec_(term_id_codec),
           lite_index_(lite_index),
           document_id_(document_id),
           term_match_type_(term_match_type),
           namespace_id_(namespace_id),
           section_id_(section_id) {}

     // Buffer the term in seen_tokens_.
     libtextclassifier3::Status BufferTerm(const char* term);
     // Index all the terms stored in seen_tokens_.
     libtextclassifier3::Status IndexAllBufferedTerms();

    private:
     // The Editor is able to store previously seen terms as TermIds. This is
     // is more efficient than a client doing this externally because TermIds are
     // not exposed to clients.
     std::unordered_map<uint32_t, Hit::TermFrequency> seen_tokens_;
     const TermIdCodec* term_id_codec_;
     LiteIndex* lite_index_;
     DocumentId document_id_;
     TermMatchType::Code term_match_type_;
     NamespaceId namespace_id_;
     SectionId section_id_;
   };
   Editor Edit(DocumentId document_id, SectionId section_id,
               TermMatchType::Code term_match_type, NamespaceId namespace_id) {
     return Editor(term_id_codec_.get(), lite_index_.get(), document_id,
                   section_id, term_match_type, namespace_id);
   }

   bool WantsMerge() const { return lite_index_->WantsMerge(); }

   // Merges newly-added hits in the LiteIndex into the MainIndex.
   //
   // RETURNS:
   //  - INTERNAL on IO error while writing to the MainIndex.
   //  - RESOURCE_EXHAUSTED error if unable to grow the index.
   libtextclassifier3::Status Merge() {
     ICING_ASSIGN_OR_RETURN(MainIndex::LexiconMergeOutputs outputs,
                            main_index_->MergeLexicon(lite_index_->lexicon()));
     ICING_ASSIGN_OR_RETURN(std::vector<TermIdHitPair> term_id_hit_pairs,
                            MainIndexMerger::TranslateAndExpandLiteHits(
                                *lite_index_, *term_id_codec_, outputs));
     ICING_RETURN_IF_ERROR(main_index_->AddHits(
         *term_id_codec_, std::move(outputs.backfill_map),
         std::move(term_id_hit_pairs), lite_index_->last_added_document_id()));
     ICING_RETURN_IF_ERROR(main_index_->PersistToDisk());
     return lite_index_->Reset();
   }

   // Whether the LiteIndex HitBuffer requires sorting. This is only true if
   // Icing has enabled sorting during indexing time, and the HitBuffer's
   // unsorted tail has exceeded the lite_index_sort_size.
   bool LiteIndexNeedSort() const {
     return options_.lite_index_sort_at_indexing &&
            lite_index_->HasUnsortedHitsExceedingSortThreshold();
   }

   // Sorts the LiteIndex HitBuffer.
   void SortLiteIndex() {
     lite_index_->SortHits();
   }

   // Reduces internal file sizes by reclaiming space of deleted documents.
   // new_last_added_document_id will be used to update the last added document
   // id in the lite index.
   //
   // Returns:
   //   OK on success
   //   INTERNAL_ERROR on IO error, this indicates that the index may be in an
   //                               invalid state and should be cleared.
   libtextclassifier3::Status Optimize(
       const std::vector<DocumentId>& document_id_old_to_new,
       DocumentId new_last_added_document_id);

  private:
   Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec,
         std::unique_ptr<LiteIndex> lite_index,
         std::unique_ptr<MainIndex> main_index, const Filesystem* filesystem)
       : lite_index_(std::move(lite_index)),
         main_index_(std::move(main_index)),
         options_(options),
         term_id_codec_(std::move(term_id_codec)),
         filesystem_(filesystem) {}

   libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix(
       const std::string& prefix,
       SuggestionScoringSpecProto::SuggestionRankingStrategy::Code rank_by,
       const SuggestionResultChecker* suggestion_result_checker);

   std::unique_ptr<LiteIndex> lite_index_;
   std::unique_ptr<MainIndex> main_index_;
   const Options options_;
   std::unique_ptr<TermIdCodec> term_id_codec_;
   const Filesystem* filesystem_;
 };

 }  // namespace lib
 }  // namespace icing

 #endif  // ICING_INDEX_INDEX_H_
	// Copyright (C) 2019 Google LLC
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#ifndef ICING_INDEX_INDEX_H_
	#define ICING_INDEX_INDEX_H_

	#include <cstdint>
	#include <memory>
	#include <string>
	#include <unordered_map>
	#include <utility>
	#include <vector>

	#include "icing/text_classifier/lib3/utils/base/status.h"
	#include "icing/text_classifier/lib3/utils/base/statusor.h"
	#include "icing/file/filesystem.h"
	#include "icing/index/hit/hit.h"
	#include "icing/index/iterator/doc-hit-info-iterator.h"
	#include "icing/index/lite/lite-index.h"
	#include "icing/index/lite/term-id-hit-pair.h"
	#include "icing/index/main/main-index-merger.h"
	#include "icing/index/main/main-index.h"
	#include "icing/index/term-id-codec.h"
	#include "icing/index/term-metadata.h"
	#include "icing/legacy/index/icing-filesystem.h"
	#include "icing/proto/debug.pb.h"
	#include "icing/proto/scoring.pb.h"
	#include "icing/proto/storage.pb.h"
	#include "icing/proto/term.pb.h"
	#include "icing/schema/section.h"
	#include "icing/store/document-id.h"
	#include "icing/store/namespace-id.h"
	#include "icing/store/suggestion-result-checker.h"
	#include "icing/util/status-macros.h"

	namespace icing {
	namespace lib {

	// The class representing the Icing search index. This index maps terms to hits
	// (document_ids, section_ids).
	// Content is added to the index through the Editor class - which also dedupes
	// hits (calling Editor::AddHit with the same arguments will only result in the
	// creation of a single hit).
	// Ex.
	// ICING_ASSIGN_OR_RETURN(std::unique_ptr<Index> index,
	// . Index::Create(MakeIndexOptions()));
	// Index::Editor editor = index->Edit(document_id, section_id,
	// TermMatchType::EXACT_ONLY); ICING_RETURN_IF_ERROR(editor.AddHit("foo"));
	// ICING_RETURN_IF_ERROR(editor.AddHit("baz"));
	//
	// Content is retrieved from the index through the Iterator class.
	// Ex.
	// ICING_ASSIGN_OR_RETURN(std::unique_ptr<Index> index,
	// . Index::Create(MakeIndexOptions()));
	// ICING_ASSIGN_OR_RETURN(Index::Iterator iterator =
	// index->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
	// while(iterator->Advance().ok())
	// ProcessResult(iterator->value());
	class Index {
	public:
	struct Options {
	explicit Options(const std::string& base_dir, uint32_t index_merge_size,
	bool lite_index_sort_at_indexing,
	uint32_t lite_index_sort_size)
	: base_dir(base_dir),
	index_merge_size(index_merge_size),
	lite_index_sort_at_indexing(lite_index_sort_at_indexing),
	lite_index_sort_size(lite_index_sort_size) {}

	std::string base_dir;
	int32_t index_merge_size;
	bool lite_index_sort_at_indexing;
	int32_t lite_index_sort_size;
	};

	// Creates an instance of Index in the directory pointed by file_dir.
	//
	// Returns:
	// Valid Index on success
	// DATA_LOSS if the index was corrupt and had to be cleared
	// INVALID_ARGUMENT if options have invalid values
	// INTERNAL on I/O error
	static libtextclassifier3::StatusOr<std::unique_ptr<Index>> Create(
	const Options& options, const Filesystem* filesystem,
	const IcingFilesystem* icing_filesystem);

	// Reads magic from existing flash (main) index file header. We need this
	// during Icing initialization phase to determine the version.
	//
	// Returns
	// Valid magic on success
	// NOT_FOUND if the lite index doesn't exist
	// INTERNAL on I/O error
	static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
	const Filesystem* filesystem, const std::string& base_dir);

	// Clears all files created by the index. Returns OK if all files were
	// cleared.
	libtextclassifier3::Status Reset() {
	ICING_RETURN_IF_ERROR(lite_index_->Reset());
	return main_index_->Reset();
	}

	// Brings components of the index into memory in anticipation of a query in
	// order to reduce latency.
	void Warm() {
	lite_index_->Warm();
	main_index_->Warm();
	}

	// Syncs all the data and metadata changes to disk.
	//
	// Returns:
	// OK on success
	// INTERNAL on I/O errors
	libtextclassifier3::Status PersistToDisk() {
	ICING_RETURN_IF_ERROR(lite_index_->PersistToDisk());
	return main_index_->PersistToDisk();
	}

	// Discard parts of the index if they contain data for document ids greater
	// than document_id.
	//
	// NOTE: This means that TruncateTo(kInvalidDocumentId) will have no effect.
	//
	// Returns:
	// OK on success
	// INTERNAL on I/O errors
	libtextclassifier3::Status TruncateTo(DocumentId document_id);

	// DocumentIds are always inserted in increasing order. Returns the largest
	// document_id added to the index.
	DocumentId last_added_document_id() const {
	DocumentId lite_document_id = lite_index_->last_added_document_id();
	if (lite_document_id != kInvalidDocumentId) {
	return lite_document_id;
	}
	return main_index_->last_added_document_id();
	}

	// Sets last_added_document_id to document_id so long as document_id >
	// last_added_document_id()
	void set_last_added_document_id(DocumentId document_id) {
	DocumentId lite_document_id = lite_index_->last_added_document_id();
	if (lite_document_id == kInvalidDocumentId \|\|
	document_id >= lite_document_id) {
	lite_index_->set_last_added_document_id(document_id);
	}
	}

	// Returns debug information for the index in out.
	// verbosity = BASIC, simplest debug information - just the lexicons and lite
	// index.
	// verbosity = DETAILED, more detailed debug information including raw
	// postings lists.
	IndexDebugInfoProto GetDebugInfo(DebugInfoVerbosity::Code verbosity) const {
	IndexDebugInfoProto debug_info;
	*debug_info.mutable_index_storage_info() = GetStorageInfo();
	*debug_info.mutable_lite_index_info() =
	lite_index_->GetDebugInfo(verbosity);
	*debug_info.mutable_main_index_info() =
	main_index_->GetDebugInfo(verbosity);
	return debug_info;
	}

	// Returns the byte size of the all the elements held in the index. This
	// excludes the size of any internal metadata of the index, e.g. the index's
	// header.
	//
	// Returns:
	// Byte size on success
	// INTERNAL_ERROR on IO error
	libtextclassifier3::StatusOr<int64_t> GetElementsSize() const {
	ICING_ASSIGN_OR_RETURN(int64_t lite_index_size,
	lite_index_->GetElementsSize());
	ICING_ASSIGN_OR_RETURN(int64_t main_index_size,
	main_index_->GetElementsSize());
	return lite_index_size + main_index_size;
	}

	// Calculates the StorageInfo for the Index.
	//
	// If an IO error occurs while trying to calculate the value for a field, then
	// that field will be set to -1.
	IndexStorageInfoProto GetStorageInfo() const;

	// Create an iterator to iterate through all doc hit infos in the index that
	// match the term. term_start_index is the start index of the given term in
	// the search query. unnormalized_term_length is the length of the given
	// unnormalized term in the search query not listed in the mask.
	// Eg. section_id_mask = 1U << 3; would only return hits that occur in
	// section 3.
	//
	// Returns:
	// unique ptr to a valid DocHitInfoIterator that matches the term
	// INVALID_ARGUMENT if given an invalid term_match_type
	libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
	const std::string& term, int term_start_index,
	int unnormalized_term_length, SectionIdMask section_id_mask,
	TermMatchType::Code term_match_type, bool need_hit_term_frequency = true);

	// Finds terms with the given prefix in the given namespaces. If
	// 'namespace_ids' is empty, returns results from all the namespaces. Results
	// are sorted in decreasing order of hit count. Number of results are no more
	// than 'num_to_return'.
	//
	// Returns:
	// A list of TermMetadata on success
	// INTERNAL_ERROR if failed to access term data.
	libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix(
	const std::string& prefix, int num_to_return,
	TermMatchType::Code scoring_match_type,
	SuggestionScoringSpecProto::SuggestionRankingStrategy::Code rank_by,
	const SuggestionResultChecker* suggestion_result_checker);

	// A class that can be used to add hits to the index.
	//
	// An editor groups hits from a particular section within a document together
	// and dedupes hits for the same term within a section. This removes the
	// burden of deduping from the caller and direct access to the index
	// implementation allows for more efficient deduping.
	class Editor {
	public:
	// Does not take any ownership, and all pointers must refer to valid objects
	// that outlive the one constructed.
	// TODO(b/141180665): Add nullptr checks for the raw pointers
	Editor(const TermIdCodec* term_id_codec, LiteIndex* lite_index,
	DocumentId document_id, SectionId section_id,
	TermMatchType::Code term_match_type, NamespaceId namespace_id)
	: term_id_codec_(term_id_codec),
	lite_index_(lite_index),
	document_id_(document_id),
	term_match_type_(term_match_type),
	namespace_id_(namespace_id),
	section_id_(section_id) {}

	// Buffer the term in seen_tokens_.
	libtextclassifier3::Status BufferTerm(const char* term);
	// Index all the terms stored in seen_tokens_.
	libtextclassifier3::Status IndexAllBufferedTerms();

	private:
	// The Editor is able to store previously seen terms as TermIds. This is
	// is more efficient than a client doing this externally because TermIds are
	// not exposed to clients.
	std::unordered_map<uint32_t, Hit::TermFrequency> seen_tokens_;
	const TermIdCodec* term_id_codec_;
	LiteIndex* lite_index_;
	DocumentId document_id_;
	TermMatchType::Code term_match_type_;
	NamespaceId namespace_id_;
	SectionId section_id_;
	};
	Editor Edit(DocumentId document_id, SectionId section_id,
	TermMatchType::Code term_match_type, NamespaceId namespace_id) {
	return Editor(term_id_codec_.get(), lite_index_.get(), document_id,
	section_id, term_match_type, namespace_id);
	}

	bool WantsMerge() const { return lite_index_->WantsMerge(); }

	// Merges newly-added hits in the LiteIndex into the MainIndex.
	//
	// RETURNS:
	// - INTERNAL on IO error while writing to the MainIndex.
	// - RESOURCE_EXHAUSTED error if unable to grow the index.
	libtextclassifier3::Status Merge() {
	ICING_ASSIGN_OR_RETURN(MainIndex::LexiconMergeOutputs outputs,
	main_index_->MergeLexicon(lite_index_->lexicon()));
	ICING_ASSIGN_OR_RETURN(std::vector<TermIdHitPair> term_id_hit_pairs,
	MainIndexMerger::TranslateAndExpandLiteHits(
	lite_index_, term_id_codec_, outputs));
	ICING_RETURN_IF_ERROR(main_index_->AddHits(
	*term_id_codec_, std::move(outputs.backfill_map),
	std::move(term_id_hit_pairs), lite_index_->last_added_document_id()));
	ICING_RETURN_IF_ERROR(main_index_->PersistToDisk());
	return lite_index_->Reset();
	}

	// Whether the LiteIndex HitBuffer requires sorting. This is only true if
	// Icing has enabled sorting during indexing time, and the HitBuffer's
	// unsorted tail has exceeded the lite_index_sort_size.
	bool LiteIndexNeedSort() const {
	return options_.lite_index_sort_at_indexing &&
	lite_index_->HasUnsortedHitsExceedingSortThreshold();
	}

	// Sorts the LiteIndex HitBuffer.
	void SortLiteIndex() {
	lite_index_->SortHits();
	}

	// Reduces internal file sizes by reclaiming space of deleted documents.
	// new_last_added_document_id will be used to update the last added document
	// id in the lite index.
	//
	// Returns:
	// OK on success
	// INTERNAL_ERROR on IO error, this indicates that the index may be in an
	// invalid state and should be cleared.
	libtextclassifier3::Status Optimize(
	const std::vector<DocumentId>& document_id_old_to_new,
	DocumentId new_last_added_document_id);

	private:
	Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec,
	std::unique_ptr<LiteIndex> lite_index,
	std::unique_ptr<MainIndex> main_index, const Filesystem* filesystem)
	: lite_index_(std::move(lite_index)),
	main_index_(std::move(main_index)),
	options_(options),
	term_id_codec_(std::move(term_id_codec)),
	filesystem_(filesystem) {}

	libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix(
	const std::string& prefix,
	SuggestionScoringSpecProto::SuggestionRankingStrategy::Code rank_by,
	const SuggestionResultChecker* suggestion_result_checker);

	std::unique_ptr<LiteIndex> lite_index_;
	std::unique_ptr<MainIndex> main_index_;
	const Options options_;
	std::unique_ptr<TermIdCodec> term_id_codec_;
	const Filesystem* filesystem_;
	};

	} // namespace lib
	} // namespace icing

	#endif // ICING_INDEX_INDEX_H_