blob: c2cee47dfede7cf71e82dbf50592c4118cc59fff [file] [log] [blame]
// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/query/advanced_query_parser/query-visitor.h"
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-none.h"
#include "icing/index/iterator/doc-hit-info-iterator-not.h"
#include "icing/index/iterator/doc-hit-info-iterator-or.h"
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/param.h"
#include "icing/query/advanced_query_parser/parser.h"
#include "icing/query/advanced_query_parser/pending-value.h"
#include "icing/query/advanced_query_parser/util/string-util.h"
#include "icing/query/query-features.h"
#include "icing/schema/property-util.h"
#include "icing/schema/section.h"
#include "icing/tokenization/token.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
namespace {
struct CreateList {
libtextclassifier3::StatusOr<PendingValue> operator()(
std::vector<PendingValue>&& args) const {
std::vector<std::string> values;
values.reserve(args.size());
for (PendingValue& arg : args) {
QueryTerm string_val = std::move(arg).string_val().ValueOrDie();
values.push_back(std::move(string_val.term));
}
return PendingValue(std::move(values));
}
};
bool IsNumericComparator(std::string_view operator_text) {
if (operator_text.length() < 1 || operator_text.length() > 2) {
return false;
}
// TODO(tjbarron) decide how/if to support !=
return operator_text == "<" || operator_text == ">" ||
operator_text == "==" || operator_text == "<=" ||
operator_text == ">=";
}
bool IsSupportedNaryOperator(std::string_view operator_text) {
return IsNumericComparator(operator_text) || operator_text == "AND" ||
operator_text == "OR" || operator_text == ":";
}
struct Int64Range {
int64_t low;
int64_t high;
};
libtextclassifier3::StatusOr<Int64Range> GetInt64Range(
std::string_view operator_text, int64_t int_value) {
Int64Range range = {std::numeric_limits<int64_t>::min(),
std::numeric_limits<int64_t>::max()};
if (operator_text == "<") {
if (int_value == std::numeric_limits<int64_t>::min()) {
return absl_ports::InvalidArgumentError(
"Cannot specify < INT64_MIN in query expression.");
}
range.high = int_value - 1;
} else if (operator_text == "<=") {
range.high = int_value;
} else if (operator_text == "==") {
range.high = int_value;
range.low = int_value;
} else if (operator_text == ">=") {
range.low = int_value;
} else if (operator_text == ">") {
if (int_value == std::numeric_limits<int64_t>::max()) {
return absl_ports::InvalidArgumentError(
"Cannot specify > INT64_MAX in query expression.");
}
range.low = int_value + 1;
}
return range;
}
} // namespace
void QueryVisitor::PendingPropertyRestricts::AddValidRestricts(
std::set<std::string> new_restricts) {
if (!has_active_property_restricts()) {
pending_property_restricts_.push_back(std::move(new_restricts));
return;
}
// There is an active property restrict already in effect. To determine the
// updated active property restrict being applied at this level, we need to
// calculate the intersection of new_restricts and
// active_property_restricts.
const std::set<std::string>& active_restricts = active_property_restricts();
auto active_restricts_itr = active_restricts.begin();
for (auto new_restricts_itr = new_restricts.begin();
new_restricts_itr != new_restricts.end();) {
while (active_restricts_itr != active_restricts.end() &&
*active_restricts_itr < *new_restricts_itr) {
// new_restricts_itr is behind active_restricts_itr.
++active_restricts_itr;
}
if (active_restricts_itr == active_restricts.end()) {
// There's nothing left in active restricts. Everything at
// new_restricts_itr and beyond should be removed
new_restricts_itr =
new_restricts.erase(new_restricts_itr, new_restricts.end());
} else if (*active_restricts_itr > *new_restricts_itr) {
// new_restricts_itr points to elements not present in
// active_restricts_itr
new_restricts_itr = new_restricts.erase(new_restricts_itr);
} else {
// the element that new_restricts_itr points to is present in
// active_restricts_itr.
++new_restricts_itr;
}
}
pending_property_restricts_.push_back(std::move(new_restricts));
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
QueryVisitor::CreateTermIterator(const QueryTerm& query_term) {
if (query_term.is_prefix_val) {
// '*' prefix operator was added in list filters
features_.insert(kListFilterQueryLanguageFeature);
}
TermMatchType::Code match_type = GetTermMatchType(query_term.is_prefix_val);
int unnormalized_term_start =
query_term.raw_term.data() - raw_query_text_.data();
if (!processing_not_) {
// 1. Add term to property_query_terms_map
if (pending_property_restricts_.has_active_property_restricts()) {
for (const std::string& property_restrict :
pending_property_restricts_.active_property_restricts()) {
property_query_terms_map_[property_restrict].insert(query_term.term);
}
} else {
property_query_terms_map_[""].insert(query_term.term);
}
// 2. If needed add term iterator to query_term_iterators_ map.
if (needs_term_frequency_info_) {
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> term_iterator,
index_.GetIterator(query_term.term, unnormalized_term_start,
query_term.raw_term.length(), kSectionIdMaskAll,
match_type_, needs_term_frequency_info_));
query_term_iterators_[query_term.term] =
std::make_unique<DocHitInfoIteratorFilter>(
std::move(term_iterator), &document_store_, &schema_store_,
filter_options_);
}
}
// 3. Add the term iterator.
return index_.GetIterator(query_term.term, unnormalized_term_start,
query_term.raw_term.length(), kSectionIdMaskAll,
match_type, needs_term_frequency_info_);
}
void QueryVisitor::RegisterFunctions() {
// std::vector<std::string> createList(std::string...);
Function create_list_function_ =
Function::Create(DataType::kStringList, "createList",
{Param(DataType::kString, Cardinality::kRequired),
Param(DataType::kString, Cardinality::kVariable)},
CreateList())
.ValueOrDie();
registered_functions_.insert(
{create_list_function_.name(), std::move(create_list_function_)});
// DocHitInfoIterator search(std::string);
// DocHitInfoIterator search(std::string, std::vector<std::string>);
auto search_eval = [this](std::vector<PendingValue>&& args) {
return this->SearchFunction(std::move(args));
};
Function search_function =
Function::Create(DataType::kDocumentIterator, "search",
{Param(DataType::kString),
Param(DataType::kStringList, Cardinality::kOptional)},
std::move(search_eval))
.ValueOrDie();
registered_functions_.insert(
{search_function.name(), std::move(search_function)});
// DocHitInfoIterator propertyDefined(std::string);
auto property_defined = [this](std::vector<PendingValue>&& args) {
return this->PropertyDefinedFunction(std::move(args));
};
Function property_defined_function =
Function::Create(DataType::kDocumentIterator, "propertyDefined",
{Param(DataType::kText)}, std::move(property_defined))
.ValueOrDie();
registered_functions_.insert(
{property_defined_function.name(), std::move(property_defined_function)});
}
libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction(
std::vector<PendingValue>&& args) {
// The second arg (if present) is a list of sections to restrict to.
if (args.size() == 2) {
std::set<std::string> new_restricts;
std::vector<std::string> property_restricts =
std::move(args.at(1)).string_vals().ValueOrDie();
for (std::string& property_restrict : property_restricts) {
new_restricts.insert(std::move(property_restrict));
}
pending_property_restricts_.AddValidRestricts(std::move(new_restricts));
if (pending_property_restricts_.active_property_restricts().empty()) {
pending_property_restricts_.PopRestricts();
return PendingValue(std::make_unique<DocHitInfoIteratorNone>());
}
}
// The first arg is guaranteed to be a STRING at this point. It should be safe
// to call ValueOrDie.
const QueryTerm* query = args.at(0).string_val().ValueOrDie();
Lexer lexer(query->term, Lexer::Language::QUERY);
ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
lexer.ExtractTokens());
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
std::unique_ptr<DocHitInfoIterator> iterator;
QueryResults query_result;
if (tree_root == nullptr) {
iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
document_store_.last_added_document_id());
} else {
QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_,
&schema_store_, &normalizer_, &tokenizer_,
query->raw_term, filter_options_, match_type_,
needs_term_frequency_info_,
pending_property_restricts_, processing_not_);
tree_root->Accept(&query_visitor);
ICING_ASSIGN_OR_RETURN(query_result,
std::move(query_visitor).ConsumeResults());
iterator = std::move(query_result.root_iterator);
}
// Update members based on results of processing the query.
if (args.size() == 2 &&
pending_property_restricts_.has_active_property_restricts()) {
iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
std::move(iterator), &document_store_, &schema_store_,
pending_property_restricts_.active_property_restricts());
pending_property_restricts_.PopRestricts();
}
if (!processing_not_) {
std::move(
query_result.query_term_iterators.begin(),
query_result.query_term_iterators.end(),
std::inserter(query_term_iterators_, query_term_iterators_.end()));
std::move(query_result.query_terms.begin(), query_result.query_terms.end(),
std::inserter(property_query_terms_map_,
property_query_terms_map_.end()));
}
std::move(query_result.features_in_use.begin(),
query_result.features_in_use.end(),
std::inserter(features_, features_.end()));
return PendingValue(std::move(iterator));
}
libtextclassifier3::StatusOr<PendingValue>
QueryVisitor::PropertyDefinedFunction(std::vector<PendingValue>&& args) {
// The first arg is guaranteed to be a TEXT at this point. It should be safe
// to call ValueOrDie.
// TODO(b/268680462): Consume this and implement the actual iterator.
// const QueryTerm* member =
args.at(0).text_val().ValueOrDie();
std::unique_ptr<DocHitInfoIterator> iterator =
std::make_unique<DocHitInfoIteratorAllDocumentId>(
document_store_.last_added_document_id());
features_.insert(kPropertyDefinedInSchemaCustomFunctionFeature);
return PendingValue(std::move(iterator));
}
libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() {
if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError("Unable to retrieve int value.");
}
ICING_ASSIGN_OR_RETURN(int64_t int_value, pending_values_.top().long_val());
pending_values_.pop();
return int_value;
}
libtextclassifier3::StatusOr<QueryTerm> QueryVisitor::PopPendingStringValue() {
if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError("Unable to retrieve string value.");
}
ICING_ASSIGN_OR_RETURN(QueryTerm string_value,
std::move(pending_values_.top()).string_val());
pending_values_.pop();
return string_value;
}
libtextclassifier3::StatusOr<QueryTerm> QueryVisitor::PopPendingTextValue() {
if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError("Unable to retrieve text value.");
}
ICING_ASSIGN_OR_RETURN(QueryTerm text_value,
std::move(pending_values_.top()).text_val());
pending_values_.pop();
return text_value;
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
QueryVisitor::PopPendingIterator() {
if (pending_values_.empty() || pending_values_.top().is_placeholder()) {
return absl_ports::InvalidArgumentError("Unable to retrieve iterator.");
}
if (pending_values_.top().data_type() == DataType::kDocumentIterator) {
std::unique_ptr<DocHitInfoIterator> iterator =
std::move(pending_values_.top()).iterator().ValueOrDie();
pending_values_.pop();
return iterator;
} else if (pending_values_.top().data_type() == DataType::kString) {
features_.insert(kVerbatimSearchFeature);
ICING_ASSIGN_OR_RETURN(QueryTerm string_value, PopPendingStringValue());
return CreateTermIterator(std::move(string_value));
} else {
ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<Tokenizer::Iterator> token_itr,
tokenizer_.Tokenize(text_value.term,
LanguageSegmenter::AccessType::kForwardIterator));
std::string normalized_term;
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
// The tokenizer will produce 1+ tokens out of the text. The prefix operator
// only applies to the final token.
bool reached_final_token = !token_itr->Advance();
// raw_text is the portion of text_value.raw_term that hasn't yet been
// matched to any of the tokens that we've processed. escaped_token will
// hold the portion of raw_text that corresponds to the current token that
// is being processed.
std::string_view raw_text = text_value.raw_term;
std::string_view raw_token;
while (!reached_final_token) {
std::vector<Token> tokens = token_itr->GetTokens();
if (tokens.size() > 1) {
// The tokenizer iterator iterates between token groups. In practice,
// the tokenizer used with QueryVisitor (PlainTokenizer) will always
// only produce a single token per token group.
return absl_ports::InvalidArgumentError(
"Encountered unexpected token group with >1 tokens.");
}
reached_final_token = !token_itr->Advance();
const Token& token = tokens.at(0);
if (reached_final_token && token.text.length() == raw_text.length()) {
// Unescaped tokens are strictly smaller than their escaped counterparts
// This means that if we're at the final token and token.length equals
// raw_text, then all of raw_text must correspond to this token.
raw_token = raw_text;
} else {
ICING_ASSIGN_OR_RETURN(raw_token, string_util::FindEscapedToken(
raw_text, token.text));
}
normalized_term = normalizer_.NormalizeTerm(token.text);
QueryTerm term_value{std::move(normalized_term), raw_token,
reached_final_token && text_value.is_prefix_val};
ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iterator,
CreateTermIterator(std::move(term_value)));
iterators.push_back(std::move(iterator));
// Remove escaped_token from raw_text now that we've processed
// raw_text.
const char* escaped_token_end = raw_token.data() + raw_token.length();
raw_text = raw_text.substr(escaped_token_end - raw_text.data());
}
// Finally, create an And Iterator. If there's only a single term here, then
// it will just return that term iterator. Otherwise, segmented text is
// treated as a group of terms AND'd together.
return CreateAndIterator(std::move(iterators));
}
}
libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DocHitInfoIterator>>>
QueryVisitor::PopAllPendingIterators() {
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> itr,
PopPendingIterator());
iterators.push_back(std::move(itr));
}
if (pending_values_.empty()) {
return absl_ports::InvalidArgumentError(
"Unable to retrieve expected iterators.");
}
// Iterators will be in reverse order because we retrieved them from the
// stack. Reverse them to get back to the original ordering.
std::reverse(iterators.begin(), iterators.end());
return iterators;
}
libtextclassifier3::Status QueryVisitor::ProcessNumericComparator(
const NaryOperatorNode* node) {
if (node->children().size() != 2) {
return absl_ports::InvalidArgumentError("Expected 2 children.");
}
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// 2. The first child is the property to restrict by.
node->children().at(0)->Accept(this);
if (has_pending_error()) {
return std::move(pending_error_);
}
ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
if (text_value.is_prefix_val) {
return absl_ports::InvalidArgumentError(
"Cannot use prefix operator '*' with a property name!");
}
// If there is an active property restrict and this property is not present in
// in the active restrict set, then it's not satisfiable.
if (pending_property_restricts_.has_active_property_restricts() &&
pending_property_restricts_.active_property_restricts().find(
text_value.term) ==
pending_property_restricts_.active_property_restricts().end()) {
// The property restrict can't be satisfiable. Pop the placeholder that was
// just added and push a FALSE iterator.
pending_property_restricts_.PopRestricts();
pending_values_.pop();
pending_values_.push(
PendingValue(std::make_unique<DocHitInfoIteratorNone>()));
return libtextclassifier3::Status::OK;
}
// 3. The second child should be parseable as an integer value.
expecting_numeric_arg_ = true;
node->children().at(1)->Accept(this);
expecting_numeric_arg_ = false;
ICING_ASSIGN_OR_RETURN(int64_t int_value, PopPendingIntValue());
// 4. Check for the placeholder.
if (!pending_values_.top().is_placeholder()) {
return absl_ports::InvalidArgumentError(
"Error processing arguments for node.");
}
pending_values_.pop();
// 5. Create the iterator and push it onto pending_values_.
ICING_ASSIGN_OR_RETURN(Int64Range range,
GetInt64Range(node->operator_text(), int_value));
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<DocHitInfoIterator> iterator,
numeric_index_.GetIterator(text_value.term, range.low, range.high,
document_store_, schema_store_));
features_.insert(kNumericSearchFeature);
pending_values_.push(PendingValue(std::move(iterator)));
return libtextclassifier3::Status::OK;
}
libtextclassifier3::StatusOr<PendingValue> QueryVisitor::ProcessAndOperator(
const NaryOperatorNode* node) {
ICING_ASSIGN_OR_RETURN(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
PopAllPendingIterators());
return PendingValue(CreateAndIterator(std::move(iterators)));
}
libtextclassifier3::StatusOr<PendingValue> QueryVisitor::ProcessOrOperator(
const NaryOperatorNode* node) {
ICING_ASSIGN_OR_RETURN(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
PopAllPendingIterators());
return PendingValue(CreateOrIterator(std::move(iterators)));
}
libtextclassifier3::Status QueryVisitor::ProcessNegationOperator(
const UnaryOperatorNode* node) {
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// 2. Visit child
node->child()->Accept(this);
if (has_pending_error()) {
return std::move(pending_error_);
}
if (pending_values_.size() < 2) {
return absl_ports::InvalidArgumentError(
"Visit unary operator child didn't correctly add pending values.");
}
// 3. We want to preserve the original text of the integer value, append our
// minus and *then* parse as an int.
ICING_ASSIGN_OR_RETURN(QueryTerm int_text_val, PopPendingTextValue());
int_text_val.term = absl_ports::StrCat("-", int_text_val.term);
PendingValue pending_value =
PendingValue::CreateTextPendingValue(std::move(int_text_val));
ICING_RETURN_IF_ERROR(pending_value.long_val());
// We've parsed our integer value successfully. Pop our placeholder, push it
// on to the stack and return successfully.
if (!pending_values_.top().is_placeholder()) {
return absl_ports::InvalidArgumentError(
"Error processing arguments for node.");
}
pending_values_.pop();
pending_values_.push(std::move(pending_value));
return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status QueryVisitor::ProcessNotOperator(
const UnaryOperatorNode* node) {
// TODO(b/265312785) Consider implementing query optimization when we run into
// nested NOTs. This would allow us to simplify a query like "NOT (-foo)" to
// just "foo". This would also require more complicate rewrites as we would
// need to do things like rewrite "NOT (-a OR b)" as "a AND -b" and
// "NOT (price < 5)" as "price >= 5".
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// Toggle whatever the current value of 'processing_not_' is before visiting
// the children.
processing_not_ = !processing_not_;
// 2. Visit child
node->child()->Accept(this);
if (has_pending_error()) {
return std::move(pending_error_);
}
if (pending_values_.size() < 2) {
return absl_ports::InvalidArgumentError(
"Visit unary operator child didn't correctly add pending values.");
}
// 3. Retrieve the delegate iterator
ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
PopPendingIterator());
// 4. Check for the placeholder.
if (!pending_values_.top().is_placeholder()) {
return absl_ports::InvalidArgumentError(
"Error processing arguments for node.");
}
pending_values_.pop();
pending_values_.push(PendingValue(std::make_unique<DocHitInfoIteratorNot>(
std::move(delegate), document_store_.last_added_document_id())));
// Untoggle whatever the current value of 'processing_not_' is now that we've
// finished processing this NOT.
processing_not_ = !processing_not_;
return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status QueryVisitor::ProcessHasOperator(
const NaryOperatorNode* node) {
if (node->children().size() != 2) {
return absl_ports::InvalidArgumentError("Expected 2 children.");
}
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// 2. Visit the first child - the property.
node->children().at(0)->Accept(this);
if (has_pending_error()) {
return pending_error_;
}
ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
if (text_value.is_prefix_val) {
return absl_ports::InvalidArgumentError(
"Cannot use prefix operator '*' with a property name!");
}
pending_property_restricts_.AddValidRestricts({text_value.term});
// Just added a restrict - if there are no active property restricts then that
// be because this restrict is unsatisfiable.
if (pending_property_restricts_.active_property_restricts().empty()) {
// The property restrict can't be satisfiable. Pop the placeholder that was
// just added and push a FALSE iterator.
pending_property_restricts_.PopRestricts();
pending_values_.pop();
pending_values_.push(
PendingValue(std::make_unique<DocHitInfoIteratorNone>()));
return libtextclassifier3::Status::OK;
}
// 3. Visit the second child - the argument.
node->children().at(1)->Accept(this);
if (has_pending_error()) {
return pending_error_;
}
ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
PopPendingIterator());
// 4. Check for the placeholder.
if (!pending_values_.top().is_placeholder()) {
return absl_ports::InvalidArgumentError(
"Error processing arguments for node.");
}
pending_values_.pop();
pending_property_restricts_.PopRestricts();
std::set<std::string> property_restricts = {std::move(text_value.term)};
pending_values_.push(
PendingValue(std::make_unique<DocHitInfoIteratorSectionRestrict>(
std::move(delegate), &document_store_, &schema_store_,
std::move(property_restricts))));
return libtextclassifier3::Status::OK;
}
void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) {
pending_error_ = absl_ports::UnimplementedError(
"Function Name node visiting not implemented yet.");
}
void QueryVisitor::VisitString(const StringNode* node) {
// A STRING node can only be a term. Create the iterator now.
auto unescaped_string_or = string_util::UnescapeStringValue(node->value());
if (!unescaped_string_or.ok()) {
pending_error_ = std::move(unescaped_string_or).status();
return;
}
std::string unescaped_string = std::move(unescaped_string_or).ValueOrDie();
QueryTerm val{std::move(unescaped_string), node->raw_value(),
node->is_prefix()};
pending_values_.push(PendingValue::CreateStringPendingValue(std::move(val)));
}
void QueryVisitor::VisitText(const TextNode* node) {
// TEXT nodes could either be a term (and will become DocHitInfoIteratorTerm)
// or a property name. As such, we just push the TEXT value into pending
// values and determine which it is at a later point.
QueryTerm val{std::move(node->value()), node->raw_value(), node->is_prefix()};
pending_values_.push(PendingValue::CreateTextPendingValue(std::move(val)));
}
void QueryVisitor::VisitMember(const MemberNode* node) {
if (node->children().empty()) {
pending_error_ =
absl_ports::InvalidArgumentError("Encountered malformed member node.");
return;
}
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// 2. Visit the children.
for (const std::unique_ptr<TextNode>& child : node->children()) {
child->Accept(this);
if (has_pending_error()) {
return;
}
}
// 3. Now process the results of the children and produce a single pending
// value representing this member.
PendingValue pending_value;
if (node->children().size() == 1) {
// 3a. This member only has a single child, then the pending value produced
// by that child is the final value produced by this member.
pending_value = std::move(pending_values_.top());
pending_values_.pop();
} else {
// 3b. Retrieve the values of all children and concatenate them into a
// single value.
libtextclassifier3::StatusOr<QueryTerm> member_or;
std::vector<std::string> members;
QueryTerm text_val;
const char* start = nullptr;
const char* end = nullptr;
while (!pending_values_.empty() &&
!pending_values_.top().is_placeholder()) {
member_or = PopPendingTextValue();
if (!member_or.ok()) {
pending_error_ = std::move(member_or).status();
return;
}
text_val = std::move(member_or).ValueOrDie();
if (text_val.is_prefix_val) {
pending_error_ = absl_ports::InvalidArgumentError(
"Cannot use prefix operator '*' within a property name!");
return;
}
if (start == nullptr) {
start = text_val.raw_term.data();
end = text_val.raw_term.data() + text_val.raw_term.length();
} else {
start = std::min(start, text_val.raw_term.data());
end = std::max(end, text_val.raw_term.data() + text_val.raw_term.length());
}
members.push_back(std::move(text_val.term));
}
QueryTerm member;
member.term = absl_ports::StrJoin(members.rbegin(), members.rend(),
property_util::kPropertyPathSeparator);
member.raw_term = std::string_view(start, end - start);
member.is_prefix_val = false;
pending_value = PendingValue::CreateTextPendingValue(std::move(member));
}
// 4. If pending_values_ is empty somehow, then our placeholder disappeared
// somehow.
if (pending_values_.empty()) {
pending_error_ = absl_ports::InvalidArgumentError(
"Error processing arguments for member node.");
return;
}
pending_values_.pop();
pending_values_.push(std::move(pending_value));
}
void QueryVisitor::VisitFunction(const FunctionNode* node) {
// 1. Get the associated function.
auto itr = registered_functions_.find(node->function_name()->value());
if (itr == registered_functions_.end()) {
pending_error_ = absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Function ", node->function_name()->value(), " is not supported."));
return;
}
// 2. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// 3. Visit the children.
for (const std::unique_ptr<Node>& arg : node->args()) {
arg->Accept(this);
if (has_pending_error()) {
return;
}
}
// 4. Collect the arguments and evaluate the function.
std::vector<PendingValue> args;
while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
args.push_back(std::move(pending_values_.top()));
pending_values_.pop();
}
std::reverse(args.begin(), args.end());
const Function& function = itr->second;
auto eval_result = function.Eval(std::move(args));
if (!eval_result.ok()) {
pending_error_ = std::move(eval_result).status();
return;
}
// 5. Pop placeholder in pending_values and add the result of our function.
pending_values_.pop();
pending_values_.push(std::move(eval_result).ValueOrDie());
// Support for custom functions was added in list filters.
features_.insert(kListFilterQueryLanguageFeature);
}
// TODO(b/265312785) Clarify handling of the interaction between HAS and NOT.
// Currently, `prop1:(NOT foo bar)` will not match any documents. Likewise,
// `search("NOT foo bar", createList("prop1"))` will not match any documents.
//
// We should either confirm that this is the desired behavior or consider
// rewriting these queries so that they're interpreted as
// `NOT prop1:foo AND prop1:bar` and
// `NOT search("foo", createList("prop1"))
// AND search("bar", createList("prop1"))`
void QueryVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
bool is_minus = node->operator_text() == "MINUS";
if (node->operator_text() != "NOT" && !is_minus) {
pending_error_ = absl_ports::UnimplementedError(
absl_ports::StrCat("Visiting for unary operator ",
node->operator_text(), " not implemented yet."));
return;
}
libtextclassifier3::Status status;
if (expecting_numeric_arg_ && is_minus) {
// If the operator is a MINUS ('-') and we're at the child of a numeric
// comparator, then this must be a negation ('-3')
status = ProcessNegationOperator(node);
} else {
status = ProcessNotOperator(node);
}
if (!status.ok()) {
pending_error_ = std::move(status);
}
if (!is_minus ||
pending_property_restricts_.has_active_property_restricts() ||
processing_not_) {
// 'NOT' operator was added in list filters.
// Likewise, mixing property restricts and NOTs were made valid in list
// filters.
features_.insert(kListFilterQueryLanguageFeature);
}
}
void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
if (!IsSupportedNaryOperator(node->operator_text())) {
pending_error_ = absl_ports::UnimplementedError(
"No support for any non-numeric operators.");
return;
}
if (pending_property_restricts_.has_active_property_restricts() ||
processing_not_) {
// Likewise, mixing property restricts and NOT with compound statements was
// added in list filters.
features_.insert(kListFilterQueryLanguageFeature);
}
if (node->operator_text() == ":") {
libtextclassifier3::Status status = ProcessHasOperator(node);
if (!status.ok()) {
pending_error_ = std::move(status);
}
return;
} else if (IsNumericComparator(node->operator_text())) {
libtextclassifier3::Status status = ProcessNumericComparator(node);
if (!status.ok()) {
pending_error_ = std::move(status);
}
return;
}
// 1. Put in a placeholder PendingValue
pending_values_.push(PendingValue());
// 2. Visit the children.
for (int i = 0; i < node->children().size(); ++i) {
node->children().at(i)->Accept(this);
if (has_pending_error()) {
return;
}
}
// 3. Retrieve the pending value for this node.
libtextclassifier3::StatusOr<PendingValue> pending_value_or;
if (node->operator_text() == "AND") {
pending_value_or = ProcessAndOperator(node);
} else if (node->operator_text() == "OR") {
pending_value_or = ProcessOrOperator(node);
}
if (!pending_value_or.ok()) {
pending_error_ = std::move(pending_value_or).status();
return;
}
PendingValue pending_value = std::move(pending_value_or).ValueOrDie();
// 4. Check for the placeholder.
if (!pending_values_.top().is_placeholder()) {
pending_error_ = absl_ports::InvalidArgumentError(
"Error processing arguments for node.");
return;
}
pending_values_.pop();
pending_values_.push(std::move(pending_value));
}
libtextclassifier3::StatusOr<QueryResults> QueryVisitor::ConsumeResults() && {
if (has_pending_error()) {
return std::move(pending_error_);
}
if (pending_values_.size() != 1) {
return absl_ports::InvalidArgumentError(
"Visitor does not contain a single root iterator.");
}
auto iterator_or = PopPendingIterator();
if (!iterator_or.ok()) {
return std::move(iterator_or).status();
}
QueryResults results;
results.root_iterator = std::move(iterator_or).ValueOrDie();
results.query_term_iterators = std::move(query_term_iterators_);
results.query_terms = std::move(property_query_terms_map_);
results.features_in_use = std::move(features_);
return results;
}
} // namespace lib
} // namespace icing