// blob: e7301cfacbe9b25403ac8ed322eee209ea0bb39d [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/autocomplete/bookmark_provider.h"
#include <algorithm>
#include <functional>
#include <vector>
#include "base/prefs/pref_service.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/browser/autocomplete/autocomplete_result.h"
#include "chrome/browser/autocomplete/history_provider.h"
#include "chrome/browser/bookmarks/bookmark_model_factory.h"
#include "chrome/browser/omnibox/omnibox_field_trial.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/common/pref_names.h"
#include "components/autocomplete/url_prefix.h"
#include "components/bookmarks/browser/bookmark_match.h"
#include "components/bookmarks/browser/bookmark_model.h"
#include "components/metrics/proto/omnibox_input_type.pb.h"
#include "net/base/net_util.h"
// Bring BookmarkMatch into file scope; it is referenced unqualified throughout
// this file.
using bookmarks::BookmarkMatch;
// Container of bookmark matches; DoAutocomplete() passes one of these to
// BookmarkModel::GetBookmarksMatching() to be filled in.
typedef std::vector<BookmarkMatch> BookmarkMatches;
// BookmarkProvider ------------------------------------------------------------
// Constructs a provider of type TYPE_BOOKMARK. |score_using_url_matches_| is
// taken from the omnibox field trial. When |profile| is non-NULL, the bookmark
// model and the accept-languages preference are looked up from it; otherwise
// |bookmark_model_| stays NULL and |languages_| is left untouched.
BookmarkProvider::BookmarkProvider(
    AutocompleteProviderListener* listener,
    Profile* profile)
    : AutocompleteProvider(listener, profile,
                           AutocompleteProvider::TYPE_BOOKMARK),
      bookmark_model_(NULL),
      score_using_url_matches_(OmniboxFieldTrial::BookmarksIndexURLsValue()) {
  // Nothing more can be initialized without a profile.
  if (!profile)
    return;

  bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
  languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
}
// Entry point for a new autocomplete request. When |minimal_changes| is set
// the previously computed |matches_| are kept as they are. Otherwise
// |matches_| is reset and, unless the input is empty or a forced query, it is
// repopulated by DoAutocomplete().
void BookmarkProvider::Start(const AutocompleteInput& input,
                             bool minimal_changes) {
  if (minimal_changes)
    return;

  matches_.clear();

  // Empty text and forced queries never produce bookmark suggestions.
  const bool should_search =
      !input.text().empty() &&
      (input.type() != metrics::OmniboxInputType::FORCED_QUERY);
  if (should_search)
    DoAutocomplete(input);
}
// The destructor body is intentionally empty; there is no explicit cleanup
// performed here.
BookmarkProvider::~BookmarkProvider() {}
void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) {
// We may not have a bookmark model for some unit tests.
if (!bookmark_model_)
return;
BookmarkMatches matches;
// Retrieve enough bookmarks so that we have a reasonable probability of
// suggesting the one that the user desires.
const size_t kMaxBookmarkMatches = 50;
// GetBookmarksMatching returns bookmarks matching the user's
// search terms using the following rules:
// - The search text is broken up into search terms. Each term is searched
// for separately.
// - Term matches are always performed against the start of a word. 'def'
// will match against 'define' but not against 'indefinite'.
// - Terms must be at least three characters in length in order to perform
// partial word matches. Any term of lesser length will only be used as an
// exact match. 'def' will match against 'define' but 'de' will not match.
// - A search containing multiple terms will return results with those words
// occuring in any order.
// - Terms enclosed in quotes comprises a phrase that must match exactly.
// - Multiple terms enclosed in quotes will require those exact words in that
// exact order to match.
//
// Please refer to the code for BookmarkIndex::GetBookmarksMatching for
// complete details of how searches are performed against the user's
// bookmarks.
bookmark_model_->GetBookmarksMatching(input.text(),
kMaxBookmarkMatches,
&matches);
if (matches.empty())
return; // There were no matches.
const base::string16 fixed_up_input(FixupUserInput(input).second);
for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end();
++i) {
// Create and score the AutocompleteMatch. If its score is 0 then the
// match is discarded.
AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i));
if (match.relevance > 0)
matches_.push_back(match);
}
// Sort and clip the resulting matches.
size_t num_matches =
std::min(matches_.size(), AutocompleteProvider::kMaxMatches);
std::partial_sort(matches_.begin(), matches_.begin() + num_matches,
matches_.end(), AutocompleteMatch::MoreRelevant);
matches_.resize(num_matches);
}
namespace {

// for_each helper functor that accumulates a match factor for each query term
// when calculating the final score.
//
// Each match contributes a 'factor' based on 1) how many characters match and
// 2) where the match is positioned within the scored string: a match of
// length L starting at position P in a string of length N adds
// L * (N - P) / N to the running total.
//
// The functor is used both for bookmark titles and for bookmark URL specs, so
// "title length" below means the length of whichever string the match
// positions refer to.
class ScoringFunctor {
 public:
  // |title_length| is the length of the string (bookmark title or URL spec)
  // against which the matches will be scored. It is used as a divisor for
  // every match, so it is expected to be non-zero whenever there is at least
  // one match to score.
  explicit ScoringFunctor(size_t title_length)
      : title_length_(static_cast<double>(title_length)),
        scoring_factor_(0.0) {
  }

  // Adds the factor for |match|, a [first, second) character range, to the
  // running total.
  void operator()(const query_parser::Snippet::MatchPosition& match) {
    double term_length = static_cast<double>(match.second - match.first);
    scoring_factor_ += term_length *
        (title_length_ - match.first) / title_length_;
  }

  // Returns the sum of the factors of all matches seen so far (0.0 if none).
  double ScoringFactor() const { return scoring_factor_; }

 private:
  // Length of the scored string, stored as a double so that the arithmetic in
  // operator() is done in floating point.
  double title_length_;
  // Running sum of per-match factors.
  double scoring_factor_;
};

}  // namespace
// Converts |bookmark_match| into an AutocompleteMatch for |input|.
//
// |fixed_up_input_text| is the fixed-up form of the user's text and is used,
// together with |input|, to work out where inline autocompletion would start
// within the bookmark's URL.
//
// The returned match has the bookmark's (formatted) URL as its contents, the
// bookmark's title as its description, classifications highlighting the
// matched ranges in both, and a relevance computed as described in the long
// comment in the body.
AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch(
    const AutocompleteInput& input,
    const base::string16& fixed_up_input_text,
    const BookmarkMatch& bookmark_match) {
  // The AutocompleteMatch we construct is non-deletable because the only
  // way to support this would be to delete the underlying bookmark, which is
  // unlikely to be what the user intends.
  AutocompleteMatch match(this, 0, false,
                          AutocompleteMatchType::BOOKMARK_TITLE);
  base::string16 title(bookmark_match.node->GetTitle());
  const GURL& url(bookmark_match.node->url());
  const base::string16 url_utf16 = base::UTF8ToUTF16(url.spec());
  size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset(
      input.text(), fixed_up_input_text, false, url_utf16);
  match.destination_url = url;

  // Position of the first URL match, or 0 when the URL did not match at all.
  const size_t match_start = bookmark_match.url_match_positions.empty() ?
      0 : bookmark_match.url_match_positions[0].first;
  // NOTE(review): the npos comparison is subsumed by the != 0 comparison, and
  // an empty |url_match_positions| yields 0 above, so "http://" is only
  // trimmed when the first URL match starts past position 0. Confirm that this
  // is the intended behavior before simplifying.
  const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) &&
      ((match_start == base::string16::npos) || (match_start != 0));
  std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions(
      bookmark_match.url_match_positions);
  // In addition to knowing how |offsets| is transformed, we need to know how
  // |inline_autocomplete_offset| is transformed.  We add it to the end of
  // |offsets|, compute how everything is transformed, then remove it from the
  // end.
  offsets.push_back(inline_autocomplete_offset);
  match.contents = net::FormatUrlWithOffsets(url, languages_,
      net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP),
      net::UnescapeRule::SPACES, NULL, NULL, &offsets);
  inline_autocomplete_offset = offsets.back();
  offsets.pop_back();
  BookmarkMatch::MatchPositions new_url_match_positions =
      BookmarkMatch::ReplaceOffsetsInMatchPositions(
          bookmark_match.url_match_positions, offsets);
  match.contents_class =
      ClassificationsFromMatch(new_url_match_positions,
                               match.contents.size(),
                               true);
  match.fill_into_edit =
      AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
                                                              match.contents);
  if (inline_autocomplete_offset != base::string16::npos) {
    // |inline_autocomplete_offset| may be beyond the end of the
    // |fill_into_edit| if the user has typed an URL with a scheme and the
    // last character typed is a slash.  That slash is removed by the
    // FormatURLWithOffsets call above.
    if (inline_autocomplete_offset < match.fill_into_edit.length()) {
      match.inline_autocompletion =
          match.fill_into_edit.substr(inline_autocomplete_offset);
    }
    match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
        !HistoryProvider::PreventInlineAutocomplete(input);
  }
  match.description = title;
  match.description_class =
      ClassificationsFromMatch(bookmark_match.title_match_positions,
                               match.description.size(),
                               false);
  match.starred = true;

  // Summary on how a relevance score is determined for the match:
  //
  // For each match within the bookmark's title or URL (or both), calculate a
  // 'factor', sum up those factors, then use the sum to figure out a value
  // between the base score and the maximum score.
  //
  // The factor for each match is the product of:
  //
  //  1) how many characters in the bookmark's title/URL are part of this
  //     match.  This is capped at the length of the bookmark's title
  //     to prevent terms that match in both the title and the URL from
  //     scoring too strongly.
  //
  //  2) where the match occurs within the bookmark's title or URL,
  //     giving more points for matches that appear earlier in the string:
  //       ((string_length - position of match start) / string_length).
  //
  //  Example: Given a bookmark title of 'abcde fghijklm', with a title length
  //     of 14, and two different search terms, 'abcde' and 'fghij', with
  //     start positions of 0 and 6, respectively, 'abcde' will score higher
  //     (with a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
  //     a partial factor of (14-6)/14 = 0.571 ).  (In this example neither
  //     term matches in the URL.)
  //
  // Once all match factors have been calculated they are summed.  If URL
  // matches are not considered, the resulting sum will never be greater than
  // the length of the bookmark title because of the way the bookmark model
  // matches and removes overlaps.  (In particular, the bookmark model only
  // matches terms to the beginning of words and it removes all overlapping
  // matches, keeping only the longest.  Together these mean that each
  // character is included in at most one match.)  If URL matches are
  // considered, the sum can be greater.
  //
  // This sum is then normalized by the length of the bookmark title (if URL
  // matches are not considered) or by the length of the bookmark title + 10
  // (if URL matches are considered) and capped at 1.0.  (If URL matches
  // are considered, we want to expand the scoring range so fewer bookmarks
  // will hit the 1.0 cap and hence lose all ability to distinguish between
  // these high-quality bookmarks.)
  //
  // The normalized value is multiplied against the scoring range available,
  // which is 299.  The 299 is calculated by subtracting the minimum possible
  // score, 900, from the maximum possible score, 1199.  This product, ranging
  // from 0 to 299, is added to the minimum possible score, 900, giving the
  // preliminary score.
  //
  // If the preliminary score is less than the maximum possible score, 1199,
  // it can be boosted up to that maximum possible score if the URL referenced
  // by the bookmark is also referenced by any of the user's other bookmarks.
  // A count of how many times the bookmark's URL is referenced is determined
  // and, for each additional reference beyond the one for the bookmark being
  // scored up to a maximum of three, the score is boosted by a fixed amount
  // given by |kURLCountBoost|, below.
  //
  if (score_using_url_matches_) {
    // Pretend empty titles are identical to the URL.
    if (title.empty())
      title = base::ASCIIToUTF16(url.spec());
  } else {
    DCHECK(!title.empty());
  }
  // std::for_each returns (a copy of) the functor it was given, which by then
  // has accumulated the factors of every match position in the range.
  ScoringFunctor title_position_functor =
      std::for_each(bookmark_match.title_match_positions.begin(),
                    bookmark_match.title_match_positions.end(),
                    ScoringFunctor(title.size()));
  double summed_factors = title_position_functor.ScoringFactor();
  // URL matches only contribute when URL scoring is enabled, so the URL match
  // positions are not walked at all otherwise.
  if (score_using_url_matches_) {
    ScoringFunctor url_position_functor =
        std::for_each(bookmark_match.url_match_positions.begin(),
                      bookmark_match.url_match_positions.end(),
                      ScoringFunctor(url.spec().length()));
    summed_factors += url_position_functor.ScoringFactor();
  }
  const double normalized_sum = std::min(
      summed_factors / (title.size() + (score_using_url_matches_ ? 10 : 0)),
      1.0);
  const int kBaseBookmarkScore = 900;
  const int kMaxBookmarkScore = 1199;
  const double kBookmarkScoreRange =
      static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
  match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) +
      kBaseBookmarkScore;
  // Don't waste any time searching for additional referenced URLs if we
  // already have a perfect title match.
  if (match.relevance >= kMaxBookmarkScore)
    return match;

  // Boost the score if the bookmark's URL is referenced by other bookmarks.
  // The boost table is indexed by (number of bookmarks with this URL - 1),
  // clamped to the last entry.
  const int kURLCountBoost[4] = { 0, 75, 125, 150 };
  std::vector<const BookmarkNode*> nodes;
  bookmark_model_->GetNodesByURL(url, &nodes);
  const size_t clamped_reference_count =
      std::min(arraysize(kURLCountBoost), nodes.size());
  // The bookmark being scored is expected to be among |nodes|.
  DCHECK_GE(clamped_reference_count, 1U);
  match.relevance += kURLCountBoost[clamped_reference_count - 1];
  match.relevance = std::min(kMaxBookmarkScore, match.relevance);
  return match;
}
// static
// Builds the classifications for a string of |text_length| characters in
// which the ranges listed in |positions| matched the user's input. The base
// style is URL when |is_url| is true and NONE otherwise. With no match
// positions the result is a single classification at offset 0 carrying the
// base style; otherwise each position is classified via
// AutocompleteMatch::ClassifyLocationInString and merged into the result.
ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
    const query_parser::Snippet::MatchPositions& positions,
    size_t text_length,
    bool is_url) {
  const ACMatchClassification::Style base_style =
      is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
  ACMatchClassifications merged;

  if (positions.empty()) {
    // Nothing matched: the whole string gets the base style.
    merged.push_back(ACMatchClassification(0, base_style));
  } else {
    typedef query_parser::Snippet::MatchPositions::const_iterator PositionIter;
    for (PositionIter pos = positions.begin(); pos != positions.end(); ++pos) {
      // Each position is a [first, second) range within the string.
      const size_t match_offset = pos->first;
      const size_t match_length = pos->second - pos->first;
      AutocompleteMatch::ACMatchClassifications span_classes;
      AutocompleteMatch::ClassifyLocationInString(match_offset, match_length,
                                                  text_length, base_style,
                                                  &span_classes);
      merged = AutocompleteMatch::MergeClassifications(merged, span_classes);
    }
  }
  return merged;
}