| // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/history/typed_url_syncable_service.h" |
| |
| #include "base/auto_reset.h" |
| #include "base/logging.h" |
| #include "base/metrics/histogram.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "chrome/browser/history/history_backend.h" |
| #include "net/base/net_util.h" |
| #include "sync/protocol/sync.pb.h" |
| #include "sync/protocol/typed_url_specifics.pb.h" |
| |
namespace {

// Cap on the number of visits written into a single typed URL sync node. The
// server backend can't handle arbitrarily large nodes, so the visit array is
// kept at or below this size.
const int kMaxTypedUrlVisits = 100;

// Cap on the number of visits fetched from the history DB for one typed URL.
// The DB places no limit on visits per URL, so fetching them all can exhaust
// memory (http://crbug.com/89793). This is deliberately larger than
// kMaxTypedUrlVisits because some of the fetched visits may be RELOAD visits,
// which get stripped before syncing.
const int kMaxVisitsToFetch = 1000;

// Sync updates are throttled for any URL whose typed_count is greater than or
// equal to this threshold.
const int kTypedUrlVisitThrottleThreshold = 10;

// Throttling multiple: with a value of N, only every Nth update is synced
// (i.e. when typed_count % N == 0).
const int kTypedUrlVisitThrottleMultiple = 10;

}  // namespace
| |
| namespace history { |
| |
// Tag handed to syncer::SyncData::CreateLocalData() for every typed URL change
// built by AddTypedUrlToChangeList().
const char kTypedUrlTag[] = "google_chrome_typed_urls";
| |
| static bool CheckVisitOrdering(const VisitVector& visits) { |
| int64 previous_visit_time = 0; |
| for (VisitVector::const_iterator visit = visits.begin(); |
| visit != visits.end(); ++visit) { |
| if (visit != visits.begin()) { |
| // We allow duplicate visits here - they shouldn't really be allowed, but |
| // they still seem to show up sometimes and we haven't figured out the |
| // source, so we just log an error instead of failing an assertion. |
| // (http://crbug.com/91473). |
| if (previous_visit_time == visit->visit_time.ToInternalValue()) |
| DVLOG(1) << "Duplicate visit time encountered"; |
| else if (previous_visit_time > visit->visit_time.ToInternalValue()) |
| return false; |
| } |
| |
| previous_visit_time = visit->visit_time.ToInternalValue(); |
| } |
| return true; |
| } |
| |
| TypedUrlSyncableService::TypedUrlSyncableService( |
| HistoryBackend* history_backend) |
| : history_backend_(history_backend), |
| processing_syncer_changes_(false), |
| expected_loop_(base::MessageLoop::current()) { |
| DCHECK(history_backend_); |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| } |
| |
| TypedUrlSyncableService::~TypedUrlSyncableService() { |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| } |
| |
| syncer::SyncMergeResult TypedUrlSyncableService::MergeDataAndStartSyncing( |
| syncer::ModelType type, |
| const syncer::SyncDataList& initial_sync_data, |
| scoped_ptr<syncer::SyncChangeProcessor> sync_processor, |
| scoped_ptr<syncer::SyncErrorFactory> error_handler) { |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| DCHECK(!sync_processor_.get()); |
| DCHECK(sync_processor.get()); |
| DCHECK(error_handler.get()); |
| DCHECK_EQ(type, syncer::TYPED_URLS); |
| |
| syncer::SyncMergeResult merge_result(type); |
| sync_processor_ = sync_processor.Pass(); |
| sync_error_handler_ = error_handler.Pass(); |
| |
| // TODO(mgist): Add implementation |
| |
| return merge_result; |
| } |
| |
| void TypedUrlSyncableService::StopSyncing(syncer::ModelType type) { |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| DCHECK_EQ(type, syncer::TYPED_URLS); |
| |
| sync_processor_.reset(); |
| sync_error_handler_.reset(); |
| } |
| |
| syncer::SyncDataList TypedUrlSyncableService::GetAllSyncData( |
| syncer::ModelType type) const { |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| syncer::SyncDataList list; |
| |
| // TODO(mgist): Add implementation |
| |
| return list; |
| } |
| |
| syncer::SyncError TypedUrlSyncableService::ProcessSyncChanges( |
| const tracked_objects::Location& from_here, |
| const syncer::SyncChangeList& change_list) { |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| |
| // TODO(mgist): Add implementation |
| |
| return syncer::SyncError(FROM_HERE, |
| syncer::SyncError::DATATYPE_ERROR, |
| "Typed url syncable service is not implemented.", |
| syncer::TYPED_URLS); |
| } |
| |
| void TypedUrlSyncableService::OnUrlsModified(URLRows* changed_urls) { |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| DCHECK(changed_urls); |
| |
| if (processing_syncer_changes_) |
| return; // These are changes originating from us, ignore. |
| if (!sync_processor_.get()) |
| return; // Sync processor not yet initialized, don't sync. |
| |
| // Create SyncChangeList. |
| syncer::SyncChangeList changes; |
| |
| for (URLRows::iterator url = changed_urls->begin(); |
| url != changed_urls->end(); ++url) { |
| // Only care if the modified URL is typed. |
| if (url->typed_count() > 0) { |
| // If there were any errors updating the sync node, just ignore them and |
| // continue on to process the next URL. |
| CreateOrUpdateSyncNode(*url, &changes); |
| } |
| } |
| |
| // Send SyncChangeList to server if there are any changes. |
| if (changes.size() > 0) |
| sync_processor_->ProcessSyncChanges(FROM_HERE, changes); |
| } |
| |
| void TypedUrlSyncableService::OnUrlVisited(ui::PageTransition transition, |
| URLRow* row) { |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| DCHECK(row); |
| |
| if (processing_syncer_changes_) |
| return; // These are changes originating from us, ignore. |
| if (!sync_processor_.get()) |
| return; // Sync processor not yet initialized, don't sync. |
| if (!ShouldSyncVisit(transition, row)) |
| return; |
| |
| // Create SyncChangeList. |
| syncer::SyncChangeList changes; |
| |
| CreateOrUpdateSyncNode(*row, &changes); |
| |
| // Send SyncChangeList to server if there are any changes. |
| if (changes.size() > 0) |
| sync_processor_->ProcessSyncChanges(FROM_HERE, changes); |
| } |
| |
| void TypedUrlSyncableService::OnUrlsDeleted(bool all_history, |
| bool expired, |
| URLRows* rows) { |
| DCHECK(expected_loop_ == base::MessageLoop::current()); |
| |
| if (processing_syncer_changes_) |
| return; // These are changes originating from us, ignore. |
| if (!sync_processor_.get()) |
| return; // Sync processor not yet initialized, don't sync. |
| |
| // Ignore URLs expired due to old age (we don't want to sync them as deletions |
| // to avoid extra traffic up to the server, and also to make sure that a |
| // client with a bad clock setting won't go on an expiration rampage and |
| // delete all history from every client). The server will gracefully age out |
| // the sync DB entries when they've been idle for long enough. |
| if (expired) |
| return; |
| |
| // Create SyncChangeList. |
| syncer::SyncChangeList changes; |
| |
| if (all_history) { |
| // Delete all synced typed urls. |
| for (std::set<GURL>::const_iterator url = synced_typed_urls_.begin(); |
| url != synced_typed_urls_.end(); ++url) { |
| VisitVector visits; |
| URLRow row(*url); |
| AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE, |
| row, visits, url->spec(), &changes); |
| } |
| // Clear cache of server state. |
| synced_typed_urls_.clear(); |
| } else { |
| DCHECK(rows); |
| // Delete rows. |
| for (URLRows::const_iterator row = rows->begin(); |
| row != rows->end(); ++row) { |
| // Add specifics to change list for all synced urls that were deleted. |
| if (synced_typed_urls_.find(row->url()) != synced_typed_urls_.end()) { |
| VisitVector visits; |
| AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE, |
| *row, visits, row->url().spec(), &changes); |
| // Delete typed url from cache. |
| synced_typed_urls_.erase(row->url()); |
| } |
| } |
| } |
| |
| // Send SyncChangeList to server if there are any changes. |
| if (changes.size() > 0) |
| sync_processor_->ProcessSyncChanges(FROM_HERE, changes); |
| } |
| |
| bool TypedUrlSyncableService::ShouldIgnoreUrl(const GURL& url) { |
| // Ignore empty URLs. Not sure how this can happen (maybe import from other |
| // busted browsers, or misuse of the history API, or just plain bugs) but we |
| // can't deal with them. |
| if (url.spec().empty()) |
| return true; |
| |
| // Ignore local file URLs. |
| if (url.SchemeIsFile()) |
| return true; |
| |
| // Ignore localhost URLs. |
| if (net::IsLocalhost(url.host())) |
| return true; |
| |
| return false; |
| } |
| |
| bool TypedUrlSyncableService::ShouldSyncVisit( |
| ui::PageTransition page_transition, |
| URLRow* row) { |
| if (!row) |
| return false; |
| int typed_count = row->typed_count(); |
| ui::PageTransition transition = ui::PageTransitionFromInt( |
| page_transition & ui::PAGE_TRANSITION_CORE_MASK); |
| |
| // Just use an ad-hoc criteria to determine whether to ignore this |
| // notification. For most users, the distribution of visits is roughly a bell |
| // curve with a long tail - there are lots of URLs with < 5 visits so we want |
| // to make sure we sync up every visit to ensure the proper ordering of |
| // suggestions. But there are relatively few URLs with > 10 visits, and those |
| // tend to be more broadly distributed such that there's no need to sync up |
| // every visit to preserve their relative ordering. |
| return (transition == ui::PAGE_TRANSITION_TYPED && |
| typed_count > 0 && |
| (typed_count < kTypedUrlVisitThrottleThreshold || |
| (typed_count % kTypedUrlVisitThrottleMultiple) == 0)); |
| } |
| |
| bool TypedUrlSyncableService::CreateOrUpdateSyncNode( |
| URLRow url, |
| syncer::SyncChangeList* changes) { |
| DCHECK_GT(url.typed_count(), 0); |
| |
| if (ShouldIgnoreUrl(url.url())) |
| return true; |
| |
| // Get the visits for this node. |
| VisitVector visit_vector; |
| if (!FixupURLAndGetVisits(&url, &visit_vector)) { |
| DLOG(ERROR) << "Could not load visits for url: " << url.url(); |
| return false; |
| } |
| DCHECK(!visit_vector.empty()); |
| |
| std::string title = url.url().spec(); |
| syncer::SyncChange::SyncChangeType change_type; |
| |
| // If server already has URL, then send a sync update, else add it. |
| change_type = |
| (synced_typed_urls_.find(url.url()) != synced_typed_urls_.end()) ? |
| syncer::SyncChange::ACTION_UPDATE : |
| syncer::SyncChange::ACTION_ADD; |
| |
| // Ensure cache of server state is up to date. |
| synced_typed_urls_.insert(url.url()); |
| |
| AddTypedUrlToChangeList(change_type, url, visit_vector, title, changes); |
| |
| return true; |
| } |
| |
| void TypedUrlSyncableService::AddTypedUrlToChangeList( |
| syncer::SyncChange::SyncChangeType change_type, |
| const URLRow& row, |
| const VisitVector& visits, |
| std::string title, |
| syncer::SyncChangeList* change_list) { |
| sync_pb::EntitySpecifics entity_specifics; |
| sync_pb::TypedUrlSpecifics* typed_url = entity_specifics.mutable_typed_url(); |
| |
| if (change_type == syncer::SyncChange::ACTION_DELETE) { |
| typed_url->set_url(row.url().spec()); |
| } else { |
| WriteToTypedUrlSpecifics(row, visits, typed_url); |
| } |
| |
| change_list->push_back( |
| syncer::SyncChange(FROM_HERE, change_type, |
| syncer::SyncData::CreateLocalData( |
| kTypedUrlTag, title, entity_specifics))); |
| } |
| |
| void TypedUrlSyncableService::WriteToTypedUrlSpecifics( |
| const URLRow& url, |
| const VisitVector& visits, |
| sync_pb::TypedUrlSpecifics* typed_url) { |
| |
| DCHECK(!url.last_visit().is_null()); |
| DCHECK(!visits.empty()); |
| DCHECK_EQ(url.last_visit().ToInternalValue(), |
| visits.back().visit_time.ToInternalValue()); |
| |
| typed_url->set_url(url.url().spec()); |
| typed_url->set_title(base::UTF16ToUTF8(url.title())); |
| typed_url->set_hidden(url.hidden()); |
| |
| DCHECK(CheckVisitOrdering(visits)); |
| |
| bool only_typed = false; |
| int skip_count = 0; |
| |
| if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) { |
| int typed_count = 0; |
| int total = 0; |
| // Walk the passed-in visit vector and count the # of typed visits. |
| for (VisitVector::const_iterator visit = visits.begin(); |
| visit != visits.end(); ++visit) { |
| ui::PageTransition transition = ui::PageTransitionFromInt( |
| visit->transition & ui::PAGE_TRANSITION_CORE_MASK); |
| // We ignore reload visits. |
| if (transition == ui::PAGE_TRANSITION_RELOAD) |
| continue; |
| ++total; |
| if (transition == ui::PAGE_TRANSITION_TYPED) |
| ++typed_count; |
| } |
| // We should have at least one typed visit. This can sometimes happen if |
| // the history DB has an inaccurate count for some reason (there's been |
| // bugs in the history code in the past which has left users in the wild |
| // with incorrect counts - http://crbug.com/84258). |
| DCHECK(typed_count > 0); |
| |
| if (typed_count > kMaxTypedUrlVisits) { |
| only_typed = true; |
| skip_count = typed_count - kMaxTypedUrlVisits; |
| } else if (total > kMaxTypedUrlVisits) { |
| skip_count = total - kMaxTypedUrlVisits; |
| } |
| } |
| |
| for (VisitVector::const_iterator visit = visits.begin(); |
| visit != visits.end(); ++visit) { |
| ui::PageTransition transition = |
| ui::PageTransitionStripQualifier(visit->transition); |
| // Skip reload visits. |
| if (transition == ui::PAGE_TRANSITION_RELOAD) |
| continue; |
| |
| // If we only have room for typed visits, then only add typed visits. |
| if (only_typed && transition != ui::PAGE_TRANSITION_TYPED) |
| continue; |
| |
| if (skip_count > 0) { |
| // We have too many entries to fit, so we need to skip the oldest ones. |
| // Only skip typed URLs if there are too many typed URLs to fit. |
| if (only_typed || transition != ui::PAGE_TRANSITION_TYPED) { |
| --skip_count; |
| continue; |
| } |
| } |
| typed_url->add_visits(visit->visit_time.ToInternalValue()); |
| typed_url->add_visit_transitions(visit->transition); |
| } |
| DCHECK_EQ(skip_count, 0); |
| |
| if (typed_url->visits_size() == 0) { |
| // If we get here, it's because we don't actually have any TYPED visits |
| // even though the visit's typed_count > 0 (corrupted typed_count). So |
| // let's go ahead and add a RELOAD visit at the most recent visit since |
| // it's not legal to have an empty visit array (yet another workaround |
| // for http://crbug.com/84258). |
| typed_url->add_visits(url.last_visit().ToInternalValue()); |
| typed_url->add_visit_transitions(ui::PAGE_TRANSITION_RELOAD); |
| } |
| CHECK_GT(typed_url->visits_size(), 0); |
| CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits); |
| CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size()); |
| } |
| |
| bool TypedUrlSyncableService::FixupURLAndGetVisits( |
| URLRow* url, |
| VisitVector* visits) { |
| ++num_db_accesses_; |
| CHECK(history_backend_); |
| if (!history_backend_->GetMostRecentVisitsForURL( |
| url->id(), kMaxVisitsToFetch, visits)) { |
| ++num_db_errors_; |
| return false; |
| } |
| |
| // Sometimes (due to a bug elsewhere in the history or sync code, or due to |
| // a crash between adding a URL to the history database and updating the |
| // visit DB) the visit vector for a URL can be empty. If this happens, just |
| // create a new visit whose timestamp is the same as the last_visit time. |
| // This is a workaround for http://crbug.com/84258. |
| if (visits->empty()) { |
| DVLOG(1) << "Found empty visits for URL: " << url->url(); |
| VisitRow visit( |
| url->id(), url->last_visit(), 0, ui::PAGE_TRANSITION_TYPED, 0); |
| visits->push_back(visit); |
| } |
| |
| // GetMostRecentVisitsForURL() returns the data in the opposite order that |
| // we need it, so reverse it. |
| std::reverse(visits->begin(), visits->end()); |
| |
| // Sometimes, the last_visit field in the URL doesn't match the timestamp of |
| // the last visit in our visit array (they come from different tables, so |
| // crashes/bugs can cause them to mismatch), so just set it here. |
| url->set_last_visit(visits->back().visit_time); |
| DCHECK(CheckVisitOrdering(*visits)); |
| return true; |
| } |
| |
| } // namespace history |