blob: e7b9140f1f5b8be7658a4c0f57df5a629fe0c391 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
#include <functional>
#include "base/file_util.h"
#include "base/files/important_file_writer.h"
#include "base/md5.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
#include "chrome/common/chrome_constants.h"
#include "chrome/common/spellcheck_messages.h"
#include "content/public/browser/browser_thread.h"
#include "sync/api/sync_change.h"
#include "sync/api/sync_data.h"
#include "sync/api/sync_error_factory.h"
#include "sync/protocol/sync.pb.h"
using content::BrowserThread;
using chrome::spellcheck_common::WordList;
using chrome::spellcheck_common::WordSet;
namespace {
// Filename extension for backup dictionary file.
const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup");
// Prefix for the checksum in the dictionary file.
const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
// The status of the checksum in a custom spellcheck dictionary.
enum ChecksumStatus {
VALID_CHECKSUM,
INVALID_CHECKSUM,
};
// The result of a dictionary sanitation. Can be used as a bitmap.
enum ChangeSanitationResult {
// The change is valid and can be applied as-is.
VALID_CHANGE = 0,
// The change contained words to be added that are not valid.
DETECTED_INVALID_WORDS = 1,
// The change contained words to be added that are already in the dictionary.
DETECTED_DUPLICATE_WORDS = 2,
// The change contained words to be removed that are not in the dictionary.
DETECTED_MISSING_WORDS = 4,
};
// Loads the file at |file_path| into the |words| container. If the file has a
// valid checksum, then returns ChecksumStatus::VALID. If the file has an
// invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
ChecksumStatus LoadFile(const base::FilePath& file_path, WordList& words) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
words.clear();
std::string contents;
base::ReadFileToString(file_path, &contents);
size_t pos = contents.rfind(CHECKSUM_PREFIX);
if (pos != std::string::npos) {
std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX));
contents = contents.substr(0, pos);
if (checksum != base::MD5String(contents))
return INVALID_CHECKSUM;
}
base::TrimWhitespaceASCII(contents, base::TRIM_ALL, &contents);
base::SplitString(contents, '\n', &words);
return VALID_CHECKSUM;
}
// Returns true for invalid words and false for valid words.
bool IsInvalidWord(const std::string& word) {
std::string tmp;
return !base::IsStringUTF8(word) ||
word.length() >
chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES ||
word.empty() ||
base::TRIM_NONE != base::TrimWhitespaceASCII(word, base::TRIM_ALL, &tmp);
}
// Loads the custom spellcheck dictionary from |path| into |custom_words|. If
// the dictionary checksum is not valid, but backup checksum is valid, then
// restores the backup and loads that into |custom_words| instead. If the backup
// is invalid too, then clears |custom_words|. Must be called on the file
// thread.
void LoadDictionaryFileReliably(WordList& custom_words,
const base::FilePath& path) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
// Load the contents and verify the checksum.
if (LoadFile(path, custom_words) == VALID_CHECKSUM)
return;
// Checksum is not valid. See if there's a backup.
base::FilePath backup = path.AddExtension(BACKUP_EXTENSION);
if (!base::PathExists(backup))
return;
// Load the backup and verify its checksum.
if (LoadFile(backup, custom_words) != VALID_CHECKSUM)
return;
// Backup checksum is valid. Restore the backup.
base::CopyFile(backup, path);
}
// Backs up the original dictionary, saves |custom_words| and its checksum into
// the custom spellcheck dictionary at |path|.
void SaveDictionaryFileReliably(
const WordList& custom_words,
const base::FilePath& path) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
std::stringstream content;
for (WordList::const_iterator it = custom_words.begin();
it != custom_words.end();
++it) {
content << *it << '\n';
}
std::string checksum = base::MD5String(content.str());
content << CHECKSUM_PREFIX << checksum;
base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION));
base::ImportantFileWriter::WriteFileAtomically(path, content.str());
}
// Removes duplicate and invalid words from |to_add| word list and sorts it.
// Looks for duplicates in both |to_add| and |existing| word lists. Returns a
// bitmap of |ChangeSanitationResult| values.
int SanitizeWordsToAdd(const WordSet& existing, WordList& to_add) {
// Do not add duplicate words.
std::sort(to_add.begin(), to_add.end());
WordList new_words = base::STLSetDifference<WordList>(to_add, existing);
new_words.erase(std::unique(new_words.begin(), new_words.end()),
new_words.end());
int result = VALID_CHANGE;
if (to_add.size() != new_words.size())
result |= DETECTED_DUPLICATE_WORDS;
// Do not add invalid words.
size_t size = new_words.size();
new_words.erase(std::remove_if(new_words.begin(),
new_words.end(),
IsInvalidWord),
new_words.end());
if (size != new_words.size())
result |= DETECTED_INVALID_WORDS;
// Save the sanitized words to be added.
std::swap(to_add, new_words);
return result;
}
// Removes word from |to_remove| that are missing from |existing| word list and
// sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
int SanitizeWordsToRemove(const WordSet& existing, WordList& to_remove) {
// Do not remove words that are missing from the dictionary.
std::sort(to_remove.begin(), to_remove.end());
WordList found_words;
std::set_intersection(existing.begin(),
existing.end(),
to_remove.begin(),
to_remove.end(),
std::back_inserter(found_words));
int result = VALID_CHANGE;
if (to_remove.size() > found_words.size())
result |= DETECTED_MISSING_WORDS;
// Save the sanitized words to be removed.
std::swap(to_remove, found_words);
return result;
}
} // namespace
SpellcheckCustomDictionary::Change::Change() {
}
SpellcheckCustomDictionary::Change::Change(
const SpellcheckCustomDictionary::Change& other)
: to_add_(other.to_add()),
to_remove_(other.to_remove()) {
}
SpellcheckCustomDictionary::Change::Change(const WordList& to_add)
: to_add_(to_add) {
}
SpellcheckCustomDictionary::Change::~Change() {
}
void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) {
to_add_.push_back(word);
}
void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) {
to_remove_.push_back(word);
}
int SpellcheckCustomDictionary::Change::Sanitize(const WordSet& words) {
int result = VALID_CHANGE;
if (!to_add_.empty())
result |= SanitizeWordsToAdd(words, to_add_);
if (!to_remove_.empty())
result |= SanitizeWordsToRemove(words, to_remove_);
return result;
}
const WordList& SpellcheckCustomDictionary::Change::to_add() const {
return to_add_;
}
const WordList& SpellcheckCustomDictionary::Change::to_remove() const {
return to_remove_;
}
bool SpellcheckCustomDictionary::Change::empty() const {
return to_add_.empty() && to_remove_.empty();
}
SpellcheckCustomDictionary::SpellcheckCustomDictionary(
const base::FilePath& path)
: custom_dictionary_path_(),
is_loaded_(false),
weak_ptr_factory_(this) {
custom_dictionary_path_ =
path.Append(chrome::kCustomDictionaryFileName);
}
SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
}
const WordSet& SpellcheckCustomDictionary::GetWords() const {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
return words_;
}
bool SpellcheckCustomDictionary::AddWord(const std::string& word) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
Change dictionary_change;
dictionary_change.AddWord(word);
int result = dictionary_change.Sanitize(GetWords());
Apply(dictionary_change);
Notify(dictionary_change);
Sync(dictionary_change);
Save(dictionary_change);
return result == VALID_CHANGE;
}
bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
Change dictionary_change;
dictionary_change.RemoveWord(word);
int result = dictionary_change.Sanitize(GetWords());
Apply(dictionary_change);
Notify(dictionary_change);
Sync(dictionary_change);
Save(dictionary_change);
return result == VALID_CHANGE;
}
bool SpellcheckCustomDictionary::HasWord(const std::string& word) const {
return !!words_.count(word);
}
void SpellcheckCustomDictionary::AddObserver(Observer* observer) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
observers_.AddObserver(observer);
}
void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
observers_.RemoveObserver(observer);
}
bool SpellcheckCustomDictionary::IsLoaded() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
return is_loaded_;
}
bool SpellcheckCustomDictionary::IsSyncing() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
return !!sync_processor_.get();
}
void SpellcheckCustomDictionary::Load() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
BrowserThread::PostTaskAndReplyWithResult(
BrowserThread::FILE,
FROM_HERE,
base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile,
custom_dictionary_path_),
base::Bind(&SpellcheckCustomDictionary::OnLoaded,
weak_ptr_factory_.GetWeakPtr()));
}
syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing(
syncer::ModelType type,
const syncer::SyncDataList& initial_sync_data,
scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
DCHECK(!sync_processor_.get());
DCHECK(!sync_error_handler_.get());
DCHECK(sync_processor.get());
DCHECK(sync_error_handler.get());
DCHECK_EQ(syncer::DICTIONARY, type);
sync_processor_ = sync_processor.Pass();
sync_error_handler_ = sync_error_handler.Pass();
// Build a list of words to add locally.
WordList to_add_locally;
for (syncer::SyncDataList::const_iterator it = initial_sync_data.begin();
it != initial_sync_data.end();
++it) {
DCHECK_EQ(syncer::DICTIONARY, it->GetDataType());
to_add_locally.push_back(it->GetSpecifics().dictionary().word());
}
// Add remote words locally.
Change to_change_locally(to_add_locally);
to_change_locally.Sanitize(GetWords());
Apply(to_change_locally);
Notify(to_change_locally);
Save(to_change_locally);
// Add as many as possible local words remotely.
std::sort(to_add_locally.begin(), to_add_locally.end());
WordList to_add_remotely = base::STLSetDifference<WordList>(words_,
to_add_locally);
// Send local changes to the sync server.
Change to_change_remotely(to_add_remotely);
syncer::SyncMergeResult result(type);
result.set_error(Sync(to_change_remotely));
return result;
}
void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
DCHECK_EQ(syncer::DICTIONARY, type);
sync_processor_.reset();
sync_error_handler_.reset();
}
syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData(
syncer::ModelType type) const {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
DCHECK_EQ(syncer::DICTIONARY, type);
syncer::SyncDataList data;
std::string word;
size_t i = 0;
for (WordSet::const_iterator it = words_.begin();
it != words_.end() &&
i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS;
++it, ++i) {
word = *it;
sync_pb::EntitySpecifics specifics;
specifics.mutable_dictionary()->set_word(word);
data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics));
}
return data;
}
syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges(
const tracked_objects::Location& from_here,
const syncer::SyncChangeList& change_list) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
Change dictionary_change;
for (syncer::SyncChangeList::const_iterator it = change_list.begin();
it != change_list.end();
++it) {
DCHECK(it->IsValid());
std::string word = it->sync_data().GetSpecifics().dictionary().word();
switch (it->change_type()) {
case syncer::SyncChange::ACTION_ADD:
dictionary_change.AddWord(word);
break;
case syncer::SyncChange::ACTION_DELETE:
dictionary_change.RemoveWord(word);
break;
default:
return sync_error_handler_->CreateAndUploadError(
FROM_HERE,
"Processing sync changes failed on change type " +
syncer::SyncChange::ChangeTypeToString(it->change_type()));
}
}
dictionary_change.Sanitize(GetWords());
Apply(dictionary_change);
Notify(dictionary_change);
Save(dictionary_change);
return syncer::SyncError();
}
// static
WordList SpellcheckCustomDictionary::LoadDictionaryFile(
const base::FilePath& path) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
WordList words;
LoadDictionaryFileReliably(words, path);
if (!words.empty() && VALID_CHANGE != SanitizeWordsToAdd(WordSet(), words))
SaveDictionaryFileReliably(words, path);
SpellCheckHostMetrics::RecordCustomWordCountStats(words.size());
return words;
}
// static
void SpellcheckCustomDictionary::UpdateDictionaryFile(
const SpellcheckCustomDictionary::Change& dictionary_change,
const base::FilePath& path) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
if (dictionary_change.empty())
return;
WordList custom_words;
LoadDictionaryFileReliably(custom_words, path);
// Add words.
custom_words.insert(custom_words.end(),
dictionary_change.to_add().begin(),
dictionary_change.to_add().end());
// Remove words.
std::sort(custom_words.begin(), custom_words.end());
WordList remaining =
base::STLSetDifference<WordList>(custom_words,
dictionary_change.to_remove());
std::swap(custom_words, remaining);
SaveDictionaryFileReliably(custom_words, path);
}
void SpellcheckCustomDictionary::OnLoaded(WordList custom_words) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
Change dictionary_change(custom_words);
dictionary_change.Sanitize(GetWords());
Apply(dictionary_change);
Sync(dictionary_change);
is_loaded_ = true;
FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded());
}
void SpellcheckCustomDictionary::Apply(
const SpellcheckCustomDictionary::Change& dictionary_change) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
if (!dictionary_change.to_add().empty()) {
words_.insert(dictionary_change.to_add().begin(),
dictionary_change.to_add().end());
}
if (!dictionary_change.to_remove().empty()) {
WordSet updated_words =
base::STLSetDifference<WordSet>(words_,
dictionary_change.to_remove());
std::swap(words_, updated_words);
}
}
void SpellcheckCustomDictionary::Save(
const SpellcheckCustomDictionary::Change& dictionary_change) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
BrowserThread::PostTask(
BrowserThread::FILE,
FROM_HERE,
base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile,
dictionary_change,
custom_dictionary_path_));
}
syncer::SyncError SpellcheckCustomDictionary::Sync(
const SpellcheckCustomDictionary::Change& dictionary_change) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
syncer::SyncError error;
if (!IsSyncing() || dictionary_change.empty())
return error;
// The number of words on the sync server should not exceed the limits.
int server_size = static_cast<int>(words_.size()) -
static_cast<int>(dictionary_change.to_add().size());
int max_upload_size = std::max(
0,
static_cast<int>(
chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) -
server_size);
int upload_size = std::min(
static_cast<int>(dictionary_change.to_add().size()),
max_upload_size);
syncer::SyncChangeList sync_change_list;
int i = 0;
for (WordList::const_iterator it = dictionary_change.to_add().begin();
it != dictionary_change.to_add().end() && i < upload_size;
++it, ++i) {
std::string word = *it;
sync_pb::EntitySpecifics specifics;
specifics.mutable_dictionary()->set_word(word);
sync_change_list.push_back(syncer::SyncChange(
FROM_HERE,
syncer::SyncChange::ACTION_ADD,
syncer::SyncData::CreateLocalData(word, word, specifics)));
}
for (WordList::const_iterator it = dictionary_change.to_remove().begin();
it != dictionary_change.to_remove().end();
++it) {
std::string word = *it;
sync_pb::EntitySpecifics specifics;
specifics.mutable_dictionary()->set_word(word);
sync_change_list.push_back(syncer::SyncChange(
FROM_HERE,
syncer::SyncChange::ACTION_DELETE,
syncer::SyncData::CreateLocalData(word, word, specifics)));
}
// Send the changes to the sync processor.
error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list);
if (error.IsSet())
return error;
// Turn off syncing of this dictionary if the server already has the maximum
// number of words.
if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS)
StopSyncing(syncer::DICTIONARY);
return error;
}
void SpellcheckCustomDictionary::Notify(
const SpellcheckCustomDictionary::Change& dictionary_change) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
if (!IsLoaded() || dictionary_change.empty())
return;
FOR_EACH_OBSERVER(Observer,
observers_,
OnCustomDictionaryChanged(dictionary_change));
}