blob: e851a711e75c95a2a25941d52c1ca78dfd84cea6 [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <sstream>
#include "base/command_line.h"
#include "base/files/scoped_temp_dir.h"
#include "base/message_loop/message_loop.h"
#include "base/path_service.h"
#include "base/run_loop.h"
#include "base/strings/string_number_conversions.h"
#include "components/dom_distiller/content/distiller_page_web_contents.h"
#include "components/dom_distiller/core/distiller.h"
#include "components/dom_distiller/core/dom_distiller_database.h"
#include "components/dom_distiller/core/dom_distiller_service.h"
#include "components/dom_distiller/core/dom_distiller_store.h"
#include "components/dom_distiller/core/proto/distilled_article.pb.h"
#include "components/dom_distiller/core/proto/distilled_page.pb.h"
#include "components/dom_distiller/core/task_tracker.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/test/content_browser_test.h"
#include "content/shell/browser/shell.h"
#include "net/dns/mock_host_resolver.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "ui/base/resource/resource_bundle.h"
using content::ContentBrowserTest;
namespace dom_distiller {
namespace {
// The url to distill.
const char* kUrlSwitch = "url";
// Indicates that DNS resolution should be disabled for this test.
const char* kDisableDnsSwitch = "disable-dns";
// Will write the distilled output to the given file instead of to stdout.
const char* kOutputFile = "output-file";
// Indicates to output a serialized protocol buffer instead of human-readable
// output.
const char* kShouldOutputBinary = "output-binary";
// Indicates to output only the text of the article and not the enclosing html.
const char* kExtractTextOnly = "extract-text-only";
// Indicates to include debug output.
const char* kDebugLevel = "debug-level";
scoped_ptr<DomDistillerService> CreateDomDistillerService(
content::BrowserContext* context,
const base::FilePath& db_path) {
scoped_refptr<base::SequencedTaskRunner> background_task_runner =
content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner(
content::BrowserThread::GetBlockingPool()->GetSequenceToken());
// TODO(cjhopman): use an in-memory database instead of an on-disk one with
// temporary directory.
scoped_ptr<DomDistillerDatabase> db(
new DomDistillerDatabase(background_task_runner));
scoped_ptr<DomDistillerStore> dom_distiller_store(new DomDistillerStore(
db.PassAs<DomDistillerDatabaseInterface>(), db_path));
scoped_ptr<DistillerPageFactory> distiller_page_factory(
new DistillerPageWebContentsFactory(context));
scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory(
new DistillerURLFetcherFactory(context->GetRequestContext()));
dom_distiller::proto::DomDistillerOptions options;
if (base::CommandLine::ForCurrentProcess()->HasSwitch(kExtractTextOnly)) {
options.set_extract_text_only(true);
}
int debug_level = 0;
if (base::CommandLine::ForCurrentProcess()->HasSwitch(kDebugLevel) &&
base::StringToInt(
base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
kDebugLevel),
&debug_level)) {
options.set_debug_level(debug_level);
}
scoped_ptr<DistillerFactory> distiller_factory(
new DistillerFactoryImpl(distiller_url_fetcher_factory.Pass(), options));
return scoped_ptr<DomDistillerService>(new DomDistillerService(
dom_distiller_store.PassAs<DomDistillerStoreInterface>(),
distiller_factory.Pass(),
distiller_page_factory.Pass()));
}
void AddComponentsResources() {
base::FilePath pak_file;
base::FilePath pak_dir;
PathService::Get(base::DIR_MODULE, &pak_dir);
pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak"));
ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
pak_file, ui::SCALE_FACTOR_NONE);
}
void LogArticle(const DistilledArticleProto& article_proto) {
std::stringstream output;
if (CommandLine::ForCurrentProcess()->HasSwitch(kShouldOutputBinary)) {
output << article_proto.SerializeAsString();
} else {
output << "Article Title: " << article_proto.title() << std::endl;
output << "# of pages: " << article_proto.pages_size() << std::endl;
for (int i = 0; i < article_proto.pages_size(); ++i) {
const DistilledPageProto& page = article_proto.pages(i);
output << "Page " << i << std::endl;
output << "URL: " << page.url() << std::endl;
output << "Content: " << page.html() << std::endl;
}
}
std::string data = output.str();
if (CommandLine::ForCurrentProcess()->HasSwitch(kOutputFile)) {
base::FilePath filename =
CommandLine::ForCurrentProcess()->GetSwitchValuePath(kOutputFile);
base::WriteFile(filename, data.c_str(), data.size());
} else {
VLOG(0) << data;
}
}
} // namespace
class ContentExtractionRequest : public ViewRequestDelegate {
public:
void Start(DomDistillerService* service, base::Closure finished_callback) {
finished_callback_ = finished_callback;
viewer_handle_ =
service->ViewUrl(this, service->CreateDefaultDistillerPage(), url_);
}
DistilledArticleProto GetArticleCopy() {
return *article_proto_;
}
static scoped_ptr<ContentExtractionRequest> CreateForCommandLine(
const CommandLine& command_line) {
GURL url;
if (command_line.HasSwitch(kUrlSwitch)) {
std::string url_string = command_line.GetSwitchValueASCII(kUrlSwitch);
url = GURL(url_string);
}
if (!url.is_valid()) {
ADD_FAILURE() << "No valid url provided";
return scoped_ptr<ContentExtractionRequest>();
}
return scoped_ptr<ContentExtractionRequest>(
new ContentExtractionRequest(url));
}
private:
ContentExtractionRequest(const GURL& url) : url_(url) {}
virtual void OnArticleUpdated(ArticleDistillationUpdate article_update)
OVERRIDE {}
virtual void OnArticleReady(const DistilledArticleProto* article_proto)
OVERRIDE {
article_proto_ = article_proto;
base::MessageLoop::current()->PostTask(
FROM_HERE,
finished_callback_);
}
const DistilledArticleProto* article_proto_;
scoped_ptr<ViewerHandle> viewer_handle_;
GURL url_;
base::Closure finished_callback_;
};
class ContentExtractor : public ContentBrowserTest {
// Change behavior of the default host resolver to avoid DNS lookup errors, so
// we can make network calls.
virtual void SetUpOnMainThread() OVERRIDE {
if (!CommandLine::ForCurrentProcess()->HasSwitch(kDisableDnsSwitch)) {
EnableDNSLookupForThisTest();
}
CHECK(db_dir_.CreateUniqueTempDir());
AddComponentsResources();
}
virtual void TearDownOnMainThread() OVERRIDE {
DisableDNSLookupForThisTest();
}
protected:
// Creates the DomDistillerService and creates and starts the extraction
// request.
void Start() {
content::BrowserContext* context =
shell()->web_contents()->GetBrowserContext();
service_ = CreateDomDistillerService(context,
db_dir_.path());
const CommandLine& command_line = *CommandLine::ForCurrentProcess();
request_ = ContentExtractionRequest::CreateForCommandLine(command_line);
request_->Start(
service_.get(),
base::Bind(&ContentExtractor::Finish, base::Unretained(this)));
}
private:
// Change behavior of the default host resolver to allow DNS lookup
// to proceed instead of being blocked by the test infrastructure.
void EnableDNSLookupForThisTest() {
// mock_host_resolver_override_ takes ownership of the resolver.
scoped_refptr<net::RuleBasedHostResolverProc> resolver =
new net::RuleBasedHostResolverProc(host_resolver());
resolver->AllowDirectLookup("*");
mock_host_resolver_override_.reset(
new net::ScopedDefaultHostResolverProc(resolver.get()));
}
// We need to reset the DNS lookup when we finish, or the test will fail.
void DisableDNSLookupForThisTest() {
mock_host_resolver_override_.reset();
}
void Finish() {
LogArticle(request_->GetArticleCopy());
request_.reset();
service_.reset();
base::MessageLoop::current()->PostTask(
FROM_HERE, base::MessageLoop::QuitWhenIdleClosure());
}
base::ScopedTempDir db_dir_;
scoped_ptr<net::ScopedDefaultHostResolverProc> mock_host_resolver_override_;
scoped_ptr<DomDistillerService> service_;
scoped_ptr<ContentExtractionRequest> request_;
};
IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) {
Start();
base::RunLoop().Run();
}
} // namespace dom_distiller