blob: d769615a7c74c228e0f427e926ea34744ea51646 [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/memory/weak_ptr.h"
#include "base/path_service.h"
#include "base/run_loop.h"
#include "base/values.h"
#include "components/dom_distiller/content/distiller_page_web_contents.h"
#include "components/dom_distiller/content/web_contents_main_frame_observer.h"
#include "components/dom_distiller/core/distiller_page.h"
#include "components/dom_distiller/core/proto/distilled_article.pb.h"
#include "components/dom_distiller/core/proto/distilled_page.pb.h"
#include "components/dom_distiller/core/viewer.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/test/content_browser_test.h"
#include "content/shell/browser/shell.h"
#include "grit/components_strings.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "ui/base/l10n/l10n_util.h"
#include "ui/base/resource/resource_bundle.h"
using content::ContentBrowserTest;
using testing::ContainsRegex;
using testing::HasSubstr;
using testing::Not;
namespace dom_distiller {
// Server-relative paths of the test pages that the tests below pass to
// embedded_test_server()->GetURL(). Declared as const arrays (rather than
// mutable `const char*` pointers) so they cannot be reassigned and have
// internal linkage.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
class DistillerPageWebContentsTest : public ContentBrowserTest {
public:
// ContentBrowserTest:
void SetUpOnMainThread() override {
AddComponentsResources();
SetUpTestServer();
ContentBrowserTest::SetUpOnMainThread();
}
void DistillPage(const base::Closure& quit_closure, const std::string& url) {
quit_closure_ = quit_closure;
distiller_page_->DistillPage(
embedded_test_server()->GetURL(url),
dom_distiller::proto::DomDistillerOptions(),
base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished,
this));
}
void OnPageDistillationFinished(
scoped_ptr<proto::DomDistillerResult> distiller_result,
bool distillation_successful) {
distiller_result_ = distiller_result.Pass();
quit_closure_.Run();
}
private:
void AddComponentsResources() {
base::FilePath pak_file;
base::FilePath pak_dir;
PathService::Get(base::DIR_MODULE, &pak_dir);
pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak"));
ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
pak_file, ui::SCALE_FACTOR_NONE);
}
void SetUpTestServer() {
base::FilePath path;
PathService::Get(base::DIR_SOURCE_ROOT, &path);
path = path.AppendASCII("components/test/data/dom_distiller");
embedded_test_server()->ServeFilesFromDirectory(path);
ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
}
protected:
void RunUseCurrentWebContentsTest(const std::string& url,
bool expect_new_web_contents,
bool setup_main_frame_observer,
bool wait_for_document_loaded);
DistillerPageWebContents* distiller_page_;
base::Closure quit_closure_;
scoped_ptr<proto::DomDistillerResult> distiller_result_;
};
// Use this class to be able to leak the WebContents, which is needed for when
// the current WebContents is used for distillation.
class TestDistillerPageWebContents : public DistillerPageWebContents {
public:
TestDistillerPageWebContents(
content::BrowserContext* browser_context,
const gfx::Size& render_view_size,
scoped_ptr<SourcePageHandleWebContents> optional_web_contents_handle,
bool expect_new_web_contents)
: DistillerPageWebContents(browser_context, render_view_size,
optional_web_contents_handle.Pass()),
expect_new_web_contents_(expect_new_web_contents),
new_web_contents_created_(false) {}
void CreateNewWebContents(const GURL& url) override {
ASSERT_EQ(true, expect_new_web_contents_);
new_web_contents_created_ = true;
// DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
// the WebContents, so intentionally leak WebContents here, since it is
// owned by the shell.
content::WebContents* web_contents = web_contents_.release();
web_contents->GetLastCommittedURL();
DistillerPageWebContents::CreateNewWebContents(url);
}
~TestDistillerPageWebContents() override {
if (!expect_new_web_contents_) {
// Intentionally leaking WebContents, since it is owned by the shell.
content::WebContents* web_contents = web_contents_.release();
web_contents->GetLastCommittedURL();
}
}
bool new_web_contents_created() { return new_web_contents_created_; }
private:
bool expect_new_web_contents_;
bool new_web_contents_created_;
};
// Observer that reports how far the current WebContents has progressed in
// loading. It runs |callback| for the main frame (the frame without a parent)
// on exactly one kind of event: DocumentLoadedInFrame when
// |wait_for_document_loaded| is true, DidCommitProvisionalLoadForFrame
// otherwise.
class WebContentsMainFrameHelper : public content::WebContentsObserver {
 public:
  WebContentsMainFrameHelper(content::WebContents* web_contents,
                             const base::Closure& callback,
                             bool wait_for_document_loaded)
      : WebContentsObserver(web_contents),
        callback_(callback),
        wait_for_document_loaded_(wait_for_document_loaded) {}

  void DidCommitProvisionalLoadForFrame(
      content::RenderFrameHost* render_frame_host,
      const GURL& url,
      ui::PageTransition transition_type) override {
    // Commit notifications only matter when the caller is not waiting for the
    // document-loaded event.
    if (!wait_for_document_loaded_ && !render_frame_host->GetParent())
      callback_.Run();
  }

  void DocumentLoadedInFrame(
      content::RenderFrameHost* render_frame_host) override {
    if (!wait_for_document_loaded_)
      return;
    // Ignore subframes.
    if (render_frame_host->GetParent())
      return;
    callback_.Run();
  }

 private:
  base::Closure callback_;
  bool wait_for_document_loaded_;
};
// Distills the simple article with a DistillerPageWebContents that has no
// source page handle, then checks the title, the distilled content and that
// no pagination links are reported.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) {
  DistillerPageWebContents page(
      shell()->web_contents()->GetBrowserContext(),
      shell()->web_contents()->GetContainerBounds().size(),
      scoped_ptr<SourcePageHandleWebContents>());
  distiller_page_ = &page;

  base::RunLoop distillation_loop;
  DistillPage(distillation_loop.QuitClosure(), kSimpleArticlePath);
  distillation_loop.Run();

  const std::string& html = distiller_result_->distilled_content().html();
  EXPECT_EQ("Test Page Title", distiller_result_->title());
  EXPECT_THAT(html, HasSubstr("Lorem ipsum"));
  EXPECT_THAT(html, Not(HasSubstr("questionable content")));
  EXPECT_EQ("", distiller_result_->pagination_info().next_page());
  EXPECT_EQ("", distiller_result_->pagination_info().prev_page());
}
// Checks the href attributes in the distilled simple article: the relative
// link must appear as an absolute test-server URL and the absolute link must
// appear as-is.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
  DistillerPageWebContents page(
      shell()->web_contents()->GetBrowserContext(),
      shell()->web_contents()->GetContainerBounds().size(),
      scoped_ptr<SourcePageHandleWebContents>());
  distiller_page_ = &page;

  base::RunLoop distillation_loop;
  DistillPage(distillation_loop.QuitClosure(), kSimpleArticlePath);
  distillation_loop.Run();

  const std::string& html = distiller_result_->distilled_content().html();
  // The relative link should have been made absolute.
  EXPECT_THAT(html,
              ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
  EXPECT_THAT(html,
              HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
}
// Checks the image src attributes in the distilled simple article: the
// relative image must appear as an absolute test-server URL and the absolute
// image must appear as-is.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) {
  DistillerPageWebContents page(
      shell()->web_contents()->GetBrowserContext(),
      shell()->web_contents()->GetContainerBounds().size(),
      scoped_ptr<SourcePageHandleWebContents>());
  distiller_page_ = &page;

  base::RunLoop distillation_loop;
  DistillPage(distillation_loop.QuitClosure(), kSimpleArticlePath);
  distillation_loop.Run();

  const std::string& html = distiller_result_->distilled_content().html();
  // The relative image source should have been made absolute.
  EXPECT_THAT(html,
              ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
  EXPECT_THAT(html,
              HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
}
// Checks the video and track src attributes in the distilled video article:
// relative sources must appear as absolute test-server URLs and absolute
// sources must appear as-is.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeVideos) {
  DistillerPageWebContents page(
      shell()->web_contents()->GetBrowserContext(),
      shell()->web_contents()->GetContainerBounds().size(),
      scoped_ptr<SourcePageHandleWebContents>());
  distiller_page_ = &page;

  base::RunLoop distillation_loop;
  DistillPage(distillation_loop.QuitClosure(), kVideoArticlePath);
  distillation_loop.Run();

  const std::string& html = distiller_result_->distilled_content().html();
  // The relative source and track should have been made absolute.
  EXPECT_THAT(html,
              ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
  EXPECT_THAT(
      html,
      ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
  EXPECT_THAT(html,
              HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
  EXPECT_THAT(html,
              HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
}
// Distills two pages with the same DistillerPageWebContents and checks that
// the article text is extracted only from the visible one.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, VisibilityDetection) {
  DistillerPageWebContents page(
      shell()->web_contents()->GetBrowserContext(),
      shell()->web_contents()->GetContainerBounds().size(),
      scoped_ptr<SourcePageHandleWebContents>());
  distiller_page_ = &page;

  // visible_style.html and invisible_style.html only differ by the visibility
  // internal stylesheet.
  {
    base::RunLoop visible_loop;
    DistillPage(visible_loop.QuitClosure(), "/visible_style.html");
    visible_loop.Run();
    const std::string& html = distiller_result_->distilled_content().html();
    EXPECT_THAT(html, HasSubstr("Lorem ipsum"));
  }

  {
    base::RunLoop invisible_loop;
    DistillPage(invisible_loop.QuitClosure(), "/invisible_style.html");
    invisible_loop.Run();
    const std::string& html = distiller_result_->distilled_content().html();
    EXPECT_THAT(html, Not(HasSubstr("Lorem ipsum")));
  }
}
// The current WebContents is navigated to "/bogus", which is not the page that
// gets distilled, so a new WebContents is expected to be created.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
                       UsingCurrentWebContentsWrongUrl) {
  RunUseCurrentWebContentsTest("/bogus",
                               true,   // expect_new_web_contents
                               true,   // setup_main_frame_observer
                               true);  // wait_for_document_loaded
}
// The current WebContents shows the right page, but no
// WebContentsMainFrameObserver is attached to it; a new WebContents is
// expected to be created.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
                       UsingCurrentWebContentsNoMainFrameObserver) {
  RunUseCurrentWebContentsTest(kSimpleArticlePath,
                               true,   // expect_new_web_contents
                               false,  // setup_main_frame_observer
                               true);  // wait_for_document_loaded
}
// Distillation starts as soon as the main-frame load has committed, without
// waiting for the document-loaded notification; the current WebContents is
// still expected to be reused.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
                       UsingCurrentWebContentsNotFinishedLoadingYet) {
  RunUseCurrentWebContentsTest(kSimpleArticlePath,
                               false,   // expect_new_web_contents
                               true,    // setup_main_frame_observer
                               false);  // wait_for_document_loaded
}
// The current WebContents has the observer attached and its main-frame
// document has loaded; it is expected to be reused for distillation.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
                       UsingCurrentWebContentsReadyForDistillation) {
  RunUseCurrentWebContentsTest(kSimpleArticlePath,
                               false,  // expect_new_web_contents
                               true,   // setup_main_frame_observer
                               true);  // wait_for_document_loaded
}
void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
const std::string& url,
bool expect_new_web_contents,
bool setup_main_frame_observer,
bool wait_for_document_loaded) {
content::WebContents* current_web_contents = shell()->web_contents();
if (setup_main_frame_observer) {
dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
current_web_contents);
}
base::RunLoop url_loaded_runner;
WebContentsMainFrameHelper main_frame_loaded(current_web_contents,
url_loaded_runner.QuitClosure(),
wait_for_document_loaded);
current_web_contents->GetController().LoadURL(
embedded_test_server()->GetURL(url),
content::Referrer(),
ui::PAGE_TRANSITION_TYPED,
std::string());
url_loaded_runner.Run();
scoped_ptr<content::WebContents> old_web_contents_sptr(current_web_contents);
scoped_ptr<SourcePageHandleWebContents> source_page_handle(
new SourcePageHandleWebContents(old_web_contents_sptr.Pass()));
TestDistillerPageWebContents distiller_page(
shell()->web_contents()->GetBrowserContext(),
shell()->web_contents()->GetContainerBounds().size(),
source_page_handle.Pass(),
expect_new_web_contents);
distiller_page_ = &distiller_page;
base::RunLoop run_loop;
DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
run_loop.Run();
// Sanity check of distillation process.
EXPECT_EQ(expect_new_web_contents, distiller_page.new_web_contents_created());
EXPECT_EQ("Test Page Title", distiller_result_->title());
}
// Distills /markup_article.html and checks every field of the MarkupInfo in
// the result: page-level fields, article fields and both images.
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) {
  DistillerPageWebContents page(
      shell()->web_contents()->GetBrowserContext(),
      shell()->web_contents()->GetContainerBounds().size(),
      scoped_ptr<SourcePageHandleWebContents>());
  distiller_page_ = &page;

  base::RunLoop distillation_loop;
  DistillPage(distillation_loop.QuitClosure(), "/markup_article.html");
  distillation_loop.Run();

  EXPECT_THAT(distiller_result_->distilled_content().html(),
              HasSubstr("Lorem ipsum"));
  EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_->title());

  // Page-level markup.
  const proto::MarkupInfo& info = distiller_result_->markup_info();
  EXPECT_EQ("Marked-up Markup Test Page Title", info.title());
  EXPECT_EQ("Article", info.type());
  EXPECT_EQ("http://test/markup.html", info.url());
  EXPECT_EQ("This page tests Markup Info.", info.description());
  EXPECT_EQ("Whoever Published", info.publisher());
  EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", info.copyright());
  EXPECT_EQ("Whoever Authored", info.author());

  // Article markup.
  const proto::MarkupArticle& article = info.article();
  EXPECT_EQ("Whatever Section", article.section());
  EXPECT_EQ("July 23, 2014", article.published_time());
  EXPECT_EQ("2014-07-23T23:59", article.modified_time());
  EXPECT_EQ("", article.expiration_time());
  ASSERT_EQ(1, article.authors_size());
  EXPECT_EQ("Whoever Authored", article.authors(0));

  // Image markup; the count is asserted before either image is indexed.
  ASSERT_EQ(2, info.images_size());

  const proto::MarkupImage& first_image = info.images(0);
  EXPECT_EQ("http://test/markup1.jpeg", first_image.url());
  EXPECT_EQ("https://test/markup1.jpeg", first_image.secure_url());
  EXPECT_EQ("jpeg", first_image.type());
  EXPECT_EQ("", first_image.caption());
  EXPECT_EQ(600, first_image.width());
  EXPECT_EQ(400, first_image.height());

  const proto::MarkupImage& second_image = info.images(1);
  EXPECT_EQ("http://test/markup2.gif", second_image.url());
  EXPECT_EQ("https://test/markup2.gif", second_image.secure_url());
  EXPECT_EQ("gif", second_image.type());
  EXPECT_EQ("", second_image.caption());
  EXPECT_EQ(1000, second_image.width());
  EXPECT_EQ(600, second_image.height());
}
// Checks that the viewer HTML always contains a title and content: either the
// supplied ones or the localized "no data" placeholders. Four combinations
// (both set, both empty, title missing, content missing) are exercised for a
// whole article (GetUnsafeArticleHtml) and for a single page
// (GetUnsafePartialArticleHtml).
IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
                       TestTitleAndContentAreNeverEmpty) {
  const std::string some_title = "some title";
  const std::string some_content = "some content";
  const std::string no_title =
      l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_TITLE);
  const std::string no_content =
      l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_CONTENT);

  // --- Whole-article viewer -------------------------------------------------

  {  // Article with a non-empty title and non-empty content.
    DistilledArticleProto article;
    article.set_title(some_title);
    article.add_pages()->set_html(some_content);
    std::string html = viewer::GetUnsafeArticleHtml(
        &article, DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
    EXPECT_THAT(html, HasSubstr(some_title));
    EXPECT_THAT(html, HasSubstr(some_content));
    EXPECT_THAT(html, Not(HasSubstr(no_title)));
    EXPECT_THAT(html, Not(HasSubstr(no_content)));
  }

  {  // Article whose title and content are both the empty string.
    DistilledArticleProto article;
    article.set_title("");
    article.add_pages()->set_html("");
    std::string html = viewer::GetUnsafeArticleHtml(
        &article, DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
    EXPECT_THAT(html, HasSubstr(no_title));
    EXPECT_THAT(html, HasSubstr(no_content));
    EXPECT_THAT(html, Not(HasSubstr(some_title)));
    EXPECT_THAT(html, Not(HasSubstr(some_content)));
  }

  {  // Article with no title set but non-empty content.
    DistilledArticleProto article;
    article.add_pages()->set_html(some_content);
    std::string html = viewer::GetUnsafeArticleHtml(
        &article, DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
    EXPECT_THAT(html, HasSubstr(no_title));
    EXPECT_THAT(html, HasSubstr(no_content));
    EXPECT_THAT(html, Not(HasSubstr(some_title)));
    EXPECT_THAT(html, Not(HasSubstr(some_content)));
  }

  {  // Article with a non-empty title but no pages.
    DistilledArticleProto article;
    article.set_title(some_title);
    std::string html = viewer::GetUnsafeArticleHtml(
        &article, DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
    EXPECT_THAT(html, HasSubstr(no_title));
    EXPECT_THAT(html, HasSubstr(no_content));
    EXPECT_THAT(html, Not(HasSubstr(some_title)));
    EXPECT_THAT(html, Not(HasSubstr(some_content)));
  }

  // --- Single-page (partial article) viewer ---------------------------------

  {  // Page with a non-empty title and non-empty content.
    DistilledPageProto page;
    page.set_title(some_title);
    page.set_html(some_content);
    std::string html = viewer::GetUnsafePartialArticleHtml(
        &page, DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
    EXPECT_THAT(html, HasSubstr(some_title));
    EXPECT_THAT(html, HasSubstr(some_content));
    EXPECT_THAT(html, Not(HasSubstr(no_title)));
    EXPECT_THAT(html, Not(HasSubstr(no_content)));
  }

  {  // Page whose title and content are both the empty string.
    DistilledPageProto page;
    page.set_title("");
    page.set_html("");
    std::string html = viewer::GetUnsafePartialArticleHtml(
        &page, DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
    EXPECT_THAT(html, HasSubstr(no_title));
    EXPECT_THAT(html, HasSubstr(no_content));
    EXPECT_THAT(html, Not(HasSubstr(some_title)));
    EXPECT_THAT(html, Not(HasSubstr(some_content)));
  }

  {  // Page with no title set but non-empty content.
    DistilledPageProto page;
    page.set_html(some_content);
    std::string html = viewer::GetUnsafePartialArticleHtml(
        &page, DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
    EXPECT_THAT(html, HasSubstr(no_title));
    EXPECT_THAT(html, HasSubstr(some_content));
    EXPECT_THAT(html, Not(HasSubstr(some_title)));
    EXPECT_THAT(html, Not(HasSubstr(no_content)));
  }

  {  // Page with a non-empty title but no content set.
    DistilledPageProto page;
    page.set_title(some_title);
    std::string html = viewer::GetUnsafePartialArticleHtml(
        &page, DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
    EXPECT_THAT(html, HasSubstr(some_title));
    EXPECT_THAT(html, HasSubstr(no_content));
    EXPECT_THAT(html, Not(HasSubstr(no_title)));
    EXPECT_THAT(html, Not(HasSubstr(some_content)));
  }
}
} // namespace dom_distiller