// blob: b87a87d9f256824f9992ff1742b59220022f480b [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/safe_browsing/client_side_detection_host.h"
#include <vector>
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/metrics/histogram.h"
#include "base/prefs/pref_service.h"
#include "base/sequenced_task_runner_helpers.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
#include "chrome/browser/safe_browsing/client_side_detection_service.h"
#include "chrome/browser/safe_browsing/database_manager.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/common/chrome_version_info.h"
#include "chrome/common/pref_names.h"
#include "chrome/common/safe_browsing/csd.pb.h"
#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/navigation_details.h"
#include "content/public/browser/navigation_entry.h"
#include "content/public/browser/notification_details.h"
#include "content/public/browser/notification_source.h"
#include "content/public/browser/notification_types.h"
#include "content/public/browser/render_process_host.h"
#include "content/public/browser/render_view_host.h"
#include "content/public/browser/resource_request_details.h"
#include "content/public/browser/web_contents.h"
#include "content/public/common/frame_navigate_params.h"
#include "url/gurl.h"
using content::BrowserThread;
using content::NavigationEntry;
using content::ResourceRequestDetails;
using content::WebContents;
namespace safe_browsing {
// Upper bound on the number of URLs that UpdateIPUrlMap() records for a single
// IP address.
const int ClientSideDetectionHost::kMaxUrlsPerIP = 20;
// Upper bound on the number of distinct IP addresses that UpdateIPUrlMap()
// records in one BrowseInfo.
const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
namespace {

// No-op callback attached to the UnsafeResource that
// MaybeShowPhishingWarning() passes to the SafeBrowsing UI manager. The
// |processed| result is intentionally ignored; the only thing verified is that
// the callback is invoked on the IO thread.
void EmptyUrlCheckCallback(bool processed) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
}

}  // namespace
// This class is instantiated each time a new toplevel URL loads, and
// asynchronously checks whether the phishing classifier should run for this
// URL. If so, it notifies the renderer with a StartPhishingDetection IPC.
// Objects of this class are ref-counted and will be destroyed once nobody
// uses them anymore. If |web_contents|, |csd_service| or |host| go away you
// need to call Cancel(). We keep the |database_manager| alive in a ref pointer
// for as long as the request needs it.
class ClientSideDetectionHost::ShouldClassifyUrlRequest
: public base::RefCountedThreadSafe<
ClientSideDetectionHost::ShouldClassifyUrlRequest> {
public:
// Stores a copy of the navigation |params| and the collaborators used by the
// pre-classification checks. Must be called on the UI thread. All pointer
// arguments are DCHECKed to be non-NULL. |database_manager| is retained in a
// scoped_refptr; the other pointers are stored raw and are cleared by
// Cancel().
ShouldClassifyUrlRequest(const content::FrameNavigateParams& params,
WebContents* web_contents,
ClientSideDetectionService* csd_service,
SafeBrowsingDatabaseManager* database_manager,
ClientSideDetectionHost* host)
: canceled_(false),
params_(params),
web_contents_(web_contents),
csd_service_(csd_service),
database_manager_(database_manager),
host_(host) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
DCHECK(web_contents_);
DCHECK(csd_service_);
DCHECK(database_manager_.get());
DCHECK(host_);
}
// Entry point of the check sequence; must be called on the UI thread.
// Runs the checks that need no thread hop (MIME type, private IP, incognito).
// Each failed check logs, records a bucket in the
// "SBClientPhishing.PreClassificationCheckFail" histogram and ends the
// request. If all of them pass, CheckCsdWhitelist() is posted to the IO
// thread; binding |this| into the task keeps the request alive until it runs.
void Start() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
// We start by doing some simple checks that can run on the UI thread.
UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
// Only classify [X]HTML documents.
if (params_.contents_mime_type != "text/html" &&
params_.contents_mime_type != "application/xhtml+xml") {
VLOG(1) << "Skipping phishing classification for URL: " << params_.url
<< " because it has an unsupported MIME type: "
<< params_.contents_mime_type;
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
NO_CLASSIFY_MAX);
return;
}
// Skip pages served from an address the service considers private.
if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
VLOG(1) << "Skipping phishing classification for URL: " << params_.url
<< " because of hosting on private IP: "
<< params_.socket_address.host();
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_PRIVATE_IP,
NO_CLASSIFY_MAX);
return;
}
// Don't run the phishing classifier if the tab is incognito.
if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
VLOG(1) << "Skipping phishing classification for URL: " << params_.url
<< " because we're browsing incognito.";
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_OFF_THE_RECORD,
NO_CLASSIFY_MAX);
return;
}
// We lookup the csd-whitelist before we lookup the cache because
// a URL may have recently been whitelisted. If the URL matches
// the csd-whitelist we won't start classification. The
// csd-whitelist check has to be done on the IO thread because it
// uses the SafeBrowsing service class.
BrowserThread::PostTask(
BrowserThread::IO,
FROM_HERE,
base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,
this, params_.url));
}
// Marks the request as canceled and drops the raw pointers so that a
// CheckCache() task that is still in flight returns without touching them.
// |database_manager_| is deliberately left untouched.
void Cancel() {
canceled_ = true;
// Just to make sure we don't do anything stupid we reset all these
// pointers except for the safebrowsing service class which may be
// accessed by CheckCsdWhitelist().
web_contents_ = NULL;
csd_service_ = NULL;
host_ = NULL;
}
private:
// Grants RefCountedThreadSafe access to the private destructor.
friend class base::RefCountedThreadSafe<
ClientSideDetectionHost::ShouldClassifyUrlRequest>;
// Enum used to keep stats about why the pre-classification check failed.
// The values are the buckets recorded in the
// "SBClientPhishing.PreClassificationCheckFail" histogram.
// NOTE(review): presumably existing values must keep their positions so that
// recorded data stays comparable -- confirm before reordering.
enum PreClassificationCheckFailures {
OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
NO_CLASSIFY_PRIVATE_IP,
NO_CLASSIFY_OFF_THE_RECORD,
NO_CLASSIFY_MATCH_CSD_WHITELIST,
NO_CLASSIFY_TOO_MANY_REPORTS,
NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
NO_CLASSIFY_MAX // Always add new values before this one.
};
// The destructor can be called either from the UI or the IO thread.
virtual ~ShouldClassifyUrlRequest() { }
// Runs on the IO thread. Ends the request if there is no database manager or
// if |url| matches the csd whitelist; otherwise reads the malware kill switch
// and posts CheckCache() back to the UI thread with its value.
// Note that a missing database manager is logged and counted exactly like a
// whitelist match.
void CheckCsdWhitelist(const GURL& url) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
if (!database_manager_.get() ||
database_manager_->MatchCsdWhitelistUrl(url)) {
// We're done. There is no point in going back to the UI thread.
VLOG(1) << "Skipping phishing classification for URL: " << url
<< " because it matches the csd whitelist";
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_MATCH_CSD_WHITELIST,
NO_CLASSIFY_MAX);
return;
}
bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn();
BrowserThread::PostTask(
BrowserThread::UI,
FROM_HERE,
base::Bind(&ShouldClassifyUrlRequest::CheckCache, this,
malware_killswitch_on));
}
// Runs on the UI thread as the last step of the sequence. Does nothing if the
// request was canceled. Otherwise it forwards |malware_killswitch_on| to the
// host, answers from the verdict cache when a valid cached result exists,
// applies the report limit (skipped for URLs that are in the cache), and
// finally sends the StartPhishingDetection IPC to the renderer.
void CheckCache(bool malware_killswitch_on) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
// Cancel() cleared the raw pointers, so nothing below may run after it.
if (canceled_) {
return;
}
host_->SetMalwareKillSwitch(malware_killswitch_on);
// If result is cached, we don't want to run classification again
bool is_phishing;
if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
VLOG(1) << "Satisfying request for " << params_.url << " from cache";
UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
// Since we are already on the UI thread, this is safe.
host_->MaybeShowPhishingWarning(params_.url, is_phishing);
return;
}
// We want to limit the number of requests, though we will ignore the
// limit for urls in the cache. We don't want to start classifying
// too many pages as phishing, but for those that we already think are
// phishing we want to give ourselves a chance to fix false positives.
if (csd_service_->IsInCache(params_.url)) {
VLOG(1) << "Reporting limit skipped for " << params_.url
<< " as it was in the cache.";
UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
} else if (csd_service_->OverPhishingReportLimit()) {
VLOG(1) << "Too many report phishing requests sent recently, "
<< "not running classification for " << params_.url;
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
NO_CLASSIFY_TOO_MANY_REPORTS,
NO_CLASSIFY_MAX);
return;
}
// Everything checks out, so start classification.
// |web_contents_| is safe to call as we will be destructed
// before it is.
VLOG(1) << "Instruct renderer to start phishing detection for URL: "
<< params_.url;
content::RenderViewHost* rvh = web_contents_->GetRenderViewHost();
rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
rvh->GetRoutingID(), params_.url));
}
// No need to protect |canceled_| with a lock because it is only read and
// written by the UI thread.
bool canceled_;
// Copy of the navigation parameters (URL, MIME type, socket address) that
// the checks above read.
content::FrameNavigateParams params_;
// Raw pointers; set to NULL by Cancel().
WebContents* web_contents_;
ClientSideDetectionService* csd_service_;
// We keep a ref pointer here just to make sure the safe browsing
// database manager stays alive long enough.
scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
// Raw pointer to the owning host; set to NULL by Cancel().
ClientSideDetectionHost* host_;
DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
};
// static
// Factory function: heap-allocates a host that observes |tab| and hands the
// raw pointer back to the caller.
ClientSideDetectionHost* ClientSideDetectionHost::Create(WebContents* tab) {
  ClientSideDetectionHost* const host = new ClientSideDetectionHost(tab);
  return host;
}
// Wires the host up to |tab|: fetches the client-side detection service,
// creates the browser feature extractor, subscribes to resource-response
// notifications for |tab| and, when a SafeBrowsing service exists, registers
// as an observer of its UI manager.
ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
    : content::WebContentsObserver(tab),
      csd_service_(NULL),
      weak_factory_(this),
      unsafe_unique_page_id_(-1),
      malware_killswitch_on_(false),
      malware_report_enabled_(false) {
  DCHECK(tab);

  // Note: csd_service_ and the SafeBrowsing service will be NULL here in
  // testing.
  csd_service_ = g_browser_process->safe_browsing_detection_service();
  feature_extractor_.reset(new BrowserFeatureExtractor(tab, csd_service_));

  registrar_.Add(this,
                 content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
                 content::Source<WebContents>(tab));

  scoped_refptr<SafeBrowsingService> safe_browsing =
      g_browser_process->safe_browsing_service();
  if (safe_browsing.get()) {
    ui_manager_ = safe_browsing->ui_manager();
    database_manager_ = safe_browsing->database_manager();
    ui_manager_->AddObserver(this);
  }

  // The malware bad-IP matching and report feature is only turned on for the
  // canary and dev channels.
  const chrome::VersionInfo::Channel channel =
      chrome::VersionInfo::GetChannel();
  const bool is_dev = (channel == chrome::VersionInfo::CHANNEL_DEV);
  const bool is_canary = (channel == chrome::VersionInfo::CHANNEL_CANARY);
  malware_report_enabled_ = is_dev || is_canary;
}
// Unregisters from the SafeBrowsing UI manager, if one was ever attached.
ClientSideDetectionHost::~ClientSideDetectionHost() {
  if (!ui_manager_.get())
    return;
  ui_manager_->RemoveObserver(this);
}
// Dispatches an incoming IPC |message|. The only message handled here is
// SafeBrowsingHostMsg_PhishingDetectionDone, which is routed to
// OnPhishingDetectionDone(). Returns true if the message was handled and false
// for every other message.
bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
bool handled = true;
IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
OnPhishingDetectionDone)
// Any message not listed above is reported as unhandled.
IPC_MESSAGE_UNHANDLED(handled = false)
IPC_END_MESSAGE_MAP()
return handled;
}
// Called when the main frame commits a navigation. For anything other than an
// in-page navigation this drops pending warning callbacks, cancels the
// previous classification request, starts a fresh BrowseInfo (recording the
// redirect chains and the HTTP status code) and kicks off a new
// ShouldClassifyUrlRequest for the committed URL.
void ClientSideDetectionHost::DidNavigateMainFrame(
    const content::LoadCommittedDetails& details,
    const content::FrameNavigateParams& params) {
  // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
  // that don't call this method on the UI thread.
  // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

  // An in-page navigation does not really leave the page, so neither a
  // pending callback nor the running classification has to be touched.
  if (details.is_in_page)
    return;

  // A phishing report request may still be pending for the page we are
  // leaving. Invalidate its callback so that no interstitial is displayed for
  // the wrong page. This only suppresses the interstitial; the server ping
  // itself is not canceled.
  weak_factory_.InvalidateWeakPtrs();

  if (!csd_service_)
    return;

  // Cancel any pending classification request.
  if (classification_request_.get())
    classification_request_->Cancel();

  browse_info_.reset(new BrowseInfo);

  // Store redirect chain information. The per-host chain is only replaced
  // when the navigation lands on a different host.
  const std::string committed_host = params.url.host();
  if (committed_host != cur_host_) {
    cur_host_ = committed_host;
    cur_host_redirects_ = params.redirects;
  }
  browse_info_->host_redirects = cur_host_redirects_;
  browse_info_->url_redirects = params.redirects;
  browse_info_->http_status_code = details.http_status_code;

  // Notify the renderer if it should classify this URL.
  classification_request_ = new ShouldClassifyUrlRequest(
      params, web_contents(), csd_service_, database_manager_.get(), this);
  classification_request_->Start();
}
// SafeBrowsing UI manager observer callback. If |resource| is a phishing or
// malware hit that belongs to this host's render view and there is an active
// navigation entry, remembers that entry's unique ID together with a copy of
// |resource| for later use.
void ClientSideDetectionHost::OnSafeBrowsingHit(
    const SafeBrowsingUIManager::UnsafeResource& resource) {
  WebContents* const contents = web_contents();
  if (!contents)
    return;

  // Check that this notification is really for us.
  if (contents->GetRenderProcessHost()->GetID() !=
      resource.render_process_host_id) {
    return;
  }
  if (contents->GetRenderViewHost()->GetRoutingID() !=
      resource.render_view_id) {
    return;
  }

  // Only malware and phishing hits are of interest.
  const bool is_tracked_threat =
      resource.threat_type == SB_THREAT_TYPE_URL_PHISHING ||
      resource.threat_type == SB_THREAT_TYPE_URL_MALWARE;
  if (!is_tracked_threat)
    return;

  const NavigationEntry* active_entry =
      contents->GetController().GetActiveEntry();
  if (!active_entry)
    return;

  // Store the unique page ID for later.
  unsafe_unique_page_id_ = active_entry->GetUniqueID();

  // Keep the resource around as well so that the malicious URL can be sent to
  // the server. The copied callback is cleared so it is never run from here.
  unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
  unsafe_resource_->callback.Reset();
}
// Called when the observed WebContents goes away: cancels the pending
// classification request (if any) and destroys the feature extractor, which
// cancels all pending feature extractions.
void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {
  DCHECK(tab);

  ShouldClassifyUrlRequest* const pending = classification_request_.get();
  if (pending) {
    // Tell the pending classification request that it is being canceled.
    pending->Cancel();
  }

  feature_extractor_.reset();
}
// Handles the renderer's PhishingDetectionDone message. |verdict_str| is a
// serialized ClientPhishingRequest. When it parses successfully this
// (a) optionally runs malware feature extraction and sends the malware
// report, and (b) starts browser-side phishing feature extraction if the
// verdict is phishing or a SafeBrowsing interstitial was shown for this page.
// |browse_info_| is released in every case before returning.
void ClientSideDetectionHost::OnPhishingDetectionDone(
    const std::string& verdict_str) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  // There is something seriously wrong if there is no service class but
  // this method is called. The renderer should not start phishing detection
  // if there isn't any service class in the browser.
  DCHECK(csd_service_);
  // There shouldn't be any pending requests because we revoke them every time
  // we navigate away.
  DCHECK(!weak_factory_.HasWeakPtrs());
  DCHECK(browse_info_.get());

  // Parse the protocol buffer. A verdict that cannot be parsed (or arrives in
  // an unexpected state) is not sent any further.
  scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
  const bool verdict_usable =
      csd_service_ &&
      !weak_factory_.HasWeakPtrs() &&
      browse_info_.get() &&
      verdict->ParseFromString(verdict_str) &&
      verdict->IsInitialized();
  if (!verdict_usable) {
    browse_info_.reset();
    return;
  }

  // Malware IP matching and request sending only happen if the feature is
  // enabled and not disabled through the kill switch.
  if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) {
    scoped_ptr<ClientMalwareRequest> malware_verdict(new ClientMalwareRequest);
    // Run browser-side malware feature extraction, then hand the request over
    // so the malware client verdict request gets sent.
    malware_verdict->set_url(verdict->url());
    feature_extractor_->ExtractMalwareFeatures(browse_info_.get(),
                                               malware_verdict.get());
    MalwareFeatureExtractionDone(malware_verdict.Pass());
  }

  // The phishing verdict is only sent to the server if the verdict is
  // phishing or if a SafeBrowsing interstitial was already shown for this
  // site, e.g. a malware or phishing interstitial that the user clicked
  // through.
  const bool interstitial_shown = DidShowSBInterstitial();
  if (verdict->is_phishing() || interstitial_shown) {
    if (interstitial_shown)
      browse_info_->unsafe_resource.reset(unsafe_resource_.release());
    // Start browser-side feature extraction. Once it is done the client
    // verdict request is sent from FeatureExtractionDone().
    feature_extractor_->ExtractFeatures(
        browse_info_.get(),
        verdict.release(),
        base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
                   weak_factory_.GetWeakPtr()));
  }

  browse_info_.reset();
}
// Shows the client-side phishing interstitial for |phishing_url| when
// |is_phishing| is true, a UI manager is available and the resource is not
// whitelisted by it. Does nothing otherwise. Must run on the UI thread.
void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
                                                       bool is_phishing) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  VLOG(2) << "Received server phishing verdict for URL:" << phishing_url
          << " is_phishing:" << is_phishing;
  if (!is_phishing)
    return;

  DCHECK(web_contents());
  if (!ui_manager_.get())
    return;

  // Describe the page to the UI manager as a client-side phishing hit on the
  // main resource of this host's render view.
  SafeBrowsingUIManager::UnsafeResource resource;
  resource.url = phishing_url;
  resource.original_url = phishing_url;
  resource.is_subresource = false;
  resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
  resource.render_process_host_id =
      web_contents()->GetRenderProcessHost()->GetID();
  resource.render_view_id =
      web_contents()->GetRenderViewHost()->GetRoutingID();

  if (ui_manager_->IsWhitelisted(resource))
    return;

  // Pending navigations have to be stopped first, otherwise the interstitial
  // might not get created properly.
  web_contents()->GetController().DiscardNonCommittedEntries();
  resource.callback = base::Bind(&EmptyUrlCheckCallback);
  ui_manager_->DoDisplayBlockingPage(resource);
}
void ClientSideDetectionHost::FeatureExtractionDone(
bool success,
ClientPhishingRequest* request) {
if (!request) {
DLOG(FATAL) << "Invalid request object in FeatureExtractionDone";
return;
}
VLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
<< request->url() << ". Start sending client phishing request.";
ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
// If the client-side verdict isn't phishing we don't care about the server
// response because we aren't going to display a warning.
if (request->is_phishing()) {
callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
weak_factory_.GetWeakPtr());
}
// Send ping even if the browser feature extraction failed.
csd_service_->SendClientReportPhishingRequest(
request, // The service takes ownership of the request object.
callback);
}
// Called once malware feature extraction has filled in |request|. Sends the
// malware report to the server only if at least one feature was extracted;
// otherwise |request| is simply dropped. A NULL |request| is reported with
// DLOG(FATAL) and ignored.
void ClientSideDetectionHost::MalwareFeatureExtractionDone(
    scoped_ptr<ClientMalwareRequest> request) {
  if (!request.get()) {
    DLOG(FATAL) << "Invalid request object in MalwareFeatureExtractionDone";
    return;
  }
  VLOG(2) << "Malware Feature extraction done for URL: " << request->url()
          << ", with features count:" << request->feature_map_size();

  // Nothing matched, so there is nothing to report.
  if (request->feature_map_size() <= 0)
    return;

  VLOG(1) << "Start sending client malware request.";
  // Default-constructed callback: no action is taken after the request has
  // been sent for now.
  ClientSideDetectionService::ClientReportMalwareRequestCallback no_callback;
  // The service takes ownership of the request object.
  csd_service_->SendClientReportMalwareRequest(request.release(), no_callback);
}
void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
const std::string& url) {
if (ip.empty() || url.empty())
return;
IPUrlMap::iterator it = browse_info_->ips.find(ip);
if (it == browse_info_->ips.end()) {
if (int(browse_info_->ips.size()) < kMaxIPsPerBrowse) {
std::set<std::string> urls;
urls.insert(url);
browse_info_->ips.insert(make_pair(ip, urls));
}
} else if (int(it->second.size()) < kMaxUrlsPerIP) {
it->second.insert(url);
}
}
// Notification observer; only NOTIFICATION_RESOURCE_RESPONSE_STARTED is
// expected. While a BrowseInfo exists and malware reporting is enabled (and
// not kill-switched), records the IP/URL pair of every response whose URL is
// valid.
void ClientSideDetectionHost::Observe(
    int type,
    const content::NotificationSource& source,
    const content::NotificationDetails& details) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);

  const ResourceRequestDetails* response =
      content::Details<ResourceRequestDetails>(details).ptr();
  if (!response || !browse_info_.get())
    return;
  if (!malware_report_enabled_ || MalwareKillSwitchIsOn())
    return;
  if (!response->url.is_valid())
    return;

  UpdateIPUrlMap(response->socket_address.host() /* ip */,
                 response->url.spec() /* url */);
}
bool ClientSideDetectionHost::DidShowSBInterstitial() {
if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
return false;
}
const NavigationEntry* nav_entry =
web_contents()->GetController().GetActiveEntry();
return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
}
// Replaces the client-side detection service used by this host with
// |service|. The pointer is stored as-is.
void ClientSideDetectionHost::set_client_side_detection_service(
    ClientSideDetectionService* service) {
  this->csd_service_ = service;
}
// Swaps in a new UI manager and database manager. The host stops observing
// the previous UI manager (if any) and starts observing |ui_manager| when it
// is non-NULL.
void ClientSideDetectionHost::set_safe_browsing_managers(
    SafeBrowsingUIManager* ui_manager,
    SafeBrowsingDatabaseManager* database_manager) {
  // Detach from the manager that is being replaced.
  if (ui_manager_.get()) {
    ui_manager_->RemoveObserver(this);
  }

  ui_manager_ = ui_manager;
  if (ui_manager) {
    // |ui_manager_| now refers to the same object as |ui_manager|.
    ui_manager->AddObserver(this);
  }

  database_manager_ = database_manager;
}
// Returns the malware kill switch state last stored through
// SetMalwareKillSwitch(). Must be called on the UI thread.
bool ClientSideDetectionHost::MalwareKillSwitchIsOn() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  const bool killswitch_on = malware_killswitch_on_;
  return killswitch_on;
}
void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
malware_killswitch_on_ = killswitch_on;
}
} // namespace safe_browsing