| // Copyright 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "content/child/site_isolation_policy.h" |
| |
| #include "base/basictypes.h" |
| #include "base/command_line.h" |
| #include "base/lazy_instance.h" |
| #include "base/logging.h" |
| #include "base/metrics/histogram.h" |
| #include "base/strings/string_util.h" |
| #include "content/public/common/content_switches.h" |
| #include "content/public/common/resource_response_info.h" |
| #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| #include "net/http/http_response_headers.h" |
| |
| using base::StringPiece; |
| |
| namespace content { |
| |
| namespace { |
| |
// File-level switch for cross-site document blocking and its UMA data
// collection. It is deactivated by default and only activated in renderer
// processes; SiteIsolationPolicy::SetPolicyEnabled() is the writer.
static bool g_policy_enabled = false;

// MIME types recognized by SiteIsolationPolicy::GetCanonicalMimeType(). Any
// MIME type not listed here is classified as
// SiteIsolationResponseMetaData::Others.
const char kTextHtml[] = "text/html";
const char kTextXml[] = "text/xml";
// NOTE(review): |xAppRssXml| does not follow the k-prefix naming used by the
// sibling constants; renaming it also requires updating its use in
// GetCanonicalMimeType().
const char xAppRssXml[] = "application/rss+xml";
const char kAppXml[] = "application/xml";
const char kAppJson[] = "application/json";
const char kTextJson[] = "text/json";
const char kTextXjson[] = "text/x-json";
const char kTextPlain[] = "text/plain";
| |
// Returns true when |status_code| is one of the HTTP status codes for which
// Chrome uses the response body as CSS/JavaScript. Images are not relevant
// here because Chrome ignores the status code for them.
// TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted
// when this class is used for actual blocking.
bool IsRenderableStatusCode(int status_code) {
  switch (status_code) {
    case 200:
    case 201:
    case 202:
    case 203:
    case 206:
    case 300:
    case 301:
    case 302:
    case 303:
    case 305:
    case 306:
    case 307:
      return true;
    default:
      return false;
  }
}
| |
| bool MatchesSignature(StringPiece data, |
| const StringPiece signatures[], |
| size_t arr_size) { |
| |
| size_t offset = data.find_first_not_of(" \t\r\n"); |
| // There is no not-whitespace character in this document. |
| if (offset == base::StringPiece::npos) |
| return false; |
| |
| data.remove_prefix(offset); |
| size_t length = data.length(); |
| |
| for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { |
| const StringPiece& signature = signatures[sig_index]; |
| size_t signature_length = signature.length(); |
| if (length < signature_length) |
| continue; |
| |
| if (LowerCaseEqualsASCII( |
| data.begin(), data.begin() + signature_length, signature.data())) |
| return true; |
| } |
| return false; |
| } |
| |
| void IncrementHistogramCount(const std::string& name) { |
| // The default value of min, max, bucket_count are copied from histogram.h. |
| base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( |
| name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); |
| histogram_pointer->Add(1); |
| } |
| |
| void IncrementHistogramEnum(const std::string& name, |
| uint32 sample, |
| uint32 boundary_value) { |
| // The default value of min, max, bucket_count are copied from histogram.h. |
| base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet( |
| name, |
| 1, |
| boundary_value, |
| boundary_value + 1, |
| base::HistogramBase::kUmaTargetedHistogramFlag); |
| histogram_pointer->Add(sample); |
| } |
| |
| void HistogramCountBlockedResponse( |
| const std::string& bucket_prefix, |
| linked_ptr<SiteIsolationResponseMetaData>& resp_data, |
| bool nosniff_block) { |
| std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked"); |
| IncrementHistogramCount(bucket_prefix + block_label); |
| |
| // The content is blocked if it is sniffed as HTML/JSON/XML. When |
| // the blocked response is with an error status code, it is not |
| // disruptive for the following reasons : 1) the blocked content is |
| // not a binary object (such as an image) since it is sniffed as |
| // text; 2) then, this blocking only breaks the renderer behavior |
| // only if it is either JavaScript or CSS. However, the renderer |
| // doesn't use the contents of JS/CSS with unaffected status code |
| // (e.g, 404). 3) the renderer is expected not to use the cross-site |
| // document content for purposes other than JS/CSS (e.g, XHR). |
| bool renderable_status_code = |
| IsRenderableStatusCode(resp_data->http_status_code); |
| |
| if (renderable_status_code) { |
| IncrementHistogramEnum( |
| bucket_prefix + block_label + ".RenderableStatusCode", |
| resp_data->resource_type, |
| ResourceType::LAST_TYPE); |
| } else { |
| IncrementHistogramCount(bucket_prefix + block_label + |
| ".NonRenderableStatusCode"); |
| } |
| } |
| |
| void HistogramCountNotBlockedResponse(const std::string& bucket_prefix, |
| bool sniffed_as_js) { |
| IncrementHistogramCount(bucket_prefix + ".NotBlocked"); |
| if (sniffed_as_js) |
| IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS"); |
| } |
| |
| } // namespace |
| |
// Default constructor. No member is explicitly initialized here;
// SiteIsolationPolicy::OnReceivedResponse() assigns the fields right after
// constructing an instance.
SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {}
| |
// Turns the cross-site document policy on or off by writing the file-level
// |g_policy_enabled| flag. While the flag is false, OnReceivedResponse()
// returns an empty linked_ptr and ShouldBlockResponse() returns false.
void SiteIsolationPolicy::SetPolicyEnabled(bool enabled) {
  g_policy_enabled = enabled;
}
| |
| linked_ptr<SiteIsolationResponseMetaData> |
| SiteIsolationPolicy::OnReceivedResponse(const GURL& frame_origin, |
| const GURL& response_url, |
| ResourceType::Type resource_type, |
| int origin_pid, |
| const ResourceResponseInfo& info) { |
| if (!g_policy_enabled) |
| return linked_ptr<SiteIsolationResponseMetaData>(); |
| |
| // if |origin_pid| is non-zero, it means that this response is for a plugin |
| // spawned from this renderer process. We exclude responses for plugins for |
| // now, but eventually, we're going to make plugin processes directly talk to |
| // the browser process so that we don't apply cross-site document blocking to |
| // them. |
| if (origin_pid) |
| return linked_ptr<SiteIsolationResponseMetaData>(); |
| |
| UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1); |
| |
| // See if this is for navigation. If it is, don't block it, under the |
| // assumption that we will put it in an appropriate process. |
| if (ResourceType::IsFrame(resource_type)) |
| return linked_ptr<SiteIsolationResponseMetaData>(); |
| |
| if (!IsBlockableScheme(response_url)) |
| return linked_ptr<SiteIsolationResponseMetaData>(); |
| |
| if (IsSameSite(frame_origin, response_url)) |
| return linked_ptr<SiteIsolationResponseMetaData>(); |
| |
| SiteIsolationResponseMetaData::CanonicalMimeType canonical_mime_type = |
| GetCanonicalMimeType(info.mime_type); |
| |
| if (canonical_mime_type == SiteIsolationResponseMetaData::Others) |
| return linked_ptr<SiteIsolationResponseMetaData>(); |
| |
| // Every CORS request should have the Access-Control-Allow-Origin header even |
| // if it is preceded by a pre-flight request. Therefore, if this is a CORS |
| // request, it has this header. response.httpHeaderField() internally uses |
| // case-insensitive matching for the header name. |
| std::string access_control_origin; |
| |
| // We can use a case-insensitive header name for EnumerateHeader(). |
| info.headers->EnumerateHeader( |
| NULL, "access-control-allow-origin", &access_control_origin); |
| if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) |
| return linked_ptr<SiteIsolationResponseMetaData>(); |
| |
| // Real XSD data collection starts from here. |
| std::string no_sniff; |
| info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff); |
| |
| linked_ptr<SiteIsolationResponseMetaData> resp_data( |
| new SiteIsolationResponseMetaData); |
| resp_data->frame_origin = frame_origin.spec(); |
| resp_data->response_url = response_url; |
| resp_data->resource_type = resource_type; |
| resp_data->canonical_mime_type = canonical_mime_type; |
| resp_data->http_status_code = info.headers->response_code(); |
| resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff"); |
| |
| return resp_data; |
| } |
| |
| bool SiteIsolationPolicy::ShouldBlockResponse( |
| linked_ptr<SiteIsolationResponseMetaData>& resp_data, |
| const char* raw_data, |
| int raw_length, |
| std::string* alternative_data) { |
| if (!g_policy_enabled) |
| return false; |
| |
| DCHECK(resp_data.get()); |
| |
| StringPiece data(raw_data, raw_length); |
| |
| // Record the length of the first received network packet to see if it's |
| // enough for sniffing. |
| UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); |
| |
| // Record the number of cross-site document responses with a specific mime |
| // type (text/html, text/xml, etc). |
| UMA_HISTOGRAM_ENUMERATION( |
| "SiteIsolation.XSD.MimeType", |
| resp_data->canonical_mime_type, |
| SiteIsolationResponseMetaData::MaxCanonicalMimeType); |
| |
| // Store the result of cross-site document blocking analysis. |
| bool is_blocked = false; |
| bool sniffed_as_js = SniffForJS(data); |
| |
| // Record the number of responses whose content is sniffed for what its mime |
| // type claims it to be. For example, we apply a HTML sniffer for a document |
| // tagged with text/html here. Whenever this check becomes true, we'll block |
| // the response. |
| if (resp_data->canonical_mime_type != |
| SiteIsolationResponseMetaData::Plain) { |
| std::string bucket_prefix; |
| bool sniffed_as_target_document = false; |
| if (resp_data->canonical_mime_type == |
| SiteIsolationResponseMetaData::HTML) { |
| bucket_prefix = "SiteIsolation.XSD.HTML"; |
| sniffed_as_target_document = SniffForHTML(data); |
| } else if (resp_data->canonical_mime_type == |
| SiteIsolationResponseMetaData::XML) { |
| bucket_prefix = "SiteIsolation.XSD.XML"; |
| sniffed_as_target_document = SniffForXML(data); |
| } else if (resp_data->canonical_mime_type == |
| SiteIsolationResponseMetaData::JSON) { |
| bucket_prefix = "SiteIsolation.XSD.JSON"; |
| sniffed_as_target_document = SniffForJSON(data); |
| } else { |
| NOTREACHED() << "Not a blockable mime type: " |
| << resp_data->canonical_mime_type; |
| } |
| |
| if (sniffed_as_target_document) { |
| is_blocked = true; |
| HistogramCountBlockedResponse(bucket_prefix, resp_data, false); |
| } else { |
| if (resp_data->no_sniff) { |
| is_blocked = true; |
| HistogramCountBlockedResponse(bucket_prefix, resp_data, true); |
| } else { |
| HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js); |
| } |
| } |
| } else { |
| // This block is for plain text documents. We apply our HTML, XML, |
| // and JSON sniffer to a text document in the order, and block it |
| // if any of them succeeds in sniffing. |
| std::string bucket_prefix; |
| if (SniffForHTML(data)) |
| bucket_prefix = "SiteIsolation.XSD.Plain.HTML"; |
| else if (SniffForXML(data)) |
| bucket_prefix = "SiteIsolation.XSD.Plain.XML"; |
| else if (SniffForJSON(data)) |
| bucket_prefix = "SiteIsolation.XSD.Plain.JSON"; |
| |
| if (bucket_prefix.size() > 0) { |
| is_blocked = true; |
| HistogramCountBlockedResponse(bucket_prefix, resp_data, false); |
| } else if (resp_data->no_sniff) { |
| is_blocked = true; |
| HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); |
| } else { |
| HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", |
| sniffed_as_js); |
| } |
| } |
| |
| if (!CommandLine::ForCurrentProcess()->HasSwitch( |
| switches::kBlockCrossSiteDocuments)) |
| is_blocked = false; |
| |
| if (is_blocked) { |
| alternative_data->erase(); |
| alternative_data->insert(0, " "); |
| LOG(ERROR) << resp_data->response_url |
| << " is blocked as an illegal cross-site document from " |
| << resp_data->frame_origin; |
| } |
| return is_blocked; |
| } |
| |
| SiteIsolationResponseMetaData::CanonicalMimeType |
| SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) { |
| if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { |
| return SiteIsolationResponseMetaData::HTML; |
| } |
| |
| if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { |
| return SiteIsolationResponseMetaData::Plain; |
| } |
| |
| if (LowerCaseEqualsASCII(mime_type, kAppJson) || |
| LowerCaseEqualsASCII(mime_type, kTextJson) || |
| LowerCaseEqualsASCII(mime_type, kTextXjson)) { |
| return SiteIsolationResponseMetaData::JSON; |
| } |
| |
| if (LowerCaseEqualsASCII(mime_type, kTextXml) || |
| LowerCaseEqualsASCII(mime_type, xAppRssXml) || |
| LowerCaseEqualsASCII(mime_type, kAppXml)) { |
| return SiteIsolationResponseMetaData::XML; |
| } |
| |
| return SiteIsolationResponseMetaData::Others; |
| } |
| |
| bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) { |
| // We exclude ftp:// from here. FTP doesn't provide a Content-Type |
| // header which our policy depends on, so we cannot protect any |
| // document from FTP servers. |
| return url.SchemeIs("http") || url.SchemeIs("https"); |
| } |
| |
| bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin, |
| const GURL& response_url) { |
| |
| if (!frame_origin.is_valid() || !response_url.is_valid()) |
| return false; |
| |
| if (frame_origin.scheme() != response_url.scheme()) |
| return false; |
| |
| // SameDomainOrHost() extracts the effective domains (public suffix plus one) |
| // from the two URLs and compare them. |
| return net::registry_controlled_domains::SameDomainOrHost( |
| frame_origin, |
| response_url, |
| net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); |
| } |
| |
| // We don't use Webkit's existing CORS policy implementation since |
| // their policy works in terms of origins, not sites. For example, |
| // when frame is sub.a.com and it is not allowed to access a document |
| // with sub1.a.com. But under Site Isolation, it's allowed. |
| bool SiteIsolationPolicy::IsValidCorsHeaderSet( |
| const GURL& frame_origin, |
| const GURL& website_origin, |
| const std::string& access_control_origin) { |
| // Many websites are sending back "\"*\"" instead of "*". This is |
| // non-standard practice, and not supported by Chrome. Refer to |
| // CrossOriginAccessControl::passesAccessControlCheck(). |
| |
| // TODO(dsjang): * is not allowed for the response from a request |
| // with cookies. This allows for more than what the renderer will |
| // eventually be able to receive, so we won't see illegal cross-site |
| // documents allowed by this. We have to find a way to see if this |
| // response is from a cookie-tagged request or not in the future. |
| if (access_control_origin == "*") |
| return true; |
| |
| // TODO(dsjang): The CORS spec only treats a fully specified URL, except for |
| // "*", but many websites are using just a domain for access_control_origin, |
| // and this is blocked by Webkit's CORS logic here : |
| // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set |
| // is_valid() to false when it is created from a URL containing * in the |
| // domain part. |
| |
| GURL cors_origin(access_control_origin); |
| return IsSameSite(frame_origin, cors_origin); |
| } |
| |
| // This function is a slight modification of |net::SniffForHTML|. |
| bool SiteIsolationPolicy::SniffForHTML(StringPiece data) { |
| // The content sniffer used by Chrome and Firefox are using "<!--" |
| // as one of the HTML signatures, but it also appears in valid |
| // JavaScript, considered as well-formed JS by the browser. Since |
| // we do not want to block any JS, we exclude it from our HTML |
| // signatures. This can weaken our document block policy, but we can |
| // break less websites. |
| // TODO(dsjang): parameterize |net::SniffForHTML| with an option |
| // that decides whether to include <!-- or not, so that we can |
| // remove this function. |
| // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser |
| // process, we should do single-thread checking here for the static |
| // initializer. |
| static const StringPiece kHtmlSignatures[] = { |
| StringPiece("<!DOCTYPE html"), // HTML5 spec |
| StringPiece("<script"), // HTML5 spec, Mozilla |
| StringPiece("<html"), // HTML5 spec, Mozilla |
| StringPiece("<head"), // HTML5 spec, Mozilla |
| StringPiece("<iframe"), // Mozilla |
| StringPiece("<h1"), // Mozilla |
| StringPiece("<div"), // Mozilla |
| StringPiece("<font"), // Mozilla |
| StringPiece("<table"), // Mozilla |
| StringPiece("<a"), // Mozilla |
| StringPiece("<style"), // Mozilla |
| StringPiece("<title"), // Mozilla |
| StringPiece("<b"), // Mozilla |
| StringPiece("<body"), // Mozilla |
| StringPiece("<br"), // Mozilla |
| StringPiece("<p"), // Mozilla |
| StringPiece("<?xml") // Mozilla |
| }; |
| |
| while (data.length() > 0) { |
| if (MatchesSignature( |
| data, kHtmlSignatures, arraysize(kHtmlSignatures))) |
| return true; |
| |
| // If we cannot find "<!--", we fail sniffing this as HTML. |
| static const StringPiece kCommentBegins[] = { StringPiece("<!--") }; |
| if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins))) |
| break; |
| |
| // Search for --> and do SniffForHTML after that. If we can find the |
| // comment's end, we start HTML sniffing from there again. |
| static const char kEndComment[] = "-->"; |
| size_t offset = data.find(kEndComment); |
| if (offset == base::StringPiece::npos) |
| break; |
| |
| // Proceed to the index next to the ending comment (-->). |
| data.remove_prefix(offset + strlen(kEndComment)); |
| } |
| |
| return false; |
| } |
| |
| bool SiteIsolationPolicy::SniffForXML(base::StringPiece data) { |
| // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for |
| // this signature. However, XML is case-sensitive. Don't we have to |
| // be more lenient only to block documents starting with the exact |
| // string <?xml rather than <?XML ? |
| // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser |
| // process, we should do single-thread checking here for the static |
| // initializer. |
| static const StringPiece kXmlSignatures[] = { StringPiece("<?xml") }; |
| return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures)); |
| } |
| |
| bool SiteIsolationPolicy::SniffForJSON(base::StringPiece data) { |
| // TODO(dsjang): We have to come up with a better way to sniff |
| // JSON. However, even RE cannot help us that much due to the fact |
| // that we don't do full parsing. This DFA starts with state 0, and |
| // finds {, "/' and : in that order. We're avoiding adding a |
| // dependency on a regular expression library. |
| enum { |
| kStartState, |
| kLeftBraceState, |
| kLeftQuoteState, |
| kColonState, |
| kTerminalState, |
| } state = kStartState; |
| |
| size_t length = data.length(); |
| for (size_t i = 0; i < length && state < kColonState; ++i) { |
| const char c = data[i]; |
| if (c == ' ' || c == '\t' || c == '\r' || c == '\n') |
| continue; |
| |
| switch (state) { |
| case kStartState: |
| if (c == '{') |
| state = kLeftBraceState; |
| else |
| state = kTerminalState; |
| break; |
| case kLeftBraceState: |
| if (c == '\"' || c == '\'') |
| state = kLeftQuoteState; |
| else |
| state = kTerminalState; |
| break; |
| case kLeftQuoteState: |
| if (c == ':') |
| state = kColonState; |
| break; |
| case kColonState: |
| case kTerminalState: |
| NOTREACHED(); |
| break; |
| } |
| } |
| return state == kColonState; |
| } |
| |
| bool SiteIsolationPolicy::SniffForJS(StringPiece data) { |
| // TODO(dsjang): This is a real hack. The only purpose of this function is to |
| // try to see if there's any possibility that this data can be JavaScript |
| // (superset of JS). This function will be removed once UMA stats are |
| // gathered. |
| |
| // Search for "var " for JS detection. |
| return data.find("var ") != base::StringPiece::npos; |
| } |
| |
| } // namespace content |