net/cookies/cookie_util.cc - platform/external/chromium_org - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "net/cookies/cookie_util.h"

 #include <cstdio>
 #include <cstdlib>

 #include "base/logging.h"
 #include "base/strings/string_tokenizer.h"
 #include "base/strings/string_util.h"
 #include "build/build_config.h"
 #include "net/base/net_util.h"
 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
 #include "url/gurl.h"

 namespace net {
 namespace cookie_util {

 bool DomainIsHostOnly(const std::string& domain_string) {
   return (domain_string.empty() || domain_string[0] != '.');
 }

 std::string GetEffectiveDomain(const std::string& scheme,
                                const std::string& host) {
   if (scheme == "http" || scheme == "https") {
     return registry_controlled_domains::GetDomainAndRegistry(
         host,
         registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
   }

   if (!DomainIsHostOnly(host))
     return host.substr(1);
   return host;
 }

 bool GetCookieDomainWithString(const GURL& url,
                                const std::string& domain_string,
                                std::string* result) {
   const std::string url_host(url.host());

   // If no domain was specified in the domain string, default to a host cookie.
   // We match IE/Firefox in allowing a domain=IPADDR if it matches the url
   // ip address hostname exactly.  It should be treated as a host cookie.
   if (domain_string.empty() ||
       (url.HostIsIPAddress() && url_host == domain_string)) {
     *result = url_host;
     DCHECK(DomainIsHostOnly(*result));
     return true;
   }

   // Get the normalized domain specified in cookie line.
   url::CanonHostInfo ignored;
   std::string cookie_domain(CanonicalizeHost(domain_string, &ignored));
   if (cookie_domain.empty())
     return false;
   if (cookie_domain[0] != '.')
     cookie_domain = "." + cookie_domain;

   // Ensure |url| and |cookie_domain| have the same domain+registry.
   const std::string url_scheme(url.scheme());
   const std::string url_domain_and_registry(
       GetEffectiveDomain(url_scheme, url_host));
   if (url_domain_and_registry.empty())
     return false;  // IP addresses/intranet hosts can't set domain cookies.
   const std::string cookie_domain_and_registry(
       GetEffectiveDomain(url_scheme, cookie_domain));
   if (url_domain_and_registry != cookie_domain_and_registry)
     return false;  // Can't set a cookie on a different domain + registry.

   // Ensure |url_host| is |cookie_domain| or one of its subdomains.  Given that
   // we know the domain+registry are the same from the above checks, this is
   // basically a simple string suffix check.
   const bool is_suffix = (url_host.length() < cookie_domain.length()) ?
       (cookie_domain != ("." + url_host)) :
       (url_host.compare(url_host.length() - cookie_domain.length(),
                         cookie_domain.length(), cookie_domain) != 0);
   if (is_suffix)
     return false;

   *result = cookie_domain;
   return true;
 }

 // Parse a cookie expiration time.  We try to be lenient, but we need to
 // assume some order to distinguish the fields.  The basic rules:
 //  - The month name must be present and prefix the first 3 letters of the
 //    full month name (jan for January, jun for June).
 //  - If the year is <= 2 digits, it must occur after the day of month.
 //  - The time must be of the format hh:mm:ss.
 // An average cookie expiration will look something like this:
 //   Sat, 15-Apr-17 21:01:22 GMT
 base::Time ParseCookieTime(const std::string& time_string) {
   static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
                                    "jul", "aug", "sep", "oct", "nov", "dec" };
   static const int kMonthsLen = arraysize(kMonths);
   // We want to be pretty liberal, and support most non-ascii and non-digit
   // characters as a delimiter.  We can't treat : as a delimiter, because it
   // is the delimiter for hh:mm:ss, and we want to keep this field together.
   // We make sure to include - and +, since they could prefix numbers.
   // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes
   // will be preserved, and we will get them here.  So we make sure to include
   // quote characters, and also \ for anything that was internally escaped.
   static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~";

   base::Time::Exploded exploded = {0};

   base::StringTokenizer tokenizer(time_string, kDelimiters);

   bool found_day_of_month = false;
   bool found_month = false;
   bool found_time = false;
   bool found_year = false;

   while (tokenizer.GetNext()) {
     const std::string token = tokenizer.token();
     DCHECK(!token.empty());
     bool numerical = IsAsciiDigit(token[0]);

     // String field
     if (!numerical) {
       if (!found_month) {
         for (int i = 0; i < kMonthsLen; ++i) {
           // Match prefix, so we could match January, etc
           if (base::strncasecmp(token.c_str(), kMonths[i], 3) == 0) {
             exploded.month = i + 1;
             found_month = true;
             break;
           }
         }
       } else {
         // If we've gotten here, it means we've already found and parsed our
         // month, and we have another string, which we would expect to be the
         // the time zone name.  According to the RFC and my experiments with
         // how sites format their expirations, we don't have much of a reason
         // to support timezones.  We don't want to ever barf on user input,
         // but this DCHECK should pass for well-formed data.
         // DCHECK(token == "GMT");
       }
     // Numeric field w/ a colon
     } else if (token.find(':') != std::string::npos) {
       if (!found_time &&
 #ifdef COMPILER_MSVC
           sscanf_s(
 #else
           sscanf(
 #endif
                  token.c_str(), "%2u:%2u:%2u", &exploded.hour,
                  &exploded.minute, &exploded.second) == 3) {
         found_time = true;
       } else {
         // We should only ever encounter one time-like thing.  If we're here,
         // it means we've found a second, which shouldn't happen.  We keep
         // the first.  This check should be ok for well-formed input:
         // NOTREACHED();
       }
     // Numeric field
     } else {
       // Overflow with atoi() is unspecified, so we enforce a max length.
       if (!found_day_of_month && token.length() <= 2) {
         exploded.day_of_month = atoi(token.c_str());
         found_day_of_month = true;
       } else if (!found_year && token.length() <= 5) {
         exploded.year = atoi(token.c_str());
         found_year = true;
       } else {
         // If we're here, it means we've either found an extra numeric field,
         // or a numeric field which was too long.  For well-formed input, the
         // following check would be reasonable:
         // NOTREACHED();
       }
     }
   }

   if (!found_day_of_month || !found_month || !found_time || !found_year) {
     // We didn't find all of the fields we need.  For well-formed input, the
     // following check would be reasonable:
     // NOTREACHED() << "Cookie parse expiration failed: " << time_string;
     return base::Time();
   }

   // Normalize the year to expand abbreviated years to the full year.
   if (exploded.year >= 69 && exploded.year <= 99)
     exploded.year += 1900;
   if (exploded.year >= 0 && exploded.year <= 68)
     exploded.year += 2000;

   // If our values are within their correct ranges, we got our time.
   if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
       exploded.month >= 1 && exploded.month <= 12 &&
       exploded.year >= 1601 && exploded.year <= 30827 &&
       exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
     return base::Time::FromUTCExploded(exploded);
   }

   // One of our values was out of expected range.  For well-formed input,
   // the following check would be reasonable:
   // NOTREACHED() << "Cookie exploded expiration failed: " << time_string;

   return base::Time();
 }

 GURL CookieOriginToURL(const std::string& domain, bool is_https) {
   if (domain.empty())
     return GURL();

   const std::string scheme = is_https ? "https" : "http";
   const std::string host = domain[0] == '.' ? domain.substr(1) : domain;
   return GURL(scheme + "://" + host);
 }

 void ParseRequestCookieLine(const std::string& header_value,
                             ParsedRequestCookies* parsed_cookies) {
   std::string::const_iterator i = header_value.begin();
   while (i != header_value.end()) {
     // Here we are at the beginning of a cookie.

     // Eat whitespace.
     while (i != header_value.end() && *i == ' ') ++i;
     if (i == header_value.end()) return;

     // Find cookie name.
     std::string::const_iterator cookie_name_beginning = i;
     while (i != header_value.end() && *i != '=') ++i;
     base::StringPiece cookie_name(cookie_name_beginning, i);

     // Find cookie value.
     base::StringPiece cookie_value;
     // Cookies may have no value, in this case '=' may or may not be there.
     if (i != header_value.end() && i + 1 != header_value.end()) {
       ++i;  // Skip '='.
       std::string::const_iterator cookie_value_beginning = i;
       if (*i == '"') {
         ++i;  // Skip '"'.
         while (i != header_value.end() && *i != '"') ++i;
         if (i == header_value.end()) return;
         ++i;  // Skip '"'.
         cookie_value = base::StringPiece(cookie_value_beginning, i);
         // i points to character after '"', potentially a ';'.
       } else {
         while (i != header_value.end() && *i != ';') ++i;
         cookie_value = base::StringPiece(cookie_value_beginning, i);
         // i points to ';' or end of string.
       }
     }
     parsed_cookies->push_back(std::make_pair(cookie_name, cookie_value));
     // Eat ';'.
     if (i != header_value.end()) ++i;
   }
 }

 std::string SerializeRequestCookieLine(
     const ParsedRequestCookies& parsed_cookies) {
   std::string buffer;
   for (ParsedRequestCookies::const_iterator i = parsed_cookies.begin();
        i != parsed_cookies.end(); ++i) {
     if (!buffer.empty())
       buffer.append("; ");
     buffer.append(i->first.begin(), i->first.end());
     buffer.push_back('=');
     buffer.append(i->second.begin(), i->second.end());
   }
   return buffer;
 }

 }  // namespace cookie_utils
 }  // namespace net
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "net/cookies/cookie_util.h"

	#include <cstdio>
	#include <cstdlib>

	#include "base/logging.h"
	#include "base/strings/string_tokenizer.h"
	#include "base/strings/string_util.h"
	#include "build/build_config.h"
	#include "net/base/net_util.h"
	#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
	#include "url/gurl.h"

	namespace net {
	namespace cookie_util {

	bool DomainIsHostOnly(const std::string& domain_string) {
	return (domain_string.empty() \|\| domain_string[0] != '.');
	}

	std::string GetEffectiveDomain(const std::string& scheme,
	const std::string& host) {
	if (scheme == "http" \|\| scheme == "https") {
	return registry_controlled_domains::GetDomainAndRegistry(
	host,
	registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
	}

	if (!DomainIsHostOnly(host))
	return host.substr(1);
	return host;
	}

	bool GetCookieDomainWithString(const GURL& url,
	const std::string& domain_string,
	std::string* result) {
	const std::string url_host(url.host());

	// If no domain was specified in the domain string, default to a host cookie.
	// We match IE/Firefox in allowing a domain=IPADDR if it matches the url
	// ip address hostname exactly. It should be treated as a host cookie.
	if (domain_string.empty() \|\|
	(url.HostIsIPAddress() && url_host == domain_string)) {
	*result = url_host;
	DCHECK(DomainIsHostOnly(*result));
	return true;
	}

	// Get the normalized domain specified in cookie line.
	url::CanonHostInfo ignored;
	std::string cookie_domain(CanonicalizeHost(domain_string, &ignored));
	if (cookie_domain.empty())
	return false;
	if (cookie_domain[0] != '.')
	cookie_domain = "." + cookie_domain;

	// Ensure \|url\| and \|cookie_domain\| have the same domain+registry.
	const std::string url_scheme(url.scheme());
	const std::string url_domain_and_registry(
	GetEffectiveDomain(url_scheme, url_host));
	if (url_domain_and_registry.empty())
	return false; // IP addresses/intranet hosts can't set domain cookies.
	const std::string cookie_domain_and_registry(
	GetEffectiveDomain(url_scheme, cookie_domain));
	if (url_domain_and_registry != cookie_domain_and_registry)
	return false; // Can't set a cookie on a different domain + registry.

	// Ensure \|url_host\| is \|cookie_domain\| or one of its subdomains. Given that
	// we know the domain+registry are the same from the above checks, this is
	// basically a simple string suffix check.
	const bool is_suffix = (url_host.length() < cookie_domain.length()) ?
	(cookie_domain != ("." + url_host)) :
	(url_host.compare(url_host.length() - cookie_domain.length(),
	cookie_domain.length(), cookie_domain) != 0);
	if (is_suffix)
	return false;

	*result = cookie_domain;
	return true;
	}

	// Parse a cookie expiration time. We try to be lenient, but we need to
	// assume some order to distinguish the fields. The basic rules:
	// - The month name must be present and prefix the first 3 letters of the
	// full month name (jan for January, jun for June).
	// - If the year is <= 2 digits, it must occur after the day of month.
	// - The time must be of the format hh:mm:ss.
	// An average cookie expiration will look something like this:
	// Sat, 15-Apr-17 21:01:22 GMT
	base::Time ParseCookieTime(const std::string& time_string) {
	static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
	"jul", "aug", "sep", "oct", "nov", "dec" };
	static const int kMonthsLen = arraysize(kMonths);
	// We want to be pretty liberal, and support most non-ascii and non-digit
	// characters as a delimiter. We can't treat : as a delimiter, because it
	// is the delimiter for hh:mm:ss, and we want to keep this field together.
	// We make sure to include - and +, since they could prefix numbers.
	// If the cookie attribute came in in quotes (ex expires="XXX"), the quotes
	// will be preserved, and we will get them here. So we make sure to include
	// quote characters, and also \ for anything that was internally escaped.
	static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{\|}~";

	base::Time::Exploded exploded = {0};

	base::StringTokenizer tokenizer(time_string, kDelimiters);

	bool found_day_of_month = false;
	bool found_month = false;
	bool found_time = false;
	bool found_year = false;

	while (tokenizer.GetNext()) {
	const std::string token = tokenizer.token();
	DCHECK(!token.empty());
	bool numerical = IsAsciiDigit(token[0]);

	// String field
	if (!numerical) {
	if (!found_month) {
	for (int i = 0; i < kMonthsLen; ++i) {
	// Match prefix, so we could match January, etc
	if (base::strncasecmp(token.c_str(), kMonths[i], 3) == 0) {
	exploded.month = i + 1;
	found_month = true;
	break;
	}
	}
	} else {
	// If we've gotten here, it means we've already found and parsed our
	// month, and we have another string, which we would expect to be the
	// the time zone name. According to the RFC and my experiments with
	// how sites format their expirations, we don't have much of a reason
	// to support timezones. We don't want to ever barf on user input,
	// but this DCHECK should pass for well-formed data.
	// DCHECK(token == "GMT");
	}
	// Numeric field w/ a colon
	} else if (token.find(':') != std::string::npos) {
	if (!found_time &&
	#ifdef COMPILER_MSVC
	sscanf_s(
	#else
	sscanf(
	#endif
	token.c_str(), "%2u:%2u:%2u", &exploded.hour,
	&exploded.minute, &exploded.second) == 3) {
	found_time = true;
	} else {
	// We should only ever encounter one time-like thing. If we're here,
	// it means we've found a second, which shouldn't happen. We keep
	// the first. This check should be ok for well-formed input:
	// NOTREACHED();
	}
	// Numeric field
	} else {
	// Overflow with atoi() is unspecified, so we enforce a max length.
	if (!found_day_of_month && token.length() <= 2) {
	exploded.day_of_month = atoi(token.c_str());
	found_day_of_month = true;
	} else if (!found_year && token.length() <= 5) {
	exploded.year = atoi(token.c_str());
	found_year = true;
	} else {
	// If we're here, it means we've either found an extra numeric field,
	// or a numeric field which was too long. For well-formed input, the
	// following check would be reasonable:
	// NOTREACHED();
	}
	}
	}

	if (!found_day_of_month \|\| !found_month \|\| !found_time \|\| !found_year) {
	// We didn't find all of the fields we need. For well-formed input, the
	// following check would be reasonable:
	// NOTREACHED() << "Cookie parse expiration failed: " << time_string;
	return base::Time();
	}

	// Normalize the year to expand abbreviated years to the full year.
	if (exploded.year >= 69 && exploded.year <= 99)
	exploded.year += 1900;
	if (exploded.year >= 0 && exploded.year <= 68)
	exploded.year += 2000;

	// If our values are within their correct ranges, we got our time.
	if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
	exploded.month >= 1 && exploded.month <= 12 &&
	exploded.year >= 1601 && exploded.year <= 30827 &&
	exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
	return base::Time::FromUTCExploded(exploded);
	}

	// One of our values was out of expected range. For well-formed input,
	// the following check would be reasonable:
	// NOTREACHED() << "Cookie exploded expiration failed: " << time_string;

	return base::Time();
	}

	GURL CookieOriginToURL(const std::string& domain, bool is_https) {
	if (domain.empty())
	return GURL();

	const std::string scheme = is_https ? "https" : "http";
	const std::string host = domain[0] == '.' ? domain.substr(1) : domain;
	return GURL(scheme + "://" + host);
	}

	void ParseRequestCookieLine(const std::string& header_value,
	ParsedRequestCookies* parsed_cookies) {
	std::string::const_iterator i = header_value.begin();
	while (i != header_value.end()) {
	// Here we are at the beginning of a cookie.

	// Eat whitespace.
	while (i != header_value.end() && *i == ' ') ++i;
	if (i == header_value.end()) return;

	// Find cookie name.
	std::string::const_iterator cookie_name_beginning = i;
	while (i != header_value.end() && *i != '=') ++i;
	base::StringPiece cookie_name(cookie_name_beginning, i);

	// Find cookie value.
	base::StringPiece cookie_value;
	// Cookies may have no value, in this case '=' may or may not be there.
	if (i != header_value.end() && i + 1 != header_value.end()) {
	++i; // Skip '='.
	std::string::const_iterator cookie_value_beginning = i;
	if (*i == '"') {
	++i; // Skip '"'.
	while (i != header_value.end() && *i != '"') ++i;
	if (i == header_value.end()) return;
	++i; // Skip '"'.
	cookie_value = base::StringPiece(cookie_value_beginning, i);
	// i points to character after '"', potentially a ';'.
	} else {
	while (i != header_value.end() && *i != ';') ++i;
	cookie_value = base::StringPiece(cookie_value_beginning, i);
	// i points to ';' or end of string.
	}
	}
	parsed_cookies->push_back(std::make_pair(cookie_name, cookie_value));
	// Eat ';'.
	if (i != header_value.end()) ++i;
	}
	}

	std::string SerializeRequestCookieLine(
	const ParsedRequestCookies& parsed_cookies) {
	std::string buffer;
	for (ParsedRequestCookies::const_iterator i = parsed_cookies.begin();
	i != parsed_cookies.end(); ++i) {
	if (!buffer.empty())
	buffer.append("; ");
	buffer.append(i->first.begin(), i->first.end());
	buffer.push_back('=');
	buffer.append(i->second.begin(), i->second.end());
	}
	return buffer;
	}

	} // namespace cookie_utils
	} // namespace net