blob: dc3580f2bced3e33fec3d89a7c80b7750621523c [file] [log] [blame]
/*
* Copyright (C) 2009 Julien Chaffraix <jchaffraix@pleyo.com>
* Copyright (C) 2010, 2011, 2012 Research In Motion Limited. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "CookieParser.h"
#include "Logging.h"
#include "ParsedCookie.h"
#include <network/DomainTools.h>
#include <wtf/CurrentTime.h>
#include <wtf/text/CString.h>
namespace WebCore {
// Logs the formatted error, deletes the pointer named `res` that must be in scope at the
// expansion site, and returns 0 from the enclosing function.
// The body is wrapped in do { } while (0) so that the expansion together with the caller's
// trailing ';' forms exactly one statement; a bare { } block followed by ';' would be two
// statements and would break an if/else whose if-branch is this macro.
#define LOG_AND_DELETE(format, ...) \
    do { \
        LOG_ERROR(format, ## __VA_ARGS__); \
        delete res; \
        return 0; \
    } while (0)
// True for the two characters that end one cookie inside a header string: CR and LF.
static inline bool isCookieHeaderSeparator(UChar c)
{
    switch (c) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
// True for the blanks that are trimmed around cookie names and values: space and horizontal tab.
static inline bool isLightweightSpace(UChar c)
{
    if (c == ' ')
        return true;
    return c == '\t';
}
// Remembers the URL the cookies are parsed for and derives the default cookie host from it.
// When getCanonicalIPFormat() returns a non-empty string for the host, that string becomes the
// default host and the host is flagged as an IP address. Otherwise the host is stored with a
// leading '.' (added only if it does not already start with one).
CookieParser::CookieParser(const KURL& defaultCookieURL)
    : m_defaultCookieURL(defaultCookieURL)
{
    m_defaultCookieHost = defaultCookieURL.host();

    string hostDomainCanonical = BlackBerry::Platform::getCanonicalIPFormat(m_defaultCookieHost.utf8().data()).c_str();
    m_defaultDomainIsIPAddress = !hostDomainCanonical.empty();

    if (m_defaultDomainIsIPAddress) {
        m_defaultCookieHost = String(hostDomainCanonical.c_str());
        return;
    }

    if (!m_defaultCookieHost.startsWith("."))
        m_defaultCookieHost = "." + m_defaultCookieHost;
}
// The body is empty: nothing is released here. The cookies produced by parse() and
// parseOneCookie() are returned to the caller and are not stored by the code in this file.
CookieParser::~CookieParser()
{
}
// Splits a cookie header on CR / LF and parses every non-empty segment as one cookie.
//
// Each segment is handed to parseOneCookie() as the half-open range [cookieStart, cookieEnd),
// where cookieEnd is the index of the separator or the length of the string. Segments that
// parseOneCookie() rejects (it returns 0) are skipped. The returned vector holds the raw
// pointers produced by parseOneCookie(); nothing in this function frees them.
//
// The scan never indexes past the last character, so a header that ends with a separator
// (e.g. "a=b\r\n") does not produce an extra, empty-named cookie for the empty tail.
Vector<ParsedCookie*> CookieParser::parse(const String& cookies)
{
    Vector<ParsedCookie*, 4> parsedCookies;
    const double curTime = currentTime();
    const unsigned cookiesLength = cookies.length();

    unsigned cookieEnd = 0;

    // Iterate over the header to parse all the cookies. An empty header never enters the loop.
    while (cookieEnd < cookiesLength) {
        const unsigned cookieStart = cookieEnd;

        // Find a cookie separator (or the end of the header).
        while (cookieEnd < cookiesLength && !isCookieHeaderSeparator(cookies[cookieEnd]))
            ++cookieEnd;

        // An empty segment (two separators in a row, or a leading separator) is not a cookie.
        if (cookieStart != cookieEnd) {
            ParsedCookie* cookie = parseOneCookie(cookies, cookieStart, cookieEnd, curTime);
            if (cookie)
                parsedCookies.append(cookie);
        }

        // Step over the separator; when the end of the header was reached this ends the loop.
        ++cookieEnd;
    }

    return parsedCookies;
}
// Parses a string that holds exactly one cookie, using the current time as the parse time.
//
// The four-argument overload treats |end| as exclusive (parse() hands it the index of the
// separator or the length of the string), so the whole string is the range [0, length()).
// Passing length() - 1 would drop the last character of the cookie and would wrap around to a
// huge value for an empty string.
ParsedCookie* CookieParser::parseOneCookie(const String& cookie)
{
    return parseOneCookie(cookie, 0, cookie.length(), currentTime());
}
// Parses the single cookie stored in cookie[start, end).
//
// The cookie String passed into this method will only contain the name value pair as well as other
// related cookie attributes such as max-age and domain ("[NAME =] VALUE" followed by optional
// ';'-separated attributes). Set-Cookie should never be part of this string.
//
// |end| is exclusive: parse() passes the index of the header separator that terminates the cookie,
// or the length of the string for the last cookie. |curTime| is forwarded to the ParsedCookie
// constructor.
//
// Returns a ParsedCookie allocated with new (this function keeps no reference to it), or 0 when the
// cookie is rejected; a rejected cookie is deleted before returning.
//
// Attribute names are matched case-insensitively as a prefix of the token ("path", "domain",
// "expires", "max-age", "comment", "version", "secure", "httponly"). A token that matches none of
// them is logged and ignored; it neither modifies nor invalidates the cookie (RFC 6265, section
// 4.1.2: user agents ignore unrecognized cookie attributes, but not the entire cookie).
ParsedCookie* CookieParser::parseOneCookie(const String& cookie, unsigned start, unsigned end, double curTime)
{
    ParsedCookie* res = new ParsedCookie(curTime);

    if (!res)
        LOG_AND_DELETE("Out of memory");

    res->setProtocol(m_defaultCookieURL.protocol());

    // Parse [NAME "="] VALUE
    unsigned tokenEnd = start; // Token end contains the position of the '=' or the end of a token
    unsigned pairEnd = start; // Pair end contains always the position of the ';'

    // Find the first ';' which is not double-quoted and the '=' (if they exist).
    bool foundEqual = false;
    while (pairEnd < end && cookie[pairEnd] != ';') {
        if (cookie[pairEnd] == '=') {
            if (tokenEnd == start) {
                tokenEnd = pairEnd;
                foundEqual = true;
            }
        } else if (cookie[pairEnd] == '"') {
            // Jump over a quoted section so that a ';' or '=' inside it is not treated as a delimiter.
            size_t secondQuotePosition = cookie.find('"', pairEnd + 1);
            if (secondQuotePosition != notFound && secondQuotePosition <= end) {
                pairEnd = secondQuotePosition + 1;
                continue;
            }
        }
        pairEnd++;
    }

    unsigned tokenStart = start;

    // Set once a NAME has been stored, so that the VALUE handling below can tell
    // "NAME=VALUE" apart from a lone token.
    bool hasName = false;

    if (tokenEnd != start) {
        // There is a '=' so parse the NAME, which lies in [start, tokenEnd).
        unsigned nameEnd = tokenEnd; // Exclusive: starts on the '='.

        // Remove lightweight spaces on both sides without ever leaving [start, tokenEnd).
        while (tokenStart < nameEnd && isLightweightSpace(cookie[tokenStart]))
            tokenStart++;
        while (nameEnd > tokenStart && isLightweightSpace(cookie[nameEnd - 1]))
            nameEnd--;

        if (nameEnd == tokenStart)
            LOG_AND_DELETE("Empty name. Rejecting the cookie");

        // The length is measured from the trimmed start; measuring it from |start| would pull
        // the '=' and part of the value into the name whenever leading blanks were skipped.
        String name = cookie.substring(tokenStart, nameEnd - tokenStart);
        res->setName(name);
        hasName = true;
    }

    // Now parse the VALUE
    tokenStart = tokenEnd + 1;
    if (!hasName)
        --tokenStart; // No NAME was consumed, so the token begins at |start| itself.

    // Skip lightweight spaces in our token
    while (tokenStart < pairEnd && isLightweightSpace(cookie[tokenStart]))
        tokenStart++;

    tokenEnd = pairEnd;
    while (tokenEnd > tokenStart && isLightweightSpace(cookie[tokenEnd - 1]))
        tokenEnd--;

    String value;
    if (tokenEnd == tokenStart) {
        // Firefox accepts empty value so we will do the same
        value = String();
    } else
        value = cookie.substring(tokenStart, tokenEnd - tokenStart);

    if (hasName)
        res->setValue(value);
    else if (foundEqual) {
        // A '=' was seen but no NAME was stored (the '=' is the first character): reject.
        delete res;
        return 0;
    } else
        res->setName(value); // No NAME=VALUE, only NAME

    while (pairEnd < end) {
        // Switch to the next pair as pairEnd is on the ';' and fast-forward any lightweight spaces.
        pairEnd++;
        while (pairEnd < end && isLightweightSpace(cookie[pairEnd]))
            pairEnd++;

        tokenStart = pairEnd;
        tokenEnd = tokenStart; // initialize token end to catch first '='

        while (pairEnd < end && cookie[pairEnd] != ';') {
            if (tokenEnd == tokenStart && cookie[pairEnd] == '=')
                tokenEnd = pairEnd;
            pairEnd++;
        }

        // FIXME : should we skip lightweight spaces here ?

        // |length| is the length of the attribute name and |tokenStartSvg| its first index.
        unsigned length = tokenEnd - tokenStart;
        unsigned tokenStartSvg = tokenStart;

        String parsedValue;
        if (tokenStart != tokenEnd) {
            // There is an equal sign so remove lightweight spaces in VALUE
            tokenStart = tokenEnd + 1;
            while (tokenStart < pairEnd && isLightweightSpace(cookie[tokenStart]))
                tokenStart++;

            tokenEnd = pairEnd;
            while (tokenEnd > tokenStart && isLightweightSpace(cookie[tokenEnd - 1]))
                tokenEnd--;

            parsedValue = cookie.substring(tokenStart, tokenEnd - tokenStart);
        } else {
            // If the parsedValue is empty, initialise it in case we need it
            parsedValue = String();
            // Handle a token without value.
            length = pairEnd - tokenStart;
        }

        // Detect which "cookie-av" is parsed.
        // Look at the first char, then compare the rest of the name, for performance reasons.
        // String::find() returns a position (or notFound), never a boolean, so the result has to be
        // compared with the index right after the first character; the length check guarantees
        // that the compared characters all belong to the attribute name.
        const unsigned restOfName = tokenStartSvg + 1;
        bool recognized = false;

        switch (cookie[tokenStartSvg]) {
        case 'P':
        case 'p': {
            if (length >= 4 && cookie.find("ath", restOfName, false) == restOfName) {
                recognized = true;
                // We need the path to be decoded to match those returned from KURL::path().
                // The path attribute may or may not include percent-encoded characters. Fortunately
                // if there are no percent-encoded characters, decoding the url is a no-op.
                res->setPath(decodeURLEscapeSequences(parsedValue));

                // We have to disable the following check because sites like Facebook and
                // Gmail currently do not follow the spec.
#if 0
                // Check if path attribute is a prefix of the request URI.
                if (!m_defaultCookieURL.path().startsWith(res->path()))
                    LOG_AND_DELETE("Invalid cookie %s (path): it does not math the URL", cookie.ascii().data());
#endif
            }
            break;
        }

        case 'D':
        case 'd': {
            if (length >= 6 && cookie.find("omain", restOfName, false) == restOfName) {
                recognized = true;

                // Strip one pair of surrounding double quotes.
                if (parsedValue.length() > 1 && parsedValue[0] == '"' && parsedValue[parsedValue.length() - 1] == '"')
                    parsedValue = parsedValue.substring(1, parsedValue.length() - 2);

                // Check if the domain contains an embedded dot.
                size_t dotPosition = parsedValue.find(".", 1);
                if (dotPosition == notFound || dotPosition == parsedValue.length())
                    LOG_AND_DELETE("Invalid cookie %s (domain): it does not contain an embedded dot", cookie.ascii().data());

                // If the domain does not start with a dot, add one for security checks,
                // For example: ab.c.com does not domain match b.c.com;
                String realDomain = parsedValue[0] == '.' ? parsedValue : "." + parsedValue;

                // Try to return a canonical ip address if the domain is an ip
                bool isIPAddress = false;

                // We only check if the current domain is an IP address when the default domain is an IP address
                // We know if the default domain is not an IP address and the current domain is, it won't suffix match
                // If it is an IP Address, we should treat it only if it matches the host exactly
                // We determine the canonical IP format before comparing because IPv6 could be represented in multiple formats
                if (m_defaultDomainIsIPAddress) {
                    String realDomainCanonical = String(BlackBerry::Platform::getCanonicalIPFormat(realDomain.utf8().data()).c_str());
                    if (realDomainCanonical.isEmpty() || realDomainCanonical != m_defaultCookieHost)
                        LOG_AND_DELETE("Invalid cookie %s (domain): domain is IP but does not match host's IP", cookie.ascii().data());
                    realDomain = realDomainCanonical;
                    isIPAddress = true;
                } else {
                    // The request host should domain match the Domain attribute.
                    // Domain string starts with a dot, so a.b.com should domain match .a.b.com.
                    // add a "." at beginning of host name, because it can handle many cases such as
                    // a.b.com matches b.com, a.b.com matches .B.com and a.b.com matches .A.b.Com
                    // and so on.
                    // We also have to make a special case for IP addresses. If a website tries to set
                    // a cookie to 61.97, that domain is not an IP address and will end with the m_defaultCookieHost
                    if (!m_defaultCookieHost.endsWith(realDomain, false))
                        LOG_AND_DELETE("Invalid cookie %s (domain): it does not domain match the host", cookie.ascii().data());

                    // We should check for an embedded dot in the portion of string in the host not in the domain
                    // but to match firefox behaviour we do not.

                    // Check whether the domain is a top level domain, if it is throw it out
                    // http://publicsuffix.org/list/
                    if (BlackBerry::Platform::isTopLevelDomain(realDomain.utf8().data()))
                        LOG_AND_DELETE("Invalid cookie %s (domain): it did not pass the top level domain check", cookie.ascii().data());
                }
                res->setDomain(realDomain, isIPAddress);
            }
            break;
        }

        case 'E':
        case 'e': {
            if (length >= 7 && cookie.find("xpires", restOfName, false) == restOfName) {
                recognized = true;
                res->setExpiry(parsedValue);
            }
            break;
        }

        case 'M':
        case 'm': {
            if (length >= 7 && cookie.find("ax-age", restOfName, false) == restOfName) {
                recognized = true;
                res->setMaxAge(parsedValue);
            }
            break;
        }

        case 'C':
        case 'c': {
            if (length >= 7 && cookie.find("omment", restOfName, false) == restOfName) {
                recognized = true;
                // We do not have room for the comment part (and neither does Mozilla) so just log the comment.
                LOG(Network, "Comment %s for ParsedCookie : %s\n", parsedValue.ascii().data(), cookie.ascii().data());
            }
            break;
        }

        case 'V':
        case 'v': {
            if (length >= 7 && cookie.find("ersion", restOfName, false) == restOfName) {
                recognized = true;
                // Although the outdated Cookie Spec (RFC2965, http://tools.ietf.org/html/rfc2965) defined
                // the value of version can only contain DIGIT, some random sites, e.g. https://devforums.apple.com
                // would use double quotation marks to quote the digit. So we need to get rid of them for compliance.
                if (parsedValue.length() > 1 && parsedValue[0] == '"' && parsedValue[parsedValue.length() - 1] == '"')
                    parsedValue = parsedValue.substring(1, parsedValue.length() - 2);

                if (parsedValue.toInt() != 1)
                    LOG_AND_DELETE("ParsedCookie version %d not supported (only support version=1)", parsedValue.toInt());
            }
            break;
        }

        case 'S':
        case 's': {
            // Secure is a standalone token ("Secure;")
            if (length >= 6 && cookie.find("ecure", restOfName, false) == restOfName) {
                recognized = true;
                res->setSecureFlag(true);
            }
            break;
        }

        case 'H':
        case 'h': {
            // HttpOnly is a standalone token ("HttpOnly;")
            if (length >= 8 && cookie.find("ttpOnly", restOfName, false) == restOfName) {
                recognized = true;
                res->setIsHttpOnly(true);
            }
            break;
        }

        default:
            break;
        }

        // An unrecognized attribute is ignored. If length == 0, we should be at the end of the
        // cookie (case : ";\r") so there is nothing to report.
        if (!recognized && length)
            LOG_ERROR("Invalid token for cookie %s", cookie.ascii().data());
    }

    // Check if the cookie is valid with respect to the size limit.
    if (!res->isUnderSizeLimit())
        LOG_AND_DELETE("ParsedCookie %s is above the 4kb in length : REJECTED", cookie.ascii().data());

    // If some pair was not provided, during parsing then apply some default value
    // the rest has been done in the constructor.

    // If no domain was provided, set it to the host
    if (!res->domain())
        res->setDomain(m_defaultCookieHost, m_defaultDomainIsIPAddress);

    // According to the Cookie Specification (RFC6265, section 4.1.2.4 and 5.2.4, http://tools.ietf.org/html/rfc6265),
    // If no path was provided or the first character of the path value is not '/', set it to the host's path
    //
    // REFERENCE
    // 4.1.2.4. The Path Attribute
    //
    // The scope of each cookie is limited to a set of paths, controlled by
    // the Path attribute. If the server omits the Path attribute, the user
    // agent will use the "directory" of the request-uri's path component as
    // the default value. (See Section 5.1.4 for more details.)
    // ...........
    // 5.2.4. The Path Attribute
    //
    // If the attribute-name case-insensitively matches the string "Path",
    // the user agent MUST process the cookie-av as follows.
    //
    // If the attribute-value is empty or if the first character of the
    // attribute-value is not %x2F ("/"):
    //
    // Let cookie-path be the default-path.
    //
    // Otherwise:
    //
    // Let cookie-path be the attribute-value.
    //
    // Append an attribute to the cookie-attribute-list with an attribute-
    // name of Path and an attribute-value of cookie-path.
    if (!res->path() || !res->path().length() || !res->path().startsWith("/", false)) {
        // Take the request path up to, but not including, its last '/'.
        String path = m_defaultCookieURL.string().substring(m_defaultCookieURL.pathStart(), m_defaultCookieURL.pathAfterLastSlash() - m_defaultCookieURL.pathStart() - 1);
        if (path.isEmpty())
            path = "/";
        // Since this is reading the raw url string, it could contain percent-encoded sequences. We
        // want it to be comparable to the return value of url.path(), which is not percent-encoded,
        // so we must remove the escape sequences.
        res->setPath(decodeURLEscapeSequences(path));
    }

    return res;
}
} // namespace WebCore